xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 02e5d7d7)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27
/* Select the Darwin calling convention from the compiler's predefines.  */
#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
#endif
/* Defined to 1 under the SysV calling convention.
   NOTE(review): the consumer of this macro is outside this chunk; it
   presumably requests even-register alignment of 64-bit args -- confirm.  */
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

/* Vector scratch registers.  V0 and V1 do not appear in
   tcg_target_reg_alloc_order, so they are free for backend use.  */
#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

/* Base register for TB-relative addressing (used as RA by the ADDI/LD
   forms in tcg_out_movi_int); the scheme is only enabled on 64-bit hosts.  */
#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

/* Target-specific constant-constraint flags.  They are OR'd into
   TCGArgConstraint.ct by target_parse_constraint() and tested by
   tcg_target_const_match().  */
#define TCG_CT_CONST_S16  0x100   /* value == (int16_t)value */
#define TCG_CT_CONST_U16  0x200   /* value == (uint16_t)value */
#define TCG_CT_CONST_S32  0x400   /* value == (int32_t)value */
#define TCG_CT_CONST_U32  0x800   /* value == (uint32_t)value */
#define TCG_CT_CONST_ZERO 0x1000  /* value == 0 */
#define TCG_CT_CONST_MONE 0x2000  /* value == -1 */
#define TCG_CT_CONST_WSZ  0x4000  /* value == operand width (32 or 64) */
64
/* NOTE(review): not referenced in this part of the file; presumably the
   address translated code branches to when leaving a TB -- confirm.  */
static tcg_insn_unit *tb_ret_addr;

/* Host capability state.  The code below tests have_isa_2_07 and
   have_isa_3_00 (presumably macros derived from have_isa -- confirm) and
   have_vsx to choose between instruction encodings.  */
TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

/* Register number 30; only defined when CONFIG_SOFTMMU is not.  */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif
75
#ifdef CONFIG_DEBUG_TCG
/* Printable register names indexed by register number: the 32 general
   registers "r0".."r31" followed by the 32 vector registers "v0".."v31".
   The longest name is three characters, so each entry fits in 4 bytes
   including the terminating NUL.  Only built with CONFIG_DEBUG_TCG.  */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
88
/* Register allocation preference list.  Call-saved general registers come
   first, then call-clobbered non-argument registers, then the argument
   registers in descending order, and finally the call-clobbered vector
   registers.  R0, R1, V0, V1 and V20-V31 are deliberately absent.  */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
141
/* Registers used to pass integer arguments to called functions,
   in argument order: R3 through R10.  */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
152
/* Registers in which a called function returns its result: R3, then R4.  */
static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};
157
/* Registers the backend treats as callee-saved: R14 through R31, plus R11
   when the Darwin calling convention is selected.
   NOTE(review): presumably these are the registers saved and restored by
   the prologue/epilogue, which is outside this chunk -- confirm.  */
static const int tcg_target_callee_save_regs[] = {
#ifdef TCG_TARGET_CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
181
182static inline bool in_range_b(tcg_target_long target)
183{
184    return target == sextract64(target, 0, 26);
185}
186
187static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
188{
189    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
190    tcg_debug_assert(in_range_b(disp));
191    return disp & 0x3fffffc;
192}
193
194static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
195{
196    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
197    if (in_range_b(disp)) {
198        *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
199        return true;
200    }
201    return false;
202}
203
204static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
205{
206    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
207    tcg_debug_assert(disp == (int16_t) disp);
208    return disp & 0xfffc;
209}
210
211static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
212{
213    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
214    if (disp == (int16_t) disp) {
215        *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
216        return true;
217    }
218    return false;
219}
220
221/* parse target specific constraints */
222static const char *target_parse_constraint(TCGArgConstraint *ct,
223                                           const char *ct_str, TCGType type)
224{
225    switch (*ct_str++) {
226    case 'A': case 'B': case 'C': case 'D':
227        ct->ct |= TCG_CT_REG;
228        tcg_regset_set_reg(ct->u.regs, 3 + ct_str[0] - 'A');
229        break;
230    case 'r':
231        ct->ct |= TCG_CT_REG;
232        ct->u.regs = 0xffffffff;
233        break;
234    case 'v':
235        ct->ct |= TCG_CT_REG;
236        ct->u.regs = 0xffffffff00000000ull;
237        break;
238    case 'L':                   /* qemu_ld constraint */
239        ct->ct |= TCG_CT_REG;
240        ct->u.regs = 0xffffffff;
241        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
242#ifdef CONFIG_SOFTMMU
243        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
244        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
245#endif
246        break;
247    case 'S':                   /* qemu_st constraint */
248        ct->ct |= TCG_CT_REG;
249        ct->u.regs = 0xffffffff;
250        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
251#ifdef CONFIG_SOFTMMU
252        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
253        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
254        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
255#endif
256        break;
257    case 'I':
258        ct->ct |= TCG_CT_CONST_S16;
259        break;
260    case 'J':
261        ct->ct |= TCG_CT_CONST_U16;
262        break;
263    case 'M':
264        ct->ct |= TCG_CT_CONST_MONE;
265        break;
266    case 'T':
267        ct->ct |= TCG_CT_CONST_S32;
268        break;
269    case 'U':
270        ct->ct |= TCG_CT_CONST_U32;
271        break;
272    case 'W':
273        ct->ct |= TCG_CT_CONST_WSZ;
274        break;
275    case 'Z':
276        ct->ct |= TCG_CT_CONST_ZERO;
277        break;
278    default:
279        return NULL;
280    }
281    return ct_str;
282}
283
284/* test if a constant matches the constraint */
285static int tcg_target_const_match(tcg_target_long val, TCGType type,
286                                  const TCGArgConstraint *arg_ct)
287{
288    int ct = arg_ct->ct;
289    if (ct & TCG_CT_CONST) {
290        return 1;
291    }
292
293    /* The only 32-bit constraint we use aside from
294       TCG_CT_CONST is TCG_CT_CONST_S16.  */
295    if (type == TCG_TYPE_I32) {
296        val = (int32_t)val;
297    }
298
299    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
300        return 1;
301    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
302        return 1;
303    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
304        return 1;
305    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
306        return 1;
307    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
308        return 1;
309    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
310        return 1;
311    } else if ((ct & TCG_CT_CONST_WSZ)
312               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
313        return 1;
314    }
315    return 0;
316}
317
/* Instruction-form helpers.  OPCD() places a primary opcode at bit 26 and
   up (counting from the least significant bit).  The XOnn/MDnn/VX4 helpers
   combine a fixed primary opcode (19, 30, 31, 58, 62 or 4) with an
   extended opcode placed at the bit position each form requires.  */
#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
326
/* Opcode templates, named after their PowerPC mnemonics.  Operand fields
   are OR'd in by the emitters using the RT/RA/RB/TAI/SAI helpers.  */

/* Branches and basic integer loads/stores.  */
#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

/* Doubleword stores.  */
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

/* Doubleword and sign-extending word loads.  */
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

/* Arithmetic, logical and compare with a 16-bit immediate.  */
#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

/* Word load/store with update.  */
#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)
365
/* 32-bit rotate-and-mask opcodes (operands supplied by tcg_out_rlw).  */
#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

/* 64-bit rotate-and-mask opcodes (operands supplied by tcg_out_rld).  */
#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

/* Branches to LR/CTR and condition-register logical operations.  */
#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
382
/* Register-register integer operations under primary opcode 31:
   sign extension, add/subtract with carry variants, logical ops, 32-bit
   multiply/divide, compares, byte-reversed loads/stores, SPR moves,
   bit counting and ISEL.  */
#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

/* 64-bit multiply and divide.  */
#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

/* Indexed (register + register) integer loads and stores.  */
#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

/* Memory barriers.  LWSYNC is the HWSYNC encoding with bit 21 set.  */
#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

/* Special-purpose register field for MFSPR/MTSPR: (a << 5 | b), placed
   at bit 11.  LR and CTR are the two SPRs named here.  */
#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

/* 32-bit shifts by register.  */
#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

/* 64-bit shifts by register, plus arithmetic shift right by immediate.  */
#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

/* Trap word; TRAP sets the TO field to 31.  */
#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */
466
/* Vector loads.  The LV* forms are plain XO31 encodings; the LX* forms
   carry the minimum ISA level in their trailing comment, and those marked
   "force tx=1" have the low bit(s) pre-set in the template.  */
#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

/* Vector stores, following the same conventions as the loads above.  */
#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */
484
/* Vector integer opcodes under primary opcode 4.  A trailing comment
   gives the minimum ISA level where one is recorded; entries without one
   carry no ISA annotation in this file.  */

/* Vector add: saturating signed/unsigned and modulo, per element size.  */
#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

/* Vector subtract, same variants as add.  */
#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

/* Vector negate: one base encoding, with 6 or 7 at bit 16 selecting the
   word or doubleword form.  */
#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

/* Vector signed/unsigned maximum and minimum.  */
#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

/* Vector compares: equal, signed/unsigned greater-than, not-equal.  */
#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

/* Vector shifts (left, logical right, arithmetic right) and rotates.  */
#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

/* Vector multiplies and multiply-sum.  */
#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

/* Vector merge high/low.  */
#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

/* Vector pack.  */
#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

/* Vector bitwise logic.  */
#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

/* Vector splat from an element or from a small signed immediate.  */
#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

/* VSX permute/select/splat; the extension bits are pre-set as noted.  */
#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

/* Moves between general registers and vector registers.  */
#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
608
/* Operand field placement.  Each macro shifts its argument to the bit
   position (counted from the least significant bit) of the named field.
   None of these mask their argument; callers must pass in-range values.  */
#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

/* Vector register fields.  The register number is masked to 5 bits, so
   TCG_REG_Vn values (which follow the 32 general registers) map to 0..31.  */
#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

/* Low bit of a branch instruction.  */
#define LK    1

/* Three-register operand sets, and two-register + 16-bit immediate sets.
   The immediate is truncated to its low 16 bits.  */
#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

/* Condition-register addressing.  BF selects a CR field; the others
   select bit C of CR field N (bit number N * 4 + C) in various operand
   positions.  */
#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

/* Values for the BO field of conditional branches.  */
#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)
643
/* Bit index within one 4-bit condition-register field, as consumed by
   the BI/BT/BA/BB/BC_ macros.  */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
650
/* Conditional-branch template for each TCG condition.  Every entry tests
   a bit of CR field 7, branching when it is set (BO_COND_TRUE) or clear
   (BO_COND_FALSE).  Signed and unsigned conditions share the same
   entries, so the distinction has to come from the compare that sets CR7
   (presumably CMP vs CMPL -- the compare emitter is outside this chunk).  */
static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
663
/* ISEL template for each TCG condition, selecting on a bit of CR field 7.
   The low bit here is set if the RA and RB fields must be inverted.
   That bit is a software flag for the emitter: it marks the conditions
   that are the negation of the CR bit being tested (NE, GE, LE, GEU, LEU).  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};
677
678static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
679                        intptr_t value, intptr_t addend)
680{
681    tcg_insn_unit *target;
682    int16_t lo;
683    int32_t hi;
684
685    value += addend;
686    target = (tcg_insn_unit *)value;
687
688    switch (type) {
689    case R_PPC_REL14:
690        return reloc_pc14(code_ptr, target);
691    case R_PPC_REL24:
692        return reloc_pc24(code_ptr, target);
693    case R_PPC_ADDR16:
694        /*
695         * We are (slightly) abusing this relocation type.  In particular,
696         * assert that the low 2 bits are zero, and do not modify them.
697         * That way we can use this with LD et al that have opcode bits
698         * in the low 2 bits of the insn.
699         */
700        if ((value & 3) || value != (int16_t)value) {
701            return false;
702        }
703        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
704        break;
705    case R_PPC_ADDR32:
706        /*
707         * We are abusing this relocation type.  Again, this points to
708         * a pair of insns, lis + load.  This is an absolute address
709         * relocation for PPC32 so the lis cannot be removed.
710         */
711        lo = value;
712        hi = value - lo;
713        if (hi + lo != value) {
714            return false;
715        }
716        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
717        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
718        break;
719    default:
720        g_assert_not_reached();
721    }
722    return true;
723}
724
/* Forward declaration: tcg_out_movi_int() below calls this with
   (ADDI, ADD) to form a TB-relative address before the definition
   appears.  */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);
727
/*
 * Emit a register-to-register move of the given TYPE from ARG to RET.
 *
 * Emits nothing when RET == ARG.  Returns false, emitting nothing, when
 * the move crosses between the general and vector register files and
 * the host lacks ISA 2.07; returns true otherwise.
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /* General to general: "or ret, arg, arg".  */
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                /* Vector to general; the opcode depends on the width.  */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                /* General to vector; the opcode depends on the width.  */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* Both registers are vector registers: share the vector path.  */
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        /* Vector to vector: "vor ret, arg, arg".  */
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
771
772static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
773                               int sh, int mb)
774{
775    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
776    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
777    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
778    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
779}
780
781static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
782                               int sh, int mb, int me)
783{
784    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
785}
786
787static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
788{
789    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
790}
791
792static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
793{
794    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
795}
796
797static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
798{
799    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
800}
801
802static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
803{
804    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
805}
806
807static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
808{
809    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
810}
811
812/* Emit a move into ret of arg, if it can be done in one insn.  */
813static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
814{
815    if (arg == (int16_t)arg) {
816        tcg_out32(s, ADDI | TAI(ret, 0, arg));
817        return true;
818    }
819    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
820        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
821        return true;
822    }
823    return false;
824}
825
/*
 * Load the integer constant ARG of the given TYPE into general register
 * RET, trying progressively longer instruction sequences.
 *
 * When IN_PROLOGUE is true, the forms that rely on TCG_REG_TB or on the
 * constant pool are skipped.  The order of the attempts matters: each
 * later strategy assumes the earlier, shorter ones have been ruled out.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    /* 64-bit constants are only legal on a 64-bit host.  */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Canonicalize 32-bit constants to their sign-extended form.  */
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    /* NOTE(review): this assumes TCG_REG_TB holds s->code_gen_ptr at run
       time; where the register is loaded is outside this chunk.  */
    tb_diff = arg - (intptr_t)s->code_gen_ptr;
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    /* Zero-extended 32-bit value whose low half has bit 15 clear, so the
       ADDI does not sign-extend into the upper bits.  */
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    /* Positive value that is all ones from bit 15 up to some bit: let
       ADDI sign-extend the low half, then clear the excess high bits.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    /* A 16-bit signed value shifted left ...  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    /* ... or a single-insn value shifted right.  */
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    /* The LD is emitted with a zero displacement; the pool entry's
       R_PPC_ADDR16 relocation is expected to fill it in later.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       -(intptr_t)s->code_gen_ptr);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    /* Last resort: build the high 32 bits, shift them into place, then
       OR in the two low halfwords.  The shift is written in two steps,
       presumably so it stays well-defined if tcg_target_long is only
       32 bits wide -- confirm.  */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
915
/*
 * Load the replicated constant VAL into vector register RET.
 *
 * Small splat values are materialized with a single splat-immediate
 * instruction; everything else is loaded from the constant pool via
 * TCG_REG_TMP1.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    /* If the low byte, sign-extended, is in [-16, 15] and VAL is that
       value replicated at 8, 16 or 32-bit element size, a splat-immediate
       insn with a 5-bit immediate at bit 16 suffices.  */
    low = (int8_t)val;
    if (low >= -16 && low < 16) {
        if (val == (tcg_target_long)dup_const(MO_8, low)) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (val == (tcg_target_long)dup_const(MO_16, low)) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (val == (tcg_target_long)dup_const(MO_32, low)) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
    }
    /* With ISA 3.00 any replicated byte can be splatted directly.  */
    if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
        tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
        return;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    /* With TCG_REG_TB the pool entry is addressed relative to
       code_gen_ptr via a 16-bit relocation; without it an absolute
       32-bit relocation over an insn pair is used.  */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = -(intptr_t)s->code_gen_ptr;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    /* Pick the load insn and emit a pool entry wide enough for it.  The
       entry is built from host-word-sized copies of VAL, so a 32-bit
       host needs twice as many as a 64-bit one.  */
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
        }
    }

    /* Emit the address computation with zero immediates as placeholders;
       the relocation registered above targets these insns (s->code_ptr
       was captured before they were emitted).  */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
980
981static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
982                         tcg_target_long arg)
983{
984    switch (type) {
985    case TCG_TYPE_I32:
986    case TCG_TYPE_I64:
987        tcg_debug_assert(ret < TCG_REG_V0);
988        tcg_out_movi_int(s, type, ret, arg, false);
989        break;
990
991    case TCG_TYPE_V64:
992    case TCG_TYPE_V128:
993        tcg_debug_assert(ret >= TCG_REG_V0);
994        tcg_out_dupi_vec(s, type, ret, arg);
995        break;
996
997    default:
998        g_assert_not_reached();
999    }
1000}
1001
/*
 * Test whether the 32-bit constant C is a single contiguous run of one
 * bits, i.e. one of
 *     0....01....1
 *     1....10....0
 *     0..01..10..0
 * If so, store the mask bounds (counted from the most significant bit)
 * in *MB and *ME and return true.  Returns false, leaving *MB and *ME
 * untouched, for any other pattern, including all-zeros and all-ones.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lowest_set, after_carry;

    if (c == 0 || c == 0xffffffffu) {
        return false;
    }

    /* Adding the lowest set bit collapses a contiguous run into a single
       carry bit (or into zero when the run reaches bit 31).  */
    lowest_set = c & -c;
    after_carry = c + lowest_set;
    if (after_carry & (after_carry - 1)) {
        /* More than one bit remains: the ones were not contiguous.  */
        return false;
    }

    *me = clz32(lowest_set);
    if (after_carry) {
        *mb = clz32(after_carry & -after_carry) + 1;
    } else {
        *mb = 0;
    }
    return true;
}
1025
/*
 * Test whether C is a 64-bit mask anchored at one end of the word:
 * either 1..10..0 (ones from the MSB down) or 0..01..1 (ones from the LSB
 * up).  Zero is rejected.
 *
 * On success, *MB and *ME receive the first and last set bit positions
 * counted from the most significant bit, and true is returned.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    if (c != 0) {
        uint64_t lowest = c & -c;

        /* 1..10..0: everything from the lowest set bit upwards is set.  */
        if (-lowest == c) {
            *mb = 0;
            *me = clz64(lowest);
            return true;
        }

        /* 0..01..1: bit 0 is set and c + 1 is a power of two.  */
        if (lowest == 1 && ((c + 1) & c) == 0) {
            *mb = clz64(c + 1) + 1;
            *me = 63;
            return true;
        }
    }
    return false;
}
1049
1050static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1051{
1052    int mb, me;
1053
1054    if (mask_operand(c, &mb, &me)) {
1055        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1056    } else if ((c & 0xffff) == c) {
1057        tcg_out32(s, ANDI | SAI(src, dst, c));
1058        return;
1059    } else if ((c & 0xffff0000) == c) {
1060        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1061        return;
1062    } else {
1063        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1064        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1065    }
1066}
1067
1068static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1069{
1070    int mb, me;
1071
1072    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1073    if (mask64_operand(c, &mb, &me)) {
1074        if (mb == 0) {
1075            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1076        } else {
1077            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1078        }
1079    } else if ((c & 0xffff) == c) {
1080        tcg_out32(s, ANDI | SAI(src, dst, c));
1081        return;
1082    } else if ((c & 0xffff0000) == c) {
1083        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1084        return;
1085    } else {
1086        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1087        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1088    }
1089}
1090
1091static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1092                           int op_lo, int op_hi)
1093{
1094    if (c >> 16) {
1095        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1096        src = dst;
1097    }
1098    if (c & 0xffff) {
1099        tcg_out32(s, op_lo | SAI(src, dst, c));
1100        src = dst;
1101    }
1102}
1103
/*
 * OR the 32-bit constant C into SRC, result in DST.  Delegates to
 * tcg_out_zori32 with ORI for the low half and ORIS for the high half.
 */
static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}
1108
/*
 * XOR the 32-bit constant C into SRC, result in DST.  Delegates to
 * tcg_out_zori32 with XORI for the low half and XORIS for the high half.
 */
static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
1113
1114static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
1115{
1116    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1117    if (in_range_b(disp)) {
1118        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1119    } else {
1120        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1121        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1122        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1123    }
1124}
1125
1126static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
1127                             TCGReg base, tcg_target_long offset)
1128{
1129    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
1130    bool is_int_store = false;
1131    TCGReg rs = TCG_REG_TMP1;
1132
1133    switch (opi) {
1134    case LD: case LWA:
1135        align = 3;
1136        /* FALLTHRU */
1137    default:
1138        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
1139            rs = rt;
1140            break;
1141        }
1142        break;
1143    case LXSD:
1144    case STXSD:
1145        align = 3;
1146        break;
1147    case LXV:
1148    case STXV:
1149        align = 15;
1150        break;
1151    case STD:
1152        align = 3;
1153        /* FALLTHRU */
1154    case STB: case STH: case STW:
1155        is_int_store = true;
1156        break;
1157    }
1158
1159    /* For unaligned, or very large offsets, use the indexed form.  */
1160    if (offset & align || offset != (int32_t)offset || opi == 0) {
1161        if (rs == base) {
1162            rs = TCG_REG_R0;
1163        }
1164        tcg_debug_assert(!is_int_store || rs != rt);
1165        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
1166        tcg_out32(s, opx | TAB(rt & 31, base, rs));
1167        return;
1168    }
1169
1170    l0 = (int16_t)offset;
1171    offset = (offset - l0) >> 16;
1172    l1 = (int16_t)offset;
1173
1174    if (l1 < 0 && orig >= 0) {
1175        extra = 0x4000;
1176        l1 = (int16_t)(offset - 0x4000);
1177    }
1178    if (l1) {
1179        tcg_out32(s, ADDIS | TAI(rs, base, l1));
1180        base = rs;
1181    }
1182    if (extra) {
1183        tcg_out32(s, ADDIS | TAI(rs, base, extra));
1184        base = rs;
1185    }
1186    if (opi != ADDI || base != rt || l0 != 0) {
1187        tcg_out32(s, opi | TAI(rt & 31, base, l0));
1188    }
1189}
1190
/*
 * Emit a VSLDOI instruction with target RET, sources VA and VB, and the
 * shift count SHB placed at bit 6 of the encoding.
 */
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
1196
1197static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1198                       TCGReg base, intptr_t offset)
1199{
1200    int shift;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        if (ret < TCG_REG_V0) {
1205            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
1206            break;
1207        }
1208        if (have_isa_2_07 && have_vsx) {
1209            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
1210            break;
1211        }
1212        tcg_debug_assert((offset & 3) == 0);
1213        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
1214        shift = (offset - 4) & 0xc;
1215        if (shift) {
1216            tcg_out_vsldoi(s, ret, ret, ret, shift);
1217        }
1218        break;
1219    case TCG_TYPE_I64:
1220        if (ret < TCG_REG_V0) {
1221            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1222            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
1223            break;
1224        }
1225        /* fallthru */
1226    case TCG_TYPE_V64:
1227        tcg_debug_assert(ret >= TCG_REG_V0);
1228        if (have_vsx) {
1229            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
1230                             ret, base, offset);
1231            break;
1232        }
1233        tcg_debug_assert((offset & 7) == 0);
1234        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
1235        if (offset & 8) {
1236            tcg_out_vsldoi(s, ret, ret, ret, 8);
1237        }
1238        break;
1239    case TCG_TYPE_V128:
1240        tcg_debug_assert(ret >= TCG_REG_V0);
1241        tcg_debug_assert((offset & 15) == 0);
1242        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
1243                         LVX, ret, base, offset);
1244        break;
1245    default:
1246        g_assert_not_reached();
1247    }
1248}
1249
1250static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1251                              TCGReg base, intptr_t offset)
1252{
1253    int shift;
1254
1255    switch (type) {
1256    case TCG_TYPE_I32:
1257        if (arg < TCG_REG_V0) {
1258            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1259            break;
1260        }
1261        if (have_isa_2_07 && have_vsx) {
1262            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1263            break;
1264        }
1265        assert((offset & 3) == 0);
1266        tcg_debug_assert((offset & 3) == 0);
1267        shift = (offset - 4) & 0xc;
1268        if (shift) {
1269            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1270            arg = TCG_VEC_TMP1;
1271        }
1272        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1273        break;
1274    case TCG_TYPE_I64:
1275        if (arg < TCG_REG_V0) {
1276            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1277            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1278            break;
1279        }
1280        /* fallthru */
1281    case TCG_TYPE_V64:
1282        tcg_debug_assert(arg >= TCG_REG_V0);
1283        if (have_vsx) {
1284            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1285                             STXSDX, arg, base, offset);
1286            break;
1287        }
1288        tcg_debug_assert((offset & 7) == 0);
1289        if (offset & 8) {
1290            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1291            arg = TCG_VEC_TMP1;
1292        }
1293        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1294        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1295        break;
1296    case TCG_TYPE_V128:
1297        tcg_debug_assert(arg >= TCG_REG_V0);
1298        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1299                         STVX, arg, base, offset);
1300        break;
1301    default:
1302        g_assert_not_reached();
1303    }
1304}
1305
/*
 * Store-immediate hook.  This backend emits nothing and always returns
 * false.
 * NOTE(review): presumably false tells the common TCG code to load VAL
 * into a register and use tcg_out_st instead -- confirm against the caller.
 */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1311
1312static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
1313                        int const_arg2, int cr, TCGType type)
1314{
1315    int imm;
1316    uint32_t op;
1317
1318    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1319
1320    /* Simplify the comparisons below wrt CMPI.  */
1321    if (type == TCG_TYPE_I32) {
1322        arg2 = (int32_t)arg2;
1323    }
1324
1325    switch (cond) {
1326    case TCG_COND_EQ:
1327    case TCG_COND_NE:
1328        if (const_arg2) {
1329            if ((int16_t) arg2 == arg2) {
1330                op = CMPI;
1331                imm = 1;
1332                break;
1333            } else if ((uint16_t) arg2 == arg2) {
1334                op = CMPLI;
1335                imm = 1;
1336                break;
1337            }
1338        }
1339        op = CMPL;
1340        imm = 0;
1341        break;
1342
1343    case TCG_COND_LT:
1344    case TCG_COND_GE:
1345    case TCG_COND_LE:
1346    case TCG_COND_GT:
1347        if (const_arg2) {
1348            if ((int16_t) arg2 == arg2) {
1349                op = CMPI;
1350                imm = 1;
1351                break;
1352            }
1353        }
1354        op = CMP;
1355        imm = 0;
1356        break;
1357
1358    case TCG_COND_LTU:
1359    case TCG_COND_GEU:
1360    case TCG_COND_LEU:
1361    case TCG_COND_GTU:
1362        if (const_arg2) {
1363            if ((uint16_t) arg2 == arg2) {
1364                op = CMPLI;
1365                imm = 1;
1366                break;
1367            }
1368        }
1369        op = CMPL;
1370        imm = 0;
1371        break;
1372
1373    default:
1374        tcg_abort();
1375    }
1376    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
1377
1378    if (imm) {
1379        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
1380    } else {
1381        if (const_arg2) {
1382            tcg_out_movi(s, type, TCG_REG_R0, arg2);
1383            arg2 = TCG_REG_R0;
1384        }
1385        tcg_out32(s, op | RA(arg1) | RB(arg2));
1386    }
1387}
1388
1389static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1390                                TCGReg dst, TCGReg src)
1391{
1392    if (type == TCG_TYPE_I32) {
1393        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1394        tcg_out_shri32(s, dst, dst, 5);
1395    } else {
1396        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1397        tcg_out_shri64(s, dst, dst, 6);
1398    }
1399}
1400
1401static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1402{
1403    /* X != 0 implies X + -1 generates a carry.  Extra addition
1404       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1405    if (dst != src) {
1406        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1407        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1408    } else {
1409        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1410        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1411    }
1412}
1413
1414static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1415                                  bool const_arg2)
1416{
1417    if (const_arg2) {
1418        if ((uint32_t)arg2 == arg2) {
1419            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1420        } else {
1421            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1422            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1423        }
1424    } else {
1425        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1426    }
1427    return TCG_REG_R0;
1428}
1429
/*
 * Emit code setting ARG0 to 1 when "ARG1 cond ARG2" holds, else to 0.
 *
 * ARG2 is a constant when CONST_ARG2 is nonzero, otherwise a register.
 * Three strategies are used, in order:
 *   1. Comparisons against zero for EQ/NE/GE/LT have dedicated short
 *      sequences.
 *   2. With ISEL available, compare into CR7 and select between 1 and 0.
 *   3. Otherwise EQ/NE reduce to a zero test via XOR, and the ordered
 *      conditions compare into CR7, copy the CR to R0 and extract one bit.
 * R0 is used as a scratch register.
 */
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    /* NOTE(review): this test does not look at const_arg2; presumably a
       register operand can never have the value 0 here (R0 being the
       scratch) -- confirm against the register allocation setup.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            /* The ne0 sequence works on the full register, so a 32-bit
               value on a 64-bit host is zero-extended into R0 first.  */
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            /* x >= 0 is the sign bit of ~x: invert, then share the LT code.  */
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        /* Bit 0 of the table entry flags a condition that must be
           implemented by testing its inverse.  */
        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    /*
     * No ISEL.  For the ordered conditions, SH is the left-rotate count
     * that brings the wanted CR7 bit to the least significant position of
     * R0 (29 for LT, 30 for GT, 31 for EQ), and CROP, when nonzero, is a
     * CR-logical instruction that first writes the complement of LT or GT
     * into CR7[EQ].
     */
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        /* GE is !LT: compute it into CR7[EQ].  */
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        /* LE is !GT: compute it into CR7[EQ].  */
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        /* Copy CR field 7 to R0 and isolate the selected bit.  */
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        tcg_abort();
    }
}
1548
1549static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1550{
1551    if (l->has_value) {
1552        bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
1553    } else {
1554        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1555    }
1556    tcg_out32(s, bc);
1557}
1558
/*
 * Emit a compare-and-branch: compare ARG1 with ARG2 (a constant when
 * CONST_ARG2 is nonzero) into CR field 7, then emit the conditional
 * branch that tcg_to_bc[] associates with COND, targeting label L.
 */
static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}
1566
1567static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1568                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1569                            TCGArg v2, bool const_c2)
1570{
1571    /* If for some reason both inputs are zero, don't produce bad code.  */
1572    if (v1 == 0 && v2 == 0) {
1573        tcg_out_movi(s, type, dest, 0);
1574        return;
1575    }
1576
1577    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1578
1579    if (have_isel) {
1580        int isel = tcg_to_isel[cond];
1581
1582        /* Swap the V operands if the operation indicates inversion.  */
1583        if (isel & 1) {
1584            int t = v1;
1585            v1 = v2;
1586            v2 = t;
1587            isel &= ~1;
1588        }
1589        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1590        if (v2 == 0) {
1591            tcg_out_movi(s, type, TCG_REG_R0, 0);
1592        }
1593        tcg_out32(s, isel | TAB(dest, v1, v2));
1594    } else {
1595        if (dest == v2) {
1596            cond = tcg_invert_cond(cond);
1597            v2 = v1;
1598        } else if (dest != v1) {
1599            if (v1 == 0) {
1600                tcg_out_movi(s, type, dest, 0);
1601            } else {
1602                tcg_out_mov(s, type, dest, v1);
1603            }
1604        }
1605        /* Branch forward over one insn */
1606        tcg_out32(s, tcg_to_bc[cond] | 8);
1607        if (v2 == 0) {
1608            tcg_out_movi(s, type, dest, 0);
1609        } else {
1610            tcg_out_mov(s, type, dest, v2);
1611        }
1612    }
1613}
1614
1615static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
1616                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
1617{
1618    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
1619        tcg_out32(s, opc | RA(a0) | RS(a1));
1620    } else {
1621        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
1622        /* Note that the only other valid constant for a2 is 0.  */
1623        if (have_isel) {
1624            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
1625            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
1626        } else if (!const_a2 && a0 == a2) {
1627            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
1628            tcg_out32(s, opc | RA(a0) | RS(a1));
1629        } else {
1630            tcg_out32(s, opc | RA(a0) | RS(a1));
1631            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
1632            if (const_a2) {
1633                tcg_out_movi(s, type, a0, 0);
1634            } else {
1635                tcg_out_mov(s, type, a0, a2);
1636            }
1637        }
1638    }
1639}
1640
1641static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
1642                         const int *const_args)
1643{
1644    static const struct { uint8_t bit1, bit2; } bits[] = {
1645        [TCG_COND_LT ] = { CR_LT, CR_LT },
1646        [TCG_COND_LE ] = { CR_LT, CR_GT },
1647        [TCG_COND_GT ] = { CR_GT, CR_GT },
1648        [TCG_COND_GE ] = { CR_GT, CR_LT },
1649        [TCG_COND_LTU] = { CR_LT, CR_LT },
1650        [TCG_COND_LEU] = { CR_LT, CR_GT },
1651        [TCG_COND_GTU] = { CR_GT, CR_GT },
1652        [TCG_COND_GEU] = { CR_GT, CR_LT },
1653    };
1654
1655    TCGCond cond = args[4], cond2;
1656    TCGArg al, ah, bl, bh;
1657    int blconst, bhconst;
1658    int op, bit1, bit2;
1659
1660    al = args[0];
1661    ah = args[1];
1662    bl = args[2];
1663    bh = args[3];
1664    blconst = const_args[2];
1665    bhconst = const_args[3];
1666
1667    switch (cond) {
1668    case TCG_COND_EQ:
1669        op = CRAND;
1670        goto do_equality;
1671    case TCG_COND_NE:
1672        op = CRNAND;
1673    do_equality:
1674        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
1675        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
1676        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1677        break;
1678
1679    case TCG_COND_LT:
1680    case TCG_COND_LE:
1681    case TCG_COND_GT:
1682    case TCG_COND_GE:
1683    case TCG_COND_LTU:
1684    case TCG_COND_LEU:
1685    case TCG_COND_GTU:
1686    case TCG_COND_GEU:
1687        bit1 = bits[cond].bit1;
1688        bit2 = bits[cond].bit2;
1689        op = (bit1 != bit2 ? CRANDC : CRAND);
1690        cond2 = tcg_unsigned_cond(cond);
1691
1692        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
1693        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
1694        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
1695        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
1696        break;
1697
1698    default:
1699        tcg_abort();
1700    }
1701}
1702
/*
 * Emit a double-word setcond.  ARGS[0] is the destination register; the
 * remaining entries (and the matching CONST_ARGS) are handed to
 * tcg_out_cmp2, which leaves the result in CR7[EQ].  The CR field is then
 * copied to R0 and that single bit is rotated into the least significant
 * position of the destination.
 */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}
1710
1711static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
1712                             const int *const_args)
1713{
1714    tcg_out_cmp2(s, args, const_args);
1715    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
1716}
1717
1718static void tcg_out_mb(TCGContext *s, TCGArg a0)
1719{
1720    uint32_t insn = HWSYNC;
1721    a0 &= TCG_MO_ALL;
1722    if (a0 == TCG_MO_LD_LD) {
1723        insn = LWSYNC;
1724    } else if (a0 == TCG_MO_ST_ST) {
1725        insn = EIEIO;
1726    }
1727    tcg_out32(s, insn);
1728}
1729
1730void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1731                              uintptr_t addr)
1732{
1733    if (TCG_TARGET_REG_BITS == 64) {
1734        tcg_insn_unit i1, i2;
1735        intptr_t tb_diff = addr - tc_ptr;
1736        intptr_t br_diff = addr - (jmp_addr + 4);
1737        uint64_t pair;
1738
1739        /* This does not exercise the range of the branch, but we do
1740           still need to be able to load the new value of TCG_REG_TB.
1741           But this does still happen quite often.  */
1742        if (tb_diff == (int16_t)tb_diff) {
1743            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
1744            i2 = B | (br_diff & 0x3fffffc);
1745        } else {
1746            intptr_t lo = (int16_t)tb_diff;
1747            intptr_t hi = (int32_t)(tb_diff - lo);
1748            assert(tb_diff == hi + lo);
1749            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
1750            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
1751        }
1752#ifdef HOST_WORDS_BIGENDIAN
1753        pair = (uint64_t)i1 << 32 | i2;
1754#else
1755        pair = (uint64_t)i2 << 32 | i1;
1756#endif
1757
1758        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
1759           within atomic_set that would fail to build a ppc32 host.  */
1760        atomic_set__nocheck((uint64_t *)jmp_addr, pair);
1761        flush_icache_range(jmp_addr, jmp_addr + 8);
1762    } else {
1763        intptr_t diff = addr - jmp_addr;
1764        tcg_debug_assert(in_range_b(diff));
1765        atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
1766        flush_icache_range(jmp_addr, jmp_addr + 4);
1767    }
1768}
1769
1770static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1771{
1772#ifdef _CALL_AIX
1773    /* Look through the descriptor.  If the branch is in range, and we
1774       don't have to spend too much effort on building the toc.  */
1775    void *tgt = ((void **)target)[0];
1776    uintptr_t toc = ((uintptr_t *)target)[1];
1777    intptr_t diff = tcg_pcrel_diff(s, tgt);
1778
1779    if (in_range_b(diff) && toc == (uint32_t)toc) {
1780        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1781        tcg_out_b(s, LK, tgt);
1782    } else {
1783        /* Fold the low bits of the constant into the addresses below.  */
1784        intptr_t arg = (intptr_t)target;
1785        int ofs = (int16_t)arg;
1786
1787        if (ofs + 8 < 0x8000) {
1788            arg -= ofs;
1789        } else {
1790            ofs = 0;
1791        }
1792        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1793        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1794        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1795        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1796        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1797    }
1798#elif defined(_CALL_ELF) && _CALL_ELF == 2
1799    intptr_t diff;
1800
1801    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1802       address, which the callee uses to compute its TOC address.  */
1803    /* FIXME: when the branch is in range, we could avoid r12 load if we
1804       knew that the destination uses the same TOC, and what its local
1805       entry point offset is.  */
1806    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1807
1808    diff = tcg_pcrel_diff(s, target);
1809    if (in_range_b(diff)) {
1810        tcg_out_b(s, LK, target);
1811    } else {
1812        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1813        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1814    }
1815#else
1816    tcg_out_b(s, LK, target);
1817#endif
1818}
1819
/*
 * Indexed-form load opcode for each guest memory operation, indexed by
 * the MemOp size/sign bits optionally combined with MO_BSWAP.  The
 * byte-swapped entries use the byte-reversed load instructions, except
 * for single bytes where swapping is meaningless.  Combinations not
 * listed (e.g. byte-swapped sign-extending loads) are left as 0.
 */
static const uint32_t qemu_ldx_opc[16] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_Q]  = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_Q]  = LDBRX,
};
1832
/*
 * Indexed-form store opcode for each guest memory operation, indexed by
 * the MemOp size bits optionally combined with MO_BSWAP.  The
 * byte-swapped entries use the byte-reversed store instructions, except
 * for single bytes.  Entries not listed are left as 0.
 */
static const uint32_t qemu_stx_opc[16] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_Q]  = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_Q]  = STDBRX,
};
1843
/*
 * Sign-extension opcodes for byte, halfword and word; the fourth entry
 * is 0 (no instruction).
 * NOTE(review): presumably indexed by the MemOp size (log2 of the access
 * width in bytes) -- confirm against the users of this table.
 */
static const uint32_t qemu_exts_opc[4] = {
    EXTSB, EXTSH, EXTSW, 0
};
1847
1848#if defined (CONFIG_SOFTMMU)
1849#include "../tcg-ldst.c.inc"
1850
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
 *                                 int mmu_idx, uintptr_t ra)
 *
 * Slow-path load helpers, indexed by the MemOp size and endianness bits.
 * Byte loads have a single entry; wider loads have one little-endian and
 * one big-endian helper each.  Entries not listed are NULL.
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};
1863
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
 *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
 *
 * Slow-path store helpers, indexed by the MemOp size and endianness bits.
 * Byte stores have a single entry; wider stores have one little-endian
 * and one big-endian helper each.  Entries not listed are NULL.
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
1876
1877/* We expect to use a 16-bit negative offset from ENV.  */
1878QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1879QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1880
/* Perform the TLB load and compare.  Places the result of the comparison
   in CR7[EQ] and loads the addend of the TLB entry into R3.  Returns the
   register containing the guest address: ADDRLO itself, or R4 holding the
   zero-extended address for a 32-bit guest on a 64-bit host.
   OPC supplies the access size and alignment, MEM_INDEX selects the TLB,
   and IS_READ chooses between the addr_read and addr_write comparators.
   ADDRHI is only used when the guest address is wider than a host register.
   Clobbers R0, R3, R4 and TCG_REG_TMP1 (R2 for the AIX calling convention,
   R12 otherwise).  */

static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
                               TCGReg addrlo, TCGReg addrhi,
                               int mem_index, bool is_read)
{
    int cmp_off
        = (is_read
           ? offsetof(CPUTLBEntry, addr_read)
           : offsetof(CPUTLBEntry, addr_write));
    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);

    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);

    /* Extract the page index, shifted into place for tlb index.  */
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    } else {
        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    }
    /* R3 = byte offset of the TLB entry within the table.  */
    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));

    /* Load the TLB comparator.  */
    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
        /* NOTE(review): presumably the update form of the load also
           leaves the entry address (R3 + R4) in R3, which the addend
           load below relies on -- confirm against the ISA.  */
        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
                        ? LWZUX : LDUX);
        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
    } else {
        /* R3 = address of the TLB entry.  */
        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
            /* 64-bit comparator on a 32-bit host: the word at cmp_off + 4
               goes to TMP1 (compared with ADDRLO below), the word at
               cmp_off to R4 (compared with ADDRHI below).  */
            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
        } else {
            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
        }
    }

    /* Load the TLB addend for use on the fast path.  Do this asap
       to minimize any load use delay.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
               offsetof(CPUTLBEntry, addend));

    /* Clear the non-page, non-alignment bits from the address */
    if (TCG_TARGET_REG_BITS == 32) {
        /* We don't support unaligned accesses on 32-bits.
         * Preserve the bottom bits and thus trigger a comparison
         * failure on unaligned accesses.
         */
        if (a_bits < s_bits) {
            a_bits = s_bits;
        }
        /* "& 31" turns the mask start 32 into 0 when a_bits is 0.  */
        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
    } else {
        TCGReg t = addrlo;

        /* If the access is unaligned, we need to make sure we fail if we
         * cross a page boundary.  The trick is to add the access size-1
         * to the address before masking the low bits.  That will make the
         * address overflow to the next page if we cross a page boundary,
         * which will then force a mismatch of the TLB compare.
         */
        if (a_bits < s_bits) {
            unsigned a_mask = (1 << a_bits) - 1;
            unsigned s_mask = (1 << s_bits) - 1;
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
            t = TCG_REG_R0;
        }

        /* Mask the address for the requested alignment.  */
        if (TARGET_LONG_BITS == 32) {
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
            /* Zero-extend the address for use in the final address.  */
            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
            addrlo = TCG_REG_R4;
        } else if (a_bits == 0) {
            /* No alignment bits to keep: just clear the page offset.  */
            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
        } else {
            /* Two rotates: the first clears the page-offset bits above
               the alignment bits, the second rotates the value back.  */
            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
        }
    }

    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
        /* Compare both halves and AND the two EQ bits into CR7[EQ].  */
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
                    0, 7, TCG_TYPE_I32);
        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
                    0, 7, TCG_TYPE_TL);
    }

    return addrlo;
}
1988
1989/* Record the context of a call to the out of line helper code for the slow
1990   path for a load or store, so that we can later generate the correct
1991   helper code.  */
1992static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1993                                TCGReg datalo_reg, TCGReg datahi_reg,
1994                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1995                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1996{
1997    TCGLabelQemuLdst *label = new_ldst_label(s);
1998
1999    label->is_ld = is_ld;
2000    label->oi = oi;
2001    label->datalo_reg = datalo_reg;
2002    label->datahi_reg = datahi_reg;
2003    label->addrlo_reg = addrlo_reg;
2004    label->addrhi_reg = addrhi_reg;
2005    label->raddr = raddr;
2006    label->label_ptr[0] = lptr;
2007}
2008
2009static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2010{
2011    TCGMemOpIdx oi = lb->oi;
2012    MemOp opc = get_memop(oi);
2013    TCGReg hi, lo, arg = TCG_REG_R3;
2014
2015    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2016        return false;
2017    }
2018
2019    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2020
2021    lo = lb->addrlo_reg;
2022    hi = lb->addrhi_reg;
2023    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2024#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2025        arg |= 1;
2026#endif
2027        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2028        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2029    } else {
2030        /* If the address needed to be zero-extended, we'll have already
2031           placed it in R4.  The only remaining case is 64-bit guest.  */
2032        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2033    }
2034
2035    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2036    tcg_out32(s, MFSPR | RT(arg) | LR);
2037
2038    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2039
2040    lo = lb->datalo_reg;
2041    hi = lb->datahi_reg;
2042    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2043        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2044        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2045    } else if (opc & MO_SIGN) {
2046        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2047        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2048    } else {
2049        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2050    }
2051
2052    tcg_out_b(s, 0, lb->raddr);
2053    return true;
2054}
2055
2056static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2057{
2058    TCGMemOpIdx oi = lb->oi;
2059    MemOp opc = get_memop(oi);
2060    MemOp s_bits = opc & MO_SIZE;
2061    TCGReg hi, lo, arg = TCG_REG_R3;
2062
2063    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2064        return false;
2065    }
2066
2067    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2068
2069    lo = lb->addrlo_reg;
2070    hi = lb->addrhi_reg;
2071    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2072#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2073        arg |= 1;
2074#endif
2075        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2076        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2077    } else {
2078        /* If the address needed to be zero-extended, we'll have already
2079           placed it in R4.  The only remaining case is 64-bit guest.  */
2080        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2081    }
2082
2083    lo = lb->datalo_reg;
2084    hi = lb->datahi_reg;
2085    if (TCG_TARGET_REG_BITS == 32) {
2086        switch (s_bits) {
2087        case MO_64:
2088#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2089            arg |= 1;
2090#endif
2091            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2092            /* FALLTHRU */
2093        case MO_32:
2094            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2095            break;
2096        default:
2097            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2098            break;
2099        }
2100    } else {
2101        if (s_bits == MO_64) {
2102            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2103        } else {
2104            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2105        }
2106    }
2107
2108    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2109    tcg_out32(s, MFSPR | RT(arg) | LR);
2110
2111    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2112
2113    tcg_out_b(s, 0, lb->raddr);
2114    return true;
2115}
2116#endif /* SOFTMMU */
2117
2118static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2119{
2120    TCGReg datalo, datahi, addrlo, rbase;
2121    TCGReg addrhi __attribute__((unused));
2122    TCGMemOpIdx oi;
2123    MemOp opc, s_bits;
2124#ifdef CONFIG_SOFTMMU
2125    int mem_index;
2126    tcg_insn_unit *label_ptr;
2127#endif
2128
2129    datalo = *args++;
2130    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2131    addrlo = *args++;
2132    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2133    oi = *args++;
2134    opc = get_memop(oi);
2135    s_bits = opc & MO_SIZE;
2136
2137#ifdef CONFIG_SOFTMMU
2138    mem_index = get_mmuidx(oi);
2139    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2140
2141    /* Load a pointer into the current opcode w/conditional branch-link. */
2142    label_ptr = s->code_ptr;
2143    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2144
2145    rbase = TCG_REG_R3;
2146#else  /* !CONFIG_SOFTMMU */
2147    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2148    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2149        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2150        addrlo = TCG_REG_TMP1;
2151    }
2152#endif
2153
2154    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2155        if (opc & MO_BSWAP) {
2156            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2157            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2158            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2159        } else if (rbase != 0) {
2160            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2161            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2162            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2163        } else if (addrlo == datahi) {
2164            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2165            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2166        } else {
2167            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2168            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2169        }
2170    } else {
2171        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2172        if (!have_isa_2_06 && insn == LDBRX) {
2173            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2174            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2175            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2176            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2177        } else if (insn) {
2178            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2179        } else {
2180            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2181            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2182            insn = qemu_exts_opc[s_bits];
2183            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2184        }
2185    }
2186
2187#ifdef CONFIG_SOFTMMU
2188    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2189                        s->code_ptr, label_ptr);
2190#endif
2191}
2192
2193static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2194{
2195    TCGReg datalo, datahi, addrlo, rbase;
2196    TCGReg addrhi __attribute__((unused));
2197    TCGMemOpIdx oi;
2198    MemOp opc, s_bits;
2199#ifdef CONFIG_SOFTMMU
2200    int mem_index;
2201    tcg_insn_unit *label_ptr;
2202#endif
2203
2204    datalo = *args++;
2205    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2206    addrlo = *args++;
2207    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2208    oi = *args++;
2209    opc = get_memop(oi);
2210    s_bits = opc & MO_SIZE;
2211
2212#ifdef CONFIG_SOFTMMU
2213    mem_index = get_mmuidx(oi);
2214    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2215
2216    /* Load a pointer into the current opcode w/conditional branch-link. */
2217    label_ptr = s->code_ptr;
2218    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2219
2220    rbase = TCG_REG_R3;
2221#else  /* !CONFIG_SOFTMMU */
2222    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2223    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2224        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2225        addrlo = TCG_REG_TMP1;
2226    }
2227#endif
2228
2229    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2230        if (opc & MO_BSWAP) {
2231            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2232            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2233            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2234        } else if (rbase != 0) {
2235            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2236            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2237            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2238        } else {
2239            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2240            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2241        }
2242    } else {
2243        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2244        if (!have_isa_2_06 && insn == STDBRX) {
2245            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2246            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2247            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2248            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2249        } else {
2250            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2251        }
2252    }
2253
2254#ifdef CONFIG_SOFTMMU
2255    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2256                        s->code_ptr, label_ptr);
2257#endif
2258}
2259
2260static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2261{
2262    int i;
2263    for (i = 0; i < count; ++i) {
2264        p[i] = NOP;
2265    }
2266}
2267
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16
#define TCG_TARGET_EXTEND_ARGS       1

/*
 * Per-ABI stack layout constants, expressed in multiples of SZR:
 *   LINK_AREA_SIZE               - default for TCG_TARGET_CALL_STACK_OFFSET
 *                                  when the ABI branch does not define one;
 *   LR_OFFSET                    - added to FRAME_SIZE to locate the slot
 *                                  in which the prologue saves the link
 *                                  register;
 *   TCG_TARGET_CALL_STACK_OFFSET - first term of FRAME_SIZE.
 * A configuration that matches none of the branches below leaves LR_OFFSET
 * undefined and is rejected with an #error.
 */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(TCG_TARGET_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

/* Byte sizes of the TCG temporary buffer and of the callee-saved register
   save area (one SZR-sized slot per entry of tcg_target_callee_save_regs). */
#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/* Total frame size: the sum of the four areas, rounded up to a multiple of
   TCG_TARGET_STACK_ALIGN.  */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* Offset of the register save area, which occupies the top of the frame;
   the prologue places the temporary buffer immediately below it.  */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2308
2309static void tcg_target_qemu_prologue(TCGContext *s)
2310{
2311    int i;
2312
2313#ifdef _CALL_AIX
2314    void **desc = (void **)s->code_ptr;
2315    desc[0] = desc + 2;                   /* entry point */
2316    desc[1] = 0;                          /* environment pointer */
2317    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
2318#endif
2319
2320    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2321                  CPU_TEMP_BUF_SIZE);
2322
2323    /* Prologue */
2324    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2325    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2326              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2327
2328    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2329        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2330                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2331    }
2332    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2333
2334#ifndef CONFIG_SOFTMMU
2335    if (guest_base) {
2336        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2337        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2338    }
2339#endif
2340
2341    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2342    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2343    if (USE_REG_TB) {
2344        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2345    }
2346    tcg_out32(s, BCCTR | BO_ALWAYS);
2347
2348    /* Epilogue */
2349    s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
2350
2351    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2352    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2353        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2354                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2355    }
2356    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2357    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2358    tcg_out32(s, BCLR | BO_ALWAYS);
2359}
2360
2361static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
2362                       const int *const_args)
2363{
2364    TCGArg a0, a1, a2;
2365    int c;
2366
2367    switch (opc) {
2368    case INDEX_op_exit_tb:
2369        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2370        tcg_out_b(s, 0, tb_ret_addr);
2371        break;
2372    case INDEX_op_goto_tb:
2373        if (s->tb_jmp_insn_offset) {
2374            /* Direct jump. */
2375            if (TCG_TARGET_REG_BITS == 64) {
2376                /* Ensure the next insns are 8-byte aligned. */
2377                if ((uintptr_t)s->code_ptr & 7) {
2378                    tcg_out32(s, NOP);
2379                }
2380                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2381                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2382                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2383            } else {
2384                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2385                tcg_out32(s, B);
2386                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2387                break;
2388            }
2389        } else {
2390            /* Indirect jump. */
2391            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2392            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2393                       (intptr_t)(s->tb_jmp_insn_offset + args[0]));
2394        }
2395        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2396        tcg_out32(s, BCCTR | BO_ALWAYS);
2397        set_jmp_reset_offset(s, args[0]);
2398        if (USE_REG_TB) {
2399            /* For the unlinked case, need to reset TCG_REG_TB.  */
2400            c = -tcg_current_code_size(s);
2401            assert(c == (int16_t)c);
2402            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
2403        }
2404        break;
2405    case INDEX_op_goto_ptr:
2406        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2407        if (USE_REG_TB) {
2408            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2409        }
2410        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2411        tcg_out32(s, BCCTR | BO_ALWAYS);
2412        break;
2413    case INDEX_op_br:
2414        {
2415            TCGLabel *l = arg_label(args[0]);
2416            uint32_t insn = B;
2417
2418            if (l->has_value) {
2419                insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
2420            } else {
2421                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2422            }
2423            tcg_out32(s, insn);
2424        }
2425        break;
2426    case INDEX_op_ld8u_i32:
2427    case INDEX_op_ld8u_i64:
2428        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2429        break;
2430    case INDEX_op_ld8s_i32:
2431    case INDEX_op_ld8s_i64:
2432        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2433        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
2434        break;
2435    case INDEX_op_ld16u_i32:
2436    case INDEX_op_ld16u_i64:
2437        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2438        break;
2439    case INDEX_op_ld16s_i32:
2440    case INDEX_op_ld16s_i64:
2441        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2442        break;
2443    case INDEX_op_ld_i32:
2444    case INDEX_op_ld32u_i64:
2445        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2446        break;
2447    case INDEX_op_ld32s_i64:
2448        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2449        break;
2450    case INDEX_op_ld_i64:
2451        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2452        break;
2453    case INDEX_op_st8_i32:
2454    case INDEX_op_st8_i64:
2455        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2456        break;
2457    case INDEX_op_st16_i32:
2458    case INDEX_op_st16_i64:
2459        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2460        break;
2461    case INDEX_op_st_i32:
2462    case INDEX_op_st32_i64:
2463        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2464        break;
2465    case INDEX_op_st_i64:
2466        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2467        break;
2468
2469    case INDEX_op_add_i32:
2470        a0 = args[0], a1 = args[1], a2 = args[2];
2471        if (const_args[2]) {
2472        do_addi_32:
2473            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2474        } else {
2475            tcg_out32(s, ADD | TAB(a0, a1, a2));
2476        }
2477        break;
2478    case INDEX_op_sub_i32:
2479        a0 = args[0], a1 = args[1], a2 = args[2];
2480        if (const_args[1]) {
2481            if (const_args[2]) {
2482                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2483            } else {
2484                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2485            }
2486        } else if (const_args[2]) {
2487            a2 = -a2;
2488            goto do_addi_32;
2489        } else {
2490            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2491        }
2492        break;
2493
2494    case INDEX_op_and_i32:
2495        a0 = args[0], a1 = args[1], a2 = args[2];
2496        if (const_args[2]) {
2497            tcg_out_andi32(s, a0, a1, a2);
2498        } else {
2499            tcg_out32(s, AND | SAB(a1, a0, a2));
2500        }
2501        break;
2502    case INDEX_op_and_i64:
2503        a0 = args[0], a1 = args[1], a2 = args[2];
2504        if (const_args[2]) {
2505            tcg_out_andi64(s, a0, a1, a2);
2506        } else {
2507            tcg_out32(s, AND | SAB(a1, a0, a2));
2508        }
2509        break;
2510    case INDEX_op_or_i64:
2511    case INDEX_op_or_i32:
2512        a0 = args[0], a1 = args[1], a2 = args[2];
2513        if (const_args[2]) {
2514            tcg_out_ori32(s, a0, a1, a2);
2515        } else {
2516            tcg_out32(s, OR | SAB(a1, a0, a2));
2517        }
2518        break;
2519    case INDEX_op_xor_i64:
2520    case INDEX_op_xor_i32:
2521        a0 = args[0], a1 = args[1], a2 = args[2];
2522        if (const_args[2]) {
2523            tcg_out_xori32(s, a0, a1, a2);
2524        } else {
2525            tcg_out32(s, XOR | SAB(a1, a0, a2));
2526        }
2527        break;
2528    case INDEX_op_andc_i32:
2529        a0 = args[0], a1 = args[1], a2 = args[2];
2530        if (const_args[2]) {
2531            tcg_out_andi32(s, a0, a1, ~a2);
2532        } else {
2533            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2534        }
2535        break;
2536    case INDEX_op_andc_i64:
2537        a0 = args[0], a1 = args[1], a2 = args[2];
2538        if (const_args[2]) {
2539            tcg_out_andi64(s, a0, a1, ~a2);
2540        } else {
2541            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2542        }
2543        break;
2544    case INDEX_op_orc_i32:
2545        if (const_args[2]) {
2546            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2547            break;
2548        }
2549        /* FALLTHRU */
2550    case INDEX_op_orc_i64:
2551        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2552        break;
2553    case INDEX_op_eqv_i32:
2554        if (const_args[2]) {
2555            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2556            break;
2557        }
2558        /* FALLTHRU */
2559    case INDEX_op_eqv_i64:
2560        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2561        break;
2562    case INDEX_op_nand_i32:
2563    case INDEX_op_nand_i64:
2564        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2565        break;
2566    case INDEX_op_nor_i32:
2567    case INDEX_op_nor_i64:
2568        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2569        break;
2570
2571    case INDEX_op_clz_i32:
2572        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2573                      args[2], const_args[2]);
2574        break;
2575    case INDEX_op_ctz_i32:
2576        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2577                      args[2], const_args[2]);
2578        break;
2579    case INDEX_op_ctpop_i32:
2580        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2581        break;
2582
2583    case INDEX_op_clz_i64:
2584        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2585                      args[2], const_args[2]);
2586        break;
2587    case INDEX_op_ctz_i64:
2588        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2589                      args[2], const_args[2]);
2590        break;
2591    case INDEX_op_ctpop_i64:
2592        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2593        break;
2594
2595    case INDEX_op_mul_i32:
2596        a0 = args[0], a1 = args[1], a2 = args[2];
2597        if (const_args[2]) {
2598            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2599        } else {
2600            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2601        }
2602        break;
2603
2604    case INDEX_op_div_i32:
2605        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2606        break;
2607
2608    case INDEX_op_divu_i32:
2609        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2610        break;
2611
2612    case INDEX_op_shl_i32:
2613        if (const_args[2]) {
2614            /* Limit immediate shift count lest we create an illegal insn.  */
2615            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2616        } else {
2617            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2618        }
2619        break;
2620    case INDEX_op_shr_i32:
2621        if (const_args[2]) {
2622            /* Limit immediate shift count lest we create an illegal insn.  */
2623            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2624        } else {
2625            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2626        }
2627        break;
2628    case INDEX_op_sar_i32:
2629        if (const_args[2]) {
2630            /* Limit immediate shift count lest we create an illegal insn.  */
2631            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
2632        } else {
2633            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2634        }
2635        break;
2636    case INDEX_op_rotl_i32:
2637        if (const_args[2]) {
2638            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2639        } else {
2640            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2641                         | MB(0) | ME(31));
2642        }
2643        break;
2644    case INDEX_op_rotr_i32:
2645        if (const_args[2]) {
2646            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2647        } else {
2648            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2649            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2650                         | MB(0) | ME(31));
2651        }
2652        break;
2653
2654    case INDEX_op_brcond_i32:
2655        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2656                       arg_label(args[3]), TCG_TYPE_I32);
2657        break;
2658    case INDEX_op_brcond_i64:
2659        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2660                       arg_label(args[3]), TCG_TYPE_I64);
2661        break;
2662    case INDEX_op_brcond2_i32:
2663        tcg_out_brcond2(s, args, const_args);
2664        break;
2665
2666    case INDEX_op_neg_i32:
2667    case INDEX_op_neg_i64:
2668        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2669        break;
2670
2671    case INDEX_op_not_i32:
2672    case INDEX_op_not_i64:
2673        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2674        break;
2675
2676    case INDEX_op_add_i64:
2677        a0 = args[0], a1 = args[1], a2 = args[2];
2678        if (const_args[2]) {
2679        do_addi_64:
2680            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2681        } else {
2682            tcg_out32(s, ADD | TAB(a0, a1, a2));
2683        }
2684        break;
2685    case INDEX_op_sub_i64:
2686        a0 = args[0], a1 = args[1], a2 = args[2];
2687        if (const_args[1]) {
2688            if (const_args[2]) {
2689                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2690            } else {
2691                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2692            }
2693        } else if (const_args[2]) {
2694            a2 = -a2;
2695            goto do_addi_64;
2696        } else {
2697            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2698        }
2699        break;
2700
2701    case INDEX_op_shl_i64:
2702        if (const_args[2]) {
2703            /* Limit immediate shift count lest we create an illegal insn.  */
2704            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2705        } else {
2706            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2707        }
2708        break;
2709    case INDEX_op_shr_i64:
2710        if (const_args[2]) {
2711            /* Limit immediate shift count lest we create an illegal insn.  */
2712            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2713        } else {
2714            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2715        }
2716        break;
2717    case INDEX_op_sar_i64:
2718        if (const_args[2]) {
2719            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2720            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2721        } else {
2722            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2723        }
2724        break;
2725    case INDEX_op_rotl_i64:
2726        if (const_args[2]) {
2727            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2728        } else {
2729            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2730        }
2731        break;
2732    case INDEX_op_rotr_i64:
2733        if (const_args[2]) {
2734            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2735        } else {
2736            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2737            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2738        }
2739        break;
2740
2741    case INDEX_op_mul_i64:
2742        a0 = args[0], a1 = args[1], a2 = args[2];
2743        if (const_args[2]) {
2744            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2745        } else {
2746            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2747        }
2748        break;
2749    case INDEX_op_div_i64:
2750        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2751        break;
2752    case INDEX_op_divu_i64:
2753        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2754        break;
2755
2756    case INDEX_op_qemu_ld_i32:
2757        tcg_out_qemu_ld(s, args, false);
2758        break;
2759    case INDEX_op_qemu_ld_i64:
2760        tcg_out_qemu_ld(s, args, true);
2761        break;
2762    case INDEX_op_qemu_st_i32:
2763        tcg_out_qemu_st(s, args, false);
2764        break;
2765    case INDEX_op_qemu_st_i64:
2766        tcg_out_qemu_st(s, args, true);
2767        break;
2768
2769    case INDEX_op_ext8s_i32:
2770    case INDEX_op_ext8s_i64:
2771        c = EXTSB;
2772        goto gen_ext;
2773    case INDEX_op_ext16s_i32:
2774    case INDEX_op_ext16s_i64:
2775        c = EXTSH;
2776        goto gen_ext;
2777    case INDEX_op_ext_i32_i64:
2778    case INDEX_op_ext32s_i64:
2779        c = EXTSW;
2780        goto gen_ext;
2781    gen_ext:
2782        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2783        break;
2784    case INDEX_op_extu_i32_i64:
2785        tcg_out_ext32u(s, args[0], args[1]);
2786        break;
2787
2788    case INDEX_op_setcond_i32:
2789        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2790                        const_args[2]);
2791        break;
2792    case INDEX_op_setcond_i64:
2793        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2794                        const_args[2]);
2795        break;
2796    case INDEX_op_setcond2_i32:
2797        tcg_out_setcond2(s, args, const_args);
2798        break;
2799
2800    case INDEX_op_bswap16_i32:
2801    case INDEX_op_bswap16_i64:
2802        a0 = args[0], a1 = args[1];
2803        /* a1 = abcd */
2804        if (a0 != a1) {
2805            /* a0 = (a1 r<< 24) & 0xff # 000c */
2806            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2807            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2808            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2809        } else {
2810            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2811            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2812            /* a0 = (a1 r<< 24) & 0xff # 000c */
2813            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2814            /* a0 = a0 | r0 # 00dc */
2815            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2816        }
2817        break;
2818
2819    case INDEX_op_bswap32_i32:
2820    case INDEX_op_bswap32_i64:
2821        /* Stolen from gcc's builtin_bswap32 */
2822        a1 = args[1];
2823        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2824
2825        /* a1 = args[1] # abcd */
2826        /* a0 = rotate_left (a1, 8) # bcda */
2827        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2828        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2829        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2830        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2831        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2832
2833        if (a0 == TCG_REG_R0) {
2834            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2835        }
2836        break;
2837
2838    case INDEX_op_bswap64_i64:
2839        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2840        if (a0 == a1) {
2841            a0 = TCG_REG_R0;
2842            a2 = a1;
2843        }
2844
2845        /* a1 = # abcd efgh */
2846        /* a0 = rl32(a1, 8) # 0000 fghe */
2847        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2848        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2849        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2850        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2851        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2852
2853        /* a0 = rl64(a0, 32) # hgfe 0000 */
2854        /* a2 = rl64(a1, 32) # efgh abcd */
2855        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2856        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2857
2858        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2859        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2860        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2861        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2862        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2863        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2864
2865        if (a0 == 0) {
2866            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2867        }
2868        break;
2869
2870    case INDEX_op_deposit_i32:
2871        if (const_args[2]) {
2872            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2873            tcg_out_andi32(s, args[0], args[0], ~mask);
2874        } else {
2875            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2876                        32 - args[3] - args[4], 31 - args[3]);
2877        }
2878        break;
2879    case INDEX_op_deposit_i64:
2880        if (const_args[2]) {
2881            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2882            tcg_out_andi64(s, args[0], args[0], ~mask);
2883        } else {
2884            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2885                        64 - args[3] - args[4]);
2886        }
2887        break;
2888
2889    case INDEX_op_extract_i32:
2890        tcg_out_rlw(s, RLWINM, args[0], args[1],
2891                    32 - args[2], 32 - args[3], 31);
2892        break;
2893    case INDEX_op_extract_i64:
2894        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
2895        break;
2896
2897    case INDEX_op_movcond_i32:
2898        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2899                        args[3], args[4], const_args[2]);
2900        break;
2901    case INDEX_op_movcond_i64:
2902        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2903                        args[3], args[4], const_args[2]);
2904        break;
2905
2906#if TCG_TARGET_REG_BITS == 64
2907    case INDEX_op_add2_i64:
2908#else
2909    case INDEX_op_add2_i32:
2910#endif
2911        /* Note that the CA bit is defined based on the word size of the
2912           environment.  So in 64-bit mode it's always carry-out of bit 63.
2913           The fallback code using deposit works just as well for 32-bit.  */
2914        a0 = args[0], a1 = args[1];
2915        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2916            a0 = TCG_REG_R0;
2917        }
2918        if (const_args[4]) {
2919            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2920        } else {
2921            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2922        }
2923        if (const_args[5]) {
2924            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2925        } else {
2926            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2927        }
2928        if (a0 != args[0]) {
2929            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2930        }
2931        break;
2932
2933#if TCG_TARGET_REG_BITS == 64
2934    case INDEX_op_sub2_i64:
2935#else
2936    case INDEX_op_sub2_i32:
2937#endif
2938        a0 = args[0], a1 = args[1];
2939        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2940            a0 = TCG_REG_R0;
2941        }
2942        if (const_args[2]) {
2943            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2944        } else {
2945            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2946        }
2947        if (const_args[3]) {
2948            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2949        } else {
2950            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2951        }
2952        if (a0 != args[0]) {
2953            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2954        }
2955        break;
2956
2957    case INDEX_op_muluh_i32:
2958        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2959        break;
2960    case INDEX_op_mulsh_i32:
2961        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2962        break;
2963    case INDEX_op_muluh_i64:
2964        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2965        break;
2966    case INDEX_op_mulsh_i64:
2967        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2968        break;
2969
2970    case INDEX_op_mb:
2971        tcg_out_mb(s, args[0]);
2972        break;
2973
2974    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2975    case INDEX_op_mov_i64:
2976    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
2977    case INDEX_op_movi_i64:
2978    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2979    default:
2980        tcg_abort();
2981    }
2982}
2983
2984int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2985{
2986    switch (opc) {
2987    case INDEX_op_and_vec:
2988    case INDEX_op_or_vec:
2989    case INDEX_op_xor_vec:
2990    case INDEX_op_andc_vec:
2991    case INDEX_op_not_vec:
2992        return 1;
2993    case INDEX_op_orc_vec:
2994        return have_isa_2_07;
2995    case INDEX_op_add_vec:
2996    case INDEX_op_sub_vec:
2997    case INDEX_op_smax_vec:
2998    case INDEX_op_smin_vec:
2999    case INDEX_op_umax_vec:
3000    case INDEX_op_umin_vec:
3001    case INDEX_op_shlv_vec:
3002    case INDEX_op_shrv_vec:
3003    case INDEX_op_sarv_vec:
3004    case INDEX_op_rotlv_vec:
3005        return vece <= MO_32 || have_isa_2_07;
3006    case INDEX_op_ssadd_vec:
3007    case INDEX_op_sssub_vec:
3008    case INDEX_op_usadd_vec:
3009    case INDEX_op_ussub_vec:
3010        return vece <= MO_32;
3011    case INDEX_op_cmp_vec:
3012    case INDEX_op_shli_vec:
3013    case INDEX_op_shri_vec:
3014    case INDEX_op_sari_vec:
3015    case INDEX_op_rotli_vec:
3016        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3017    case INDEX_op_neg_vec:
3018        return vece >= MO_32 && have_isa_3_00;
3019    case INDEX_op_mul_vec:
3020        switch (vece) {
3021        case MO_8:
3022        case MO_16:
3023            return -1;
3024        case MO_32:
3025            return have_isa_2_07 ? 1 : -1;
3026        case MO_64:
3027            return have_isa_3_10;
3028        }
3029        return 0;
3030    case INDEX_op_bitsel_vec:
3031        return have_vsx;
3032    case INDEX_op_rotrv_vec:
3033        return -1;
3034    default:
3035        return 0;
3036    }
3037}
3038
3039static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3040                            TCGReg dst, TCGReg src)
3041{
3042    tcg_debug_assert(dst >= TCG_REG_V0);
3043
3044    /* Splat from integer reg allowed via constraints for v3.00.  */
3045    if (src < TCG_REG_V0) {
3046        tcg_debug_assert(have_isa_3_00);
3047        switch (vece) {
3048        case MO_64:
3049            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3050            return true;
3051        case MO_32:
3052            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3053            return true;
3054        default:
3055            /* Fail, so that we fall back on either dupm or mov+dup.  */
3056            return false;
3057        }
3058    }
3059
3060    /*
3061     * Recall we use (or emulate) VSX integer loads, so the integer is
3062     * right justified within the left (zero-index) double-word.
3063     */
3064    switch (vece) {
3065    case MO_8:
3066        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3067        break;
3068    case MO_16:
3069        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3070        break;
3071    case MO_32:
3072        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3073        break;
3074    case MO_64:
3075        if (have_vsx) {
3076            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3077            break;
3078        }
3079        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3080        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3081        break;
3082    default:
3083        g_assert_not_reached();
3084    }
3085    return true;
3086}
3087
3088static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3089                             TCGReg out, TCGReg base, intptr_t offset)
3090{
3091    int elt;
3092
3093    tcg_debug_assert(out >= TCG_REG_V0);
3094    switch (vece) {
3095    case MO_8:
3096        if (have_isa_3_00) {
3097            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3098        } else {
3099            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3100        }
3101        elt = extract32(offset, 0, 4);
3102#ifndef HOST_WORDS_BIGENDIAN
3103        elt ^= 15;
3104#endif
3105        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3106        break;
3107    case MO_16:
3108        tcg_debug_assert((offset & 1) == 0);
3109        if (have_isa_3_00) {
3110            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3111        } else {
3112            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3113        }
3114        elt = extract32(offset, 1, 3);
3115#ifndef HOST_WORDS_BIGENDIAN
3116        elt ^= 7;
3117#endif
3118        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3119        break;
3120    case MO_32:
3121        if (have_isa_3_00) {
3122            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3123            break;
3124        }
3125        tcg_debug_assert((offset & 3) == 0);
3126        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3127        elt = extract32(offset, 2, 2);
3128#ifndef HOST_WORDS_BIGENDIAN
3129        elt ^= 3;
3130#endif
3131        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3132        break;
3133    case MO_64:
3134        if (have_vsx) {
3135            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3136            break;
3137        }
3138        tcg_debug_assert((offset & 7) == 0);
3139        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3140        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3141        elt = extract32(offset, 3, 1);
3142#ifndef HOST_WORDS_BIGENDIAN
3143        elt = !elt;
3144#endif
3145        if (elt) {
3146            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3147        } else {
3148            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3149        }
3150        break;
3151    default:
3152        g_assert_not_reached();
3153    }
3154    return true;
3155}
3156
3157static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3158                           unsigned vecl, unsigned vece,
3159                           const TCGArg *args, const int *const_args)
3160{
3161    static const uint32_t
3162        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3163        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3164        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3165        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3166        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3167        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3168        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3169        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3170        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3171        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3172        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3173        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3174        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3175        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3176        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3177        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3178        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3179        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3180        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3181        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3182        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3183        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3184        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3185        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3186        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3187
3188    TCGType type = vecl + TCG_TYPE_V64;
3189    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3190    uint32_t insn;
3191
3192    switch (opc) {
3193    case INDEX_op_ld_vec:
3194        tcg_out_ld(s, type, a0, a1, a2);
3195        return;
3196    case INDEX_op_st_vec:
3197        tcg_out_st(s, type, a0, a1, a2);
3198        return;
3199    case INDEX_op_dupm_vec:
3200        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3201        return;
3202
3203    case INDEX_op_add_vec:
3204        insn = add_op[vece];
3205        break;
3206    case INDEX_op_sub_vec:
3207        insn = sub_op[vece];
3208        break;
3209    case INDEX_op_neg_vec:
3210        insn = neg_op[vece];
3211        a2 = a1;
3212        a1 = 0;
3213        break;
3214    case INDEX_op_mul_vec:
3215        insn = mul_op[vece];
3216        break;
3217    case INDEX_op_ssadd_vec:
3218        insn = ssadd_op[vece];
3219        break;
3220    case INDEX_op_sssub_vec:
3221        insn = sssub_op[vece];
3222        break;
3223    case INDEX_op_usadd_vec:
3224        insn = usadd_op[vece];
3225        break;
3226    case INDEX_op_ussub_vec:
3227        insn = ussub_op[vece];
3228        break;
3229    case INDEX_op_smin_vec:
3230        insn = smin_op[vece];
3231        break;
3232    case INDEX_op_umin_vec:
3233        insn = umin_op[vece];
3234        break;
3235    case INDEX_op_smax_vec:
3236        insn = smax_op[vece];
3237        break;
3238    case INDEX_op_umax_vec:
3239        insn = umax_op[vece];
3240        break;
3241    case INDEX_op_shlv_vec:
3242        insn = shlv_op[vece];
3243        break;
3244    case INDEX_op_shrv_vec:
3245        insn = shrv_op[vece];
3246        break;
3247    case INDEX_op_sarv_vec:
3248        insn = sarv_op[vece];
3249        break;
3250    case INDEX_op_and_vec:
3251        insn = VAND;
3252        break;
3253    case INDEX_op_or_vec:
3254        insn = VOR;
3255        break;
3256    case INDEX_op_xor_vec:
3257        insn = VXOR;
3258        break;
3259    case INDEX_op_andc_vec:
3260        insn = VANDC;
3261        break;
3262    case INDEX_op_not_vec:
3263        insn = VNOR;
3264        a2 = a1;
3265        break;
3266    case INDEX_op_orc_vec:
3267        insn = VORC;
3268        break;
3269
3270    case INDEX_op_cmp_vec:
3271        switch (args[3]) {
3272        case TCG_COND_EQ:
3273            insn = eq_op[vece];
3274            break;
3275        case TCG_COND_NE:
3276            insn = ne_op[vece];
3277            break;
3278        case TCG_COND_GT:
3279            insn = gts_op[vece];
3280            break;
3281        case TCG_COND_GTU:
3282            insn = gtu_op[vece];
3283            break;
3284        default:
3285            g_assert_not_reached();
3286        }
3287        break;
3288
3289    case INDEX_op_bitsel_vec:
3290        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3291        return;
3292
3293    case INDEX_op_dup2_vec:
3294        assert(TCG_TARGET_REG_BITS == 32);
3295        /* With inputs a1 = xLxx, a2 = xHxx  */
3296        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3297        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3298        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3299        return;
3300
3301    case INDEX_op_ppc_mrgh_vec:
3302        insn = mrgh_op[vece];
3303        break;
3304    case INDEX_op_ppc_mrgl_vec:
3305        insn = mrgl_op[vece];
3306        break;
3307    case INDEX_op_ppc_muleu_vec:
3308        insn = muleu_op[vece];
3309        break;
3310    case INDEX_op_ppc_mulou_vec:
3311        insn = mulou_op[vece];
3312        break;
3313    case INDEX_op_ppc_pkum_vec:
3314        insn = pkum_op[vece];
3315        break;
3316    case INDEX_op_rotlv_vec:
3317        insn = rotl_op[vece];
3318        break;
3319    case INDEX_op_ppc_msum_vec:
3320        tcg_debug_assert(vece == MO_16);
3321        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3322        return;
3323
3324    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3325    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
3326    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3327    default:
3328        g_assert_not_reached();
3329    }
3330
3331    tcg_debug_assert(insn != 0);
3332    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3333}
3334
3335static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3336                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3337{
3338    TCGv_vec t1 = tcg_temp_new_vec(type);
3339
3340    /* Splat w/bytes for xxspltib.  */
3341    tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
3342    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3343              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3344    tcg_temp_free_vec(t1);
3345}
3346
3347static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3348                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3349{
3350    bool need_swap = false, need_inv = false;
3351
3352    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3353
3354    switch (cond) {
3355    case TCG_COND_EQ:
3356    case TCG_COND_GT:
3357    case TCG_COND_GTU:
3358        break;
3359    case TCG_COND_NE:
3360        if (have_isa_3_00 && vece <= MO_32) {
3361            break;
3362        }
3363        /* fall through */
3364    case TCG_COND_LE:
3365    case TCG_COND_LEU:
3366        need_inv = true;
3367        break;
3368    case TCG_COND_LT:
3369    case TCG_COND_LTU:
3370        need_swap = true;
3371        break;
3372    case TCG_COND_GE:
3373    case TCG_COND_GEU:
3374        need_swap = need_inv = true;
3375        break;
3376    default:
3377        g_assert_not_reached();
3378    }
3379
3380    if (need_inv) {
3381        cond = tcg_invert_cond(cond);
3382    }
3383    if (need_swap) {
3384        TCGv_vec t1;
3385        t1 = v1, v1 = v2, v2 = t1;
3386        cond = tcg_swap_cond(cond);
3387    }
3388
3389    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3390              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3391
3392    if (need_inv) {
3393        tcg_gen_not_vec(vece, v0, v0);
3394    }
3395}
3396
3397static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3398                           TCGv_vec v1, TCGv_vec v2)
3399{
3400    TCGv_vec t1 = tcg_temp_new_vec(type);
3401    TCGv_vec t2 = tcg_temp_new_vec(type);
3402    TCGv_vec t3, t4;
3403
3404    switch (vece) {
3405    case MO_8:
3406    case MO_16:
3407        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3408                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3409        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3410                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3411        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3412                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3413        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3414                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3415        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3416                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3417	break;
3418
3419    case MO_32:
3420        tcg_debug_assert(!have_isa_2_07);
3421        t3 = tcg_temp_new_vec(type);
3422        t4 = tcg_temp_new_vec(type);
3423        tcg_gen_dupi_vec(MO_8, t4, -16);
3424        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3425                  tcgv_vec_arg(v2), tcgv_vec_arg(t4));
3426        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3427                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3428        tcg_gen_dupi_vec(MO_8, t3, 0);
3429        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
3430                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
3431        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
3432                  tcgv_vec_arg(t3), tcgv_vec_arg(t4));
3433        tcg_gen_add_vec(MO_32, v0, t2, t3);
3434        tcg_temp_free_vec(t3);
3435        tcg_temp_free_vec(t4);
3436        break;
3437
3438    default:
3439        g_assert_not_reached();
3440    }
3441    tcg_temp_free_vec(t1);
3442    tcg_temp_free_vec(t2);
3443}
3444
3445void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3446                       TCGArg a0, ...)
3447{
3448    va_list va;
3449    TCGv_vec v0, v1, v2, t0;
3450    TCGArg a2;
3451
3452    va_start(va, a0);
3453    v0 = temp_tcgv_vec(arg_temp(a0));
3454    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3455    a2 = va_arg(va, TCGArg);
3456
3457    switch (opc) {
3458    case INDEX_op_shli_vec:
3459        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3460        break;
3461    case INDEX_op_shri_vec:
3462        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3463        break;
3464    case INDEX_op_sari_vec:
3465        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3466        break;
3467    case INDEX_op_rotli_vec:
3468        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3469        break;
3470    case INDEX_op_cmp_vec:
3471        v2 = temp_tcgv_vec(arg_temp(a2));
3472        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3473        break;
3474    case INDEX_op_mul_vec:
3475        v2 = temp_tcgv_vec(arg_temp(a2));
3476        expand_vec_mul(type, vece, v0, v1, v2);
3477        break;
3478    case INDEX_op_rotlv_vec:
3479        v2 = temp_tcgv_vec(arg_temp(a2));
3480        t0 = tcg_temp_new_vec(type);
3481        tcg_gen_neg_vec(vece, t0, v2);
3482        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3483        tcg_temp_free_vec(t0);
3484        break;
3485    default:
3486        g_assert_not_reached();
3487    }
3488    va_end(va);
3489}
3490
3491static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
3492{
3493    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
3494    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
3495    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
3496    static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
3497    static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
3498    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
3499    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
3500    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
3501    static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
3502    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
3503    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
3504    static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
3505    static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
3506    static const TCGTargetOpDef r_rI_ri
3507        = { .args_ct_str = { "r", "rI", "ri" } };
3508    static const TCGTargetOpDef r_rI_rT
3509        = { .args_ct_str = { "r", "rI", "rT" } };
3510    static const TCGTargetOpDef r_r_rZW
3511        = { .args_ct_str = { "r", "r", "rZW" } };
3512    static const TCGTargetOpDef L_L_L_L
3513        = { .args_ct_str = { "L", "L", "L", "L" } };
3514    static const TCGTargetOpDef S_S_S_S
3515        = { .args_ct_str = { "S", "S", "S", "S" } };
3516    static const TCGTargetOpDef movc
3517        = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
3518    static const TCGTargetOpDef dep
3519        = { .args_ct_str = { "r", "0", "rZ" } };
3520    static const TCGTargetOpDef br2
3521        = { .args_ct_str = { "r", "r", "ri", "ri" } };
3522    static const TCGTargetOpDef setc2
3523        = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
3524    static const TCGTargetOpDef add2
3525        = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
3526    static const TCGTargetOpDef sub2
3527        = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
3528    static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
3529    static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
3530    static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
3531    static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
3532    static const TCGTargetOpDef v_v_v_v
3533        = { .args_ct_str = { "v", "v", "v", "v" } };
3534
3535    switch (op) {
3536    case INDEX_op_goto_ptr:
3537        return &r;
3538
3539    case INDEX_op_ld8u_i32:
3540    case INDEX_op_ld8s_i32:
3541    case INDEX_op_ld16u_i32:
3542    case INDEX_op_ld16s_i32:
3543    case INDEX_op_ld_i32:
3544    case INDEX_op_st8_i32:
3545    case INDEX_op_st16_i32:
3546    case INDEX_op_st_i32:
3547    case INDEX_op_ctpop_i32:
3548    case INDEX_op_neg_i32:
3549    case INDEX_op_not_i32:
3550    case INDEX_op_ext8s_i32:
3551    case INDEX_op_ext16s_i32:
3552    case INDEX_op_bswap16_i32:
3553    case INDEX_op_bswap32_i32:
3554    case INDEX_op_extract_i32:
3555    case INDEX_op_ld8u_i64:
3556    case INDEX_op_ld8s_i64:
3557    case INDEX_op_ld16u_i64:
3558    case INDEX_op_ld16s_i64:
3559    case INDEX_op_ld32u_i64:
3560    case INDEX_op_ld32s_i64:
3561    case INDEX_op_ld_i64:
3562    case INDEX_op_st8_i64:
3563    case INDEX_op_st16_i64:
3564    case INDEX_op_st32_i64:
3565    case INDEX_op_st_i64:
3566    case INDEX_op_ctpop_i64:
3567    case INDEX_op_neg_i64:
3568    case INDEX_op_not_i64:
3569    case INDEX_op_ext8s_i64:
3570    case INDEX_op_ext16s_i64:
3571    case INDEX_op_ext32s_i64:
3572    case INDEX_op_ext_i32_i64:
3573    case INDEX_op_extu_i32_i64:
3574    case INDEX_op_bswap16_i64:
3575    case INDEX_op_bswap32_i64:
3576    case INDEX_op_bswap64_i64:
3577    case INDEX_op_extract_i64:
3578        return &r_r;
3579
3580    case INDEX_op_add_i32:
3581    case INDEX_op_and_i32:
3582    case INDEX_op_or_i32:
3583    case INDEX_op_xor_i32:
3584    case INDEX_op_andc_i32:
3585    case INDEX_op_orc_i32:
3586    case INDEX_op_eqv_i32:
3587    case INDEX_op_shl_i32:
3588    case INDEX_op_shr_i32:
3589    case INDEX_op_sar_i32:
3590    case INDEX_op_rotl_i32:
3591    case INDEX_op_rotr_i32:
3592    case INDEX_op_setcond_i32:
3593    case INDEX_op_and_i64:
3594    case INDEX_op_andc_i64:
3595    case INDEX_op_shl_i64:
3596    case INDEX_op_shr_i64:
3597    case INDEX_op_sar_i64:
3598    case INDEX_op_rotl_i64:
3599    case INDEX_op_rotr_i64:
3600    case INDEX_op_setcond_i64:
3601        return &r_r_ri;
3602    case INDEX_op_mul_i32:
3603    case INDEX_op_mul_i64:
3604        return &r_r_rI;
3605    case INDEX_op_div_i32:
3606    case INDEX_op_divu_i32:
3607    case INDEX_op_nand_i32:
3608    case INDEX_op_nor_i32:
3609    case INDEX_op_muluh_i32:
3610    case INDEX_op_mulsh_i32:
3611    case INDEX_op_orc_i64:
3612    case INDEX_op_eqv_i64:
3613    case INDEX_op_nand_i64:
3614    case INDEX_op_nor_i64:
3615    case INDEX_op_div_i64:
3616    case INDEX_op_divu_i64:
3617    case INDEX_op_mulsh_i64:
3618    case INDEX_op_muluh_i64:
3619        return &r_r_r;
3620    case INDEX_op_sub_i32:
3621        return &r_rI_ri;
3622    case INDEX_op_add_i64:
3623        return &r_r_rT;
3624    case INDEX_op_or_i64:
3625    case INDEX_op_xor_i64:
3626        return &r_r_rU;
3627    case INDEX_op_sub_i64:
3628        return &r_rI_rT;
3629    case INDEX_op_clz_i32:
3630    case INDEX_op_ctz_i32:
3631    case INDEX_op_clz_i64:
3632    case INDEX_op_ctz_i64:
3633        return &r_r_rZW;
3634
3635    case INDEX_op_brcond_i32:
3636    case INDEX_op_brcond_i64:
3637        return &r_ri;
3638
3639    case INDEX_op_movcond_i32:
3640    case INDEX_op_movcond_i64:
3641        return &movc;
3642    case INDEX_op_deposit_i32:
3643    case INDEX_op_deposit_i64:
3644        return &dep;
3645    case INDEX_op_brcond2_i32:
3646        return &br2;
3647    case INDEX_op_setcond2_i32:
3648        return &setc2;
3649    case INDEX_op_add2_i64:
3650    case INDEX_op_add2_i32:
3651        return &add2;
3652    case INDEX_op_sub2_i64:
3653    case INDEX_op_sub2_i32:
3654        return &sub2;
3655
3656    case INDEX_op_qemu_ld_i32:
3657        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3658                ? &r_L : &r_L_L);
3659    case INDEX_op_qemu_st_i32:
3660        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3661                ? &S_S : &S_S_S);
3662    case INDEX_op_qemu_ld_i64:
3663        return (TCG_TARGET_REG_BITS == 64 ? &r_L
3664                : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
3665    case INDEX_op_qemu_st_i64:
3666        return (TCG_TARGET_REG_BITS == 64 ? &S_S
3667                : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
3668
3669    case INDEX_op_add_vec:
3670    case INDEX_op_sub_vec:
3671    case INDEX_op_mul_vec:
3672    case INDEX_op_and_vec:
3673    case INDEX_op_or_vec:
3674    case INDEX_op_xor_vec:
3675    case INDEX_op_andc_vec:
3676    case INDEX_op_orc_vec:
3677    case INDEX_op_cmp_vec:
3678    case INDEX_op_ssadd_vec:
3679    case INDEX_op_sssub_vec:
3680    case INDEX_op_usadd_vec:
3681    case INDEX_op_ussub_vec:
3682    case INDEX_op_smax_vec:
3683    case INDEX_op_smin_vec:
3684    case INDEX_op_umax_vec:
3685    case INDEX_op_umin_vec:
3686    case INDEX_op_shlv_vec:
3687    case INDEX_op_shrv_vec:
3688    case INDEX_op_sarv_vec:
3689    case INDEX_op_rotlv_vec:
3690    case INDEX_op_rotrv_vec:
3691    case INDEX_op_ppc_mrgh_vec:
3692    case INDEX_op_ppc_mrgl_vec:
3693    case INDEX_op_ppc_muleu_vec:
3694    case INDEX_op_ppc_mulou_vec:
3695    case INDEX_op_ppc_pkum_vec:
3696    case INDEX_op_dup2_vec:
3697        return &v_v_v;
3698    case INDEX_op_not_vec:
3699    case INDEX_op_neg_vec:
3700        return &v_v;
3701    case INDEX_op_dup_vec:
3702        return have_isa_3_00 ? &v_vr : &v_v;
3703    case INDEX_op_ld_vec:
3704    case INDEX_op_st_vec:
3705    case INDEX_op_dupm_vec:
3706        return &v_r;
3707    case INDEX_op_bitsel_vec:
3708    case INDEX_op_ppc_msum_vec:
3709        return &v_v_v_v;
3710
3711    default:
3712        return NULL;
3713    }
3714}
3715
3716static void tcg_target_init(TCGContext *s)
3717{
3718    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3719    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3720
3721    have_isa = tcg_isa_base;
3722    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3723        have_isa = tcg_isa_2_06;
3724    }
3725#ifdef PPC_FEATURE2_ARCH_2_07
3726    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3727        have_isa = tcg_isa_2_07;
3728    }
3729#endif
3730#ifdef PPC_FEATURE2_ARCH_3_00
3731    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3732        have_isa = tcg_isa_3_00;
3733    }
3734#endif
3735#ifdef PPC_FEATURE2_ARCH_3_10
3736    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3737        have_isa = tcg_isa_3_10;
3738    }
3739#endif
3740
3741#ifdef PPC_FEATURE2_HAS_ISEL
3742    /* Prefer explicit instruction from the kernel. */
3743    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3744#else
3745    /* Fall back to knowing Power7 (2.06) has ISEL. */
3746    have_isel = have_isa_2_06;
3747#endif
3748
3749    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3750        have_altivec = true;
3751        /* We only care about the portion of VSX that overlaps Altivec. */
3752        if (hwcap & PPC_FEATURE_HAS_VSX) {
3753            have_vsx = true;
3754        }
3755    }
3756
3757    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3758    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3759    if (have_altivec) {
3760        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3761        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3762    }
3763
3764    tcg_target_call_clobber_regs = 0;
3765    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3766    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3767    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3768    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3769    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3770    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3771    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3772    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3773    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3774    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3775    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3776    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3777
3778    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3779    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3780    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3781    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3782    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3783    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3784    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3785    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3786    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3787    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3788    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3789    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3790    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3791    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3792    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3793    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3794    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3795    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3796    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3797    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3798
3799    s->reserved_regs = 0;
3800    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3801    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3802#if defined(_CALL_SYSV)
3803    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3804#endif
3805#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3806    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3807#endif
3808    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3809    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3810    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3811    if (USE_REG_TB) {
3812        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3813    }
3814}
3815
3816#ifdef __ELF__
3817typedef struct {
3818    DebugFrameCIE cie;
3819    DebugFrameFDEHeader fde;
3820    uint8_t fde_def_cfa[4];
3821    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3822} DebugFrame;
3823
3824/* We're expecting a 2 byte uleb128 encoded value.  */
3825QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3826
3827#if TCG_TARGET_REG_BITS == 64
3828# define ELF_HOST_MACHINE EM_PPC64
3829#else
3830# define ELF_HOST_MACHINE EM_PPC
3831#endif
3832
3833static DebugFrame debug_frame = {
3834    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3835    .cie.id = -1,
3836    .cie.version = 1,
3837    .cie.code_align = 1,
3838    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3839    .cie.return_column = 65,
3840
3841    /* Total FDE size does not include the "len" member.  */
3842    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3843
3844    .fde_def_cfa = {
3845        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3846        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3847        (FRAME_SIZE >> 7)
3848    },
3849    .fde_reg_ofs = {
3850        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3851        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3852    }
3853};
3854
3855void tcg_register_jit(void *buf, size_t buf_size)
3856{
3857    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3858    int i;
3859
3860    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3861        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3862        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3863    }
3864
3865    debug_frame.fde.func_start = (uintptr_t)buf;
3866    debug_frame.fde.func_len = buf_size;
3867
3868    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3869}
3870#endif /* __ELF__ */
3871
3872void flush_icache_range(uintptr_t start, uintptr_t stop)
3873{
3874    uintptr_t p, start1, stop1;
3875    size_t dsize = qemu_dcache_linesize;
3876    size_t isize = qemu_icache_linesize;
3877
3878    start1 = start & ~(dsize - 1);
3879    stop1 = (stop + dsize - 1) & ~(dsize - 1);
3880    for (p = start1; p < stop1; p += dsize) {
3881        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
3882    }
3883    asm volatile ("sync" : : : "memory");
3884
3885    start &= start & ~(isize - 1);
3886    stop1 = (stop + isize - 1) & ~(isize - 1);
3887    for (p = start1; p < stop1; p += isize) {
3888        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
3889    }
3890    asm volatile ("sync" : : : "memory");
3891    asm volatile ("isync" : : : "memory");
3892}
3893