/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"

#if defined _CALL_DARWIN || defined __APPLE__
#define TCG_TARGET_CALL_DARWIN
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ALIGN_ARGS   1
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_U16  0x200
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

static tcg_insn_unit *tb_ret_addr;

TCGPowerISA have_isa;
static bool have_isel;
bool have_altivec;
bool have_vsx;

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG 30
#endif

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4
};

static const int tcg_target_callee_save_regs[] = {
#ifdef TCG_TARGET_CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
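
/*
 * For illustration: the unconditional branch "b" carries a 24-bit word
 * displacement, i.e. a signed 26-bit byte range of +/- 32 MiB.  A
 * displacement of 0x01fffffc passes the check above, while 0x02000000
 * (exactly 32 MiB) does not.
 */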

static uint32_t reloc_pc24_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    if (in_range_b(disp)) {
        *pc = (*pc & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *pc, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    if (disp == (int16_t) disp) {
        *pc = (*pc & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'A': case 'B': case 'C': case 'D':
        /* ct_str has already been advanced past the matched letter.  */
        tcg_regset_set_reg(ct->regs, 3 + ct_str[-1] - 'A');
        break;
    case 'r':
        ct->regs = 0xffffffff;
        break;
    case 'v':
        ct->regs = 0xffffffff00000000ull;
        break;
    case 'L':                   /* qemu_ld constraint */
        ct->regs = 0xffffffff;
        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
#endif
        break;
    case 'S':                   /* qemu_st constraint */
        ct->regs = 0xffffffff;
        tcg_regset_reset_reg(ct->regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
        tcg_regset_reset_reg(ct->regs, TCG_REG_R4);
        tcg_regset_reset_reg(ct->regs, TCG_REG_R5);
        tcg_regset_reset_reg(ct->regs, TCG_REG_R6);
#endif
        break;
    case 'I':
        ct->ct |= TCG_CT_CONST_S16;
        break;
    case 'J':
        ct->ct |= TCG_CT_CONST_U16;
        break;
    case 'M':
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'T':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'U':
        ct->ct |= TCG_CT_CONST_U32;
        break;
    case 'W':
        ct->ct |= TCG_CT_CONST_WSZ;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* test if a constant matches the constraint */
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))
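
/*
 * For illustration: XO31(266) assembles the XO-form "add" opcode as
 * (31 << 26) | (266 << 1) = 0x7c000214.  OR-ing in the RT/RA/RB field
 * helpers defined further below yields a complete instruction, e.g.
 * "add r3,r4,r5" is 0x7c642a14.
 */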

#define B      OPCD( 18)
#define BC     OPCD( 16)
#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)

#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)

#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)
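
/*
 * For illustration: MTSPR | RS(TCG_REG_R0) | CTR encodes "mtctr r0":
 * (31 << 26) | (467 << 1) | (0 << 21) | (((9 << 5) | 0) << 11)
 * = 0x7c0903a6, which matches the mtctr extended mnemonic.
 */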

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
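
/*
 * For illustration: tcg_to_bc[TCG_COND_EQ] expands to
 * BC | BI(7, CR_EQ) | BO_COND_TRUE
 * = (16 << 26) | ((2 + 7 * 4) << 16) | (12 << 21) = 0x419e0000,
 * i.e. "beq cr7, <target>" with a zero displacement; the 14-bit
 * displacement field is filled in by the relocation code above.
 */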

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}
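
/*
 * For illustration: MD-form insns split the 6-bit SH and MB fields,
 * which is what the rearrangement above implements.  E.g.
 * tcg_out_rld(s, RLDICL, r, r, 48, 16) encodes sh = 48 as
 * SH(48 & 31) | (1 << 1) and mb = 16 as MB64((16 << 1) & 0x3f),
 * producing "rldicl r,r,48,16".
 */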

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}
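
/*
 * For illustration: the shifts above are rotate-and-mask idioms, not
 * dedicated shift opcodes.  E.g. tcg_out_shri64(s, dst, src, 16) emits
 * "rldicl dst,src,48,16", which rotates left by 64 - 16 and then clears
 * the upper 16 bits -- exactly a logical right shift by 16.
 */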

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}
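
/*
 * For illustration: 0x7fff0000 matches the second test and loads with a
 * single "lis" (addis from zero), whereas 0x7fff8000 matches neither
 * test and must go through the multi-insn paths in tcg_out_movi_int
 * below.
 */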

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = arg - (intptr_t)s->code_gen_ptr;
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       -(intptr_t)s->code_gen_ptr);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
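
/*
 * For illustration, assuming neither the TB-relative nor the constant
 * pool case applies: arg = 0x123456789 falls through to the final path
 * above and expands to
 *     addi  ret,0,1        # high 32 bits
 *     sldi  ret,ret,32
 *     oris  ret,ret,0x2345
 *     ori   ret,ret,0x6789
 */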

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    low = (int8_t)val;
    if (low >= -16 && low < 16) {
        if (val == (tcg_target_long)dup_const(MO_8, low)) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (val == (tcg_target_long)dup_const(MO_16, low)) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (val == (tcg_target_long)dup_const(MO_32, low)) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
    }
    if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
        tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
        return;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = -(intptr_t)s->code_gen_ptr;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_out_dupi_vec(s, type, ret, arg);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
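
/*
 * For illustration: c = 0x00ffff00 is a single contiguous run of ones,
 * so the transition test above accepts it and produces *mb = 8 and
 * *me = 23, the run's bounds in the big-endian bit numbering used by
 * rlwinm masks.
 */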

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
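
/*
 * For illustration: tcg_out_mem_long(s, LD, LDX, TCG_REG_R3, TCG_REG_R4,
 * 0x12348000) cannot use a single D-form displacement.  The code above
 * splits the offset into l0 = -0x8000 and l1 = 0x1235 and, since
 * rs == rt for a GPR load, emits
 *     addis r3,r4,0x1235
 *     ld    r3,-0x8000(r3)
 */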

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        tcg_abort();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
}

static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
{
    /* X != 0 implies X + -1 generates a carry.  The subfe then computes
       R = X + ~(X-1) + CA = X - X + CA = CA, i.e. exactly the carry bit,
       which is 1 iff X != 0.  */
    if (dst != src) {
        tcg_out32(s, ADDIC | TAI(dst, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, dst, src));
    } else {
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
    }
}
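
/*
 * For illustration: with src = 5, the addic above computes dst = 4 and
 * sets CA (the addition of -1 wraps); subfe then yields 5 + ~4 + 1 = 1.
 * With src = 0, addic computes -1 with CA clear, and subfe yields
 * 0 + ~(-1) + 0 = 0.
 */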

static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}

static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
{
    if (l->has_value) {
        bc |= reloc_pc14_val(s->code_ptr, l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out32(s, bc);
}

static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}

static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}

static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}

static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    uint32_t insn = HWSYNC;
    a0 &= TCG_MO_ALL;
    if (a0 == TCG_MO_LD_LD) {
        insn = LWSYNC;
    } else if (a0 == TCG_MO_ST_ST) {
        insn = EIEIO;
    }
    tcg_out32(s, insn);
}
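
/*
 * For illustration: a full TCG_MO_ALL barrier emits the heavyweight
 * "hwsync"; a load-load-only barrier weakens to "lwsync", and a
 * store-store-only barrier to "eieio", which is sufficient to order
 * stores among themselves.
 */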

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_insn_unit i1, i2;
        intptr_t tb_diff = addr - tc_ptr;
        intptr_t br_diff = addr - (jmp_addr + 4);
        uint64_t pair;

        /* When the displacement fits in 16 bits we can patch in an
           addi+b pair.  This does not exercise the full range of the
           branch, but we still need to be able to reload TCG_REG_TB
           with its new value, and the short form happens quite often.  */
        if (tb_diff == (int16_t)tb_diff) {
            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
            i2 = B | (br_diff & 0x3fffffc);
        } else {
            intptr_t lo = (int16_t)tb_diff;
            intptr_t hi = (int32_t)(tb_diff - lo);
            assert(tb_diff == hi + lo);
            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
        }
#ifdef HOST_WORDS_BIGENDIAN
        pair = (uint64_t)i1 << 32 | i2;
#else
        pair = (uint64_t)i2 << 32 | i1;
#endif

        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
           within qatomic_set that would fail to build a ppc32 host.  */
        qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
        flush_icache_range(jmp_addr, jmp_addr + 8);
    } else {
        intptr_t diff = addr - jmp_addr;
        tcg_debug_assert(in_range_b(diff));
        qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
        flush_icache_range(jmp_addr, jmp_addr + 4);
    }
}

static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the function descriptor.  Branch directly if the
       target is in range and the TOC value is cheap to materialize;
       otherwise load the descriptor and make an indirect call.  */
1770    void *tgt = ((void **)target)[0];
1771    uintptr_t toc = ((uintptr_t *)target)[1];
1772    intptr_t diff = tcg_pcrel_diff(s, tgt);
1773
1774    if (in_range_b(diff) && toc == (uint32_t)toc) {
1775        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
1776        tcg_out_b(s, LK, tgt);
1777    } else {
1778        /* Fold the low bits of the constant into the addresses below.  */
1779        intptr_t arg = (intptr_t)target;
1780        int ofs = (int16_t)arg;
1781
1782        if (ofs + 8 < 0x8000) {
1783            arg -= ofs;
1784        } else {
1785            ofs = 0;
1786        }
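        /* E.g. for the illustrative address target = 0x12345678 we get
           ofs = 0x5678 and arg = 0x12340000, so the two loads below use
           displacements ofs and ofs + SZP, and tcg_out_movi can build
           arg without an extra ORI for the low bits.  */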
1787        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
1788        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
1789        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
1790        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
1791        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1792    }
1793#elif defined(_CALL_ELF) && _CALL_ELF == 2
1794    intptr_t diff;
1795
1796    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
1797       address, which the callee uses to compute its TOC address.  */
1798    /* FIXME: when the branch is in range, we could avoid r12 load if we
1799       knew that the destination uses the same TOC, and what its local
1800       entry point offset is.  */
1801    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
1802
1803    diff = tcg_pcrel_diff(s, target);
1804    if (in_range_b(diff)) {
1805        tcg_out_b(s, LK, target);
1806    } else {
1807        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
1808        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
1809    }
1810#else
1811    tcg_out_b(s, LK, target);
1812#endif
1813}
1814
1815static const uint32_t qemu_ldx_opc[16] = {
1816    [MO_UB] = LBZX,
1817    [MO_UW] = LHZX,
1818    [MO_UL] = LWZX,
1819    [MO_Q]  = LDX,
1820    [MO_SW] = LHAX,
1821    [MO_SL] = LWAX,
1822    [MO_BSWAP | MO_UB] = LBZX,
1823    [MO_BSWAP | MO_UW] = LHBRX,
1824    [MO_BSWAP | MO_UL] = LWBRX,
1825    [MO_BSWAP | MO_Q]  = LDBRX,
1826};
1827
1828static const uint32_t qemu_stx_opc[16] = {
1829    [MO_UB] = STBX,
1830    [MO_UW] = STHX,
1831    [MO_UL] = STWX,
1832    [MO_Q]  = STDX,
1833    [MO_BSWAP | MO_UB] = STBX,
1834    [MO_BSWAP | MO_UW] = STHBRX,
1835    [MO_BSWAP | MO_UL] = STWBRX,
1836    [MO_BSWAP | MO_Q]  = STDBRX,
1837};
1838
1839static const uint32_t qemu_exts_opc[4] = {
1840    EXTSB, EXTSH, EXTSW, 0
1841};
1842
1843#if defined(CONFIG_SOFTMMU)
1844#include "../tcg-ldst.c.inc"
1845
1846/* helper signature: helper_ld_mmu(CPUArchState *env, target_ulong addr,
1847 *                                 TCGMemOpIdx oi, uintptr_t ra)
1848 */
1849static void * const qemu_ld_helpers[16] = {
1850    [MO_UB]   = helper_ret_ldub_mmu,
1851    [MO_LEUW] = helper_le_lduw_mmu,
1852    [MO_LEUL] = helper_le_ldul_mmu,
1853    [MO_LEQ]  = helper_le_ldq_mmu,
1854    [MO_BEUW] = helper_be_lduw_mmu,
1855    [MO_BEUL] = helper_be_ldul_mmu,
1856    [MO_BEQ]  = helper_be_ldq_mmu,
1857};
1858
1859/* helper signature: helper_st_mmu(CPUArchState *env, target_ulong addr,
1860 *                                 uintxx_t val, TCGMemOpIdx oi, uintptr_t ra)
1861 */
1862static void * const qemu_st_helpers[16] = {
1863    [MO_UB]   = helper_ret_stb_mmu,
1864    [MO_LEUW] = helper_le_stw_mmu,
1865    [MO_LEUL] = helper_le_stl_mmu,
1866    [MO_LEQ]  = helper_le_stq_mmu,
1867    [MO_BEUW] = helper_be_stw_mmu,
1868    [MO_BEUL] = helper_be_stl_mmu,
1869    [MO_BEQ]  = helper_be_stq_mmu,
1870};
1871
1872/* We expect to use a 16-bit negative offset from ENV.  */
1873QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1874QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
1875
1876/* Perform the TLB load and compare.  Places the result of the comparison
1877   in CR7, loads the addend of the TLB into R3, and returns the register
1878   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
1879
1880static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
1881                               TCGReg addrlo, TCGReg addrhi,
1882                               int mem_index, bool is_read)
1883{
1884    int cmp_off
1885        = (is_read
1886           ? offsetof(CPUTLBEntry, addr_read)
1887           : offsetof(CPUTLBEntry, addr_write));
1888    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1889    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1890    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1891    unsigned s_bits = opc & MO_SIZE;
1892    unsigned a_bits = get_alignment_bits(opc);
1893
1894    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
1895    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
1896    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
1897
1898    /* Extract the page index, shifted into place for the TLB index.  */
1899    if (TCG_TARGET_REG_BITS == 32) {
1900        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
1901                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1902    } else {
1903        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
1904                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1905    }
1906    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
1907
1908    /* Load the TLB comparator.  */
1909    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
1910        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
1911                        ? LWZUX : LDUX);
1912        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
1913    } else {
1914        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
1915        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1916            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
1917            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
1918        } else {
1919            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
1920        }
1921    }
1922
1923    /* Load the TLB addend for use on the fast path.  Do this as early
1924       as possible to minimize the load-use delay.  */
1925    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
1926               offsetof(CPUTLBEntry, addend));
1927
1928    /* Clear the non-page, non-alignment bits from the address */
1929    if (TCG_TARGET_REG_BITS == 32) {
1930        /* We don't support unaligned accesses on 32-bits.
1931         * Preserve the bottom bits and thus trigger a comparison
1932         * failure on unaligned accesses.
1933         */
1934        if (a_bits < s_bits) {
1935            a_bits = s_bits;
1936        }
1937        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
1938                    (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1939    } else {
1940        TCGReg t = addrlo;
1941
1942        /* If the access is unaligned, we need to make sure we fail if we
1943         * cross a page boundary.  The trick is to add the access size-1
1944         * to the address before masking the low bits.  That will make the
1945         * address overflow to the next page if we cross a page boundary,
1946         * which will then force a mismatch of the TLB compare.
1947         */
1948        if (a_bits < s_bits) {
1949            unsigned a_mask = (1 << a_bits) - 1;
1950            unsigned s_mask = (1 << s_bits) - 1;
1951            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
1952            t = TCG_REG_R0;
1953        }
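        /* E.g. an 8-byte access with a_bits == 0 adds 7 here; an access
           that straddles a page boundary then carries into the page
           number, and the TLB compare fails as intended.  */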
1954
1955        /* Mask the address for the requested alignment.  */
1956        if (TARGET_LONG_BITS == 32) {
1957            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
1958                        (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
1959            /* Zero-extend the address for use in the final host address.  */
1960            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
1961            addrlo = TCG_REG_R4;
1962        } else if (a_bits == 0) {
1963            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
1964        } else {
1965            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
1966                        64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
1967            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
1968        }
1969    }
1970
1971    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
1972        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1973                    0, 7, TCG_TYPE_I32);
1974        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
1975        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
1976    } else {
1977        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
1978                    0, 7, TCG_TYPE_TL);
1979    }
1980
1981    return addrlo;
1982}
1983
1984/* Record the context of a call to the out of line helper code for the slow
1985   path for a load or store, so that we can later generate the correct
1986   helper code.  */
1987static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1988                                TCGReg datalo_reg, TCGReg datahi_reg,
1989                                TCGReg addrlo_reg, TCGReg addrhi_reg,
1990                                tcg_insn_unit *raddr, tcg_insn_unit *lptr)
1991{
1992    TCGLabelQemuLdst *label = new_ldst_label(s);
1993
1994    label->is_ld = is_ld;
1995    label->oi = oi;
1996    label->datalo_reg = datalo_reg;
1997    label->datahi_reg = datahi_reg;
1998    label->addrlo_reg = addrlo_reg;
1999    label->addrhi_reg = addrhi_reg;
2000    label->raddr = raddr;
2001    label->label_ptr[0] = lptr;
2002}
2003
2004static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2005{
2006    TCGMemOpIdx oi = lb->oi;
2007    MemOp opc = get_memop(oi);
2008    TCGReg hi, lo, arg = TCG_REG_R3;
2009
2010    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2011        return false;
2012    }
2013
2014    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2015
2016    lo = lb->addrlo_reg;
2017    hi = lb->addrhi_reg;
2018    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2019#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2020        arg |= 1;
2021#endif
2022        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2023        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2024    } else {
2025        /* If the address needed to be zero-extended, we'll have already
2026           placed it in R4.  The only remaining case is 64-bit guest.  */
2027        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2028    }
2029
2030    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2031    tcg_out32(s, MFSPR | RT(arg) | LR);
2032
2033    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2034
2035    lo = lb->datalo_reg;
2036    hi = lb->datahi_reg;
2037    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2038        tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
2039        tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
2040    } else if (opc & MO_SIGN) {
2041        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
2042        tcg_out32(s, insn | RA(lo) | RS(TCG_REG_R3));
2043    } else {
2044        tcg_out_mov(s, TCG_TYPE_REG, lo, TCG_REG_R3);
2045    }
2046
2047    tcg_out_b(s, 0, lb->raddr);
2048    return true;
2049}
2050
2051static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2052{
2053    TCGMemOpIdx oi = lb->oi;
2054    MemOp opc = get_memop(oi);
2055    MemOp s_bits = opc & MO_SIZE;
2056    TCGReg hi, lo, arg = TCG_REG_R3;
2057
2058    if (!reloc_pc14(lb->label_ptr[0], s->code_ptr)) {
2059        return false;
2060    }
2061
2062    tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
2063
2064    lo = lb->addrlo_reg;
2065    hi = lb->addrhi_reg;
2066    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
2067#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2068        arg |= 1;
2069#endif
2070        tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2071        tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2072    } else {
2073        /* If the address needed to be zero-extended, we'll have already
2074           placed it in R4.  The only remaining case is 64-bit guest.  */
2075        tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
2076    }
2077
2078    lo = lb->datalo_reg;
2079    hi = lb->datahi_reg;
2080    if (TCG_TARGET_REG_BITS == 32) {
2081        switch (s_bits) {
2082        case MO_64:
2083#ifdef TCG_TARGET_CALL_ALIGN_ARGS
2084            arg |= 1;
2085#endif
2086            tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
2087            /* FALLTHRU */
2088        case MO_32:
2089            tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
2090            break;
2091        default:
2092            tcg_out_rlw(s, RLWINM, arg++, lo, 0, 32 - (8 << s_bits), 31);
2093            break;
2094        }
2095    } else {
2096        if (s_bits == MO_64) {
2097            tcg_out_mov(s, TCG_TYPE_I64, arg++, lo);
2098        } else {
2099            tcg_out_rld(s, RLDICL, arg++, lo, 0, 64 - (8 << s_bits));
2100        }
2101    }
2102
2103    tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
2104    tcg_out32(s, MFSPR | RT(arg) | LR);
2105
2106    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
2107
2108    tcg_out_b(s, 0, lb->raddr);
2109    return true;
2110}
2111#endif /* SOFTMMU */
2112
2113static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
2114{
2115    TCGReg datalo, datahi, addrlo, rbase;
2116    TCGReg addrhi __attribute__((unused));
2117    TCGMemOpIdx oi;
2118    MemOp opc, s_bits;
2119#ifdef CONFIG_SOFTMMU
2120    int mem_index;
2121    tcg_insn_unit *label_ptr;
2122#endif
2123
2124    datalo = *args++;
2125    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2126    addrlo = *args++;
2127    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2128    oi = *args++;
2129    opc = get_memop(oi);
2130    s_bits = opc & MO_SIZE;
2131
2132#ifdef CONFIG_SOFTMMU
2133    mem_index = get_mmuidx(oi);
2134    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
2135
2136    /* Branch-and-link to the slow path; LR records the fast-path address. */
2137    label_ptr = s->code_ptr;
2138    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2139
2140    rbase = TCG_REG_R3;
2141#else  /* !CONFIG_SOFTMMU */
2142    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2143    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2144        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2145        addrlo = TCG_REG_TMP1;
2146    }
2147#endif
2148
2149    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2150        if (opc & MO_BSWAP) {
2151            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2152            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2153            tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
2154        } else if (rbase != 0) {
2155            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2156            tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
2157            tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
2158        } else if (addrlo == datahi) {
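            /* addrlo overlaps datahi: load the low word first so the
               address is still intact for the second load.  */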
2159            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2160            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2161        } else {
2162            tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
2163            tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
2164        }
2165    } else {
2166        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2167        if (!have_isa_2_06 && insn == LDBRX) {
2168            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2169            tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
2170            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
2171            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2172        } else if (insn) {
2173            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2174        } else {
2175            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2176            tcg_out32(s, insn | TAB(datalo, rbase, addrlo));
2177            insn = qemu_exts_opc[s_bits];
2178            tcg_out32(s, insn | RA(datalo) | RS(datalo));
2179        }
2180    }
2181
2182#ifdef CONFIG_SOFTMMU
2183    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
2184                        s->code_ptr, label_ptr);
2185#endif
2186}
2187
2188static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
2189{
2190    TCGReg datalo, datahi, addrlo, rbase;
2191    TCGReg addrhi __attribute__((unused));
2192    TCGMemOpIdx oi;
2193    MemOp opc, s_bits;
2194#ifdef CONFIG_SOFTMMU
2195    int mem_index;
2196    tcg_insn_unit *label_ptr;
2197#endif
2198
2199    datalo = *args++;
2200    datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
2201    addrlo = *args++;
2202    addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
2203    oi = *args++;
2204    opc = get_memop(oi);
2205    s_bits = opc & MO_SIZE;
2206
2207#ifdef CONFIG_SOFTMMU
2208    mem_index = get_mmuidx(oi);
2209    addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false);
2210
2211    /* Branch-and-link to the slow path; LR records the fast-path address. */
2212    label_ptr = s->code_ptr;
2213    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2214
2215    rbase = TCG_REG_R3;
2216#else  /* !CONFIG_SOFTMMU */
2217    rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
2218    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
2219        tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
2220        addrlo = TCG_REG_TMP1;
2221    }
2222#endif
2223
2224    if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
2225        if (opc & MO_BSWAP) {
2226            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2227            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2228            tcg_out32(s, STWBRX | SAB(datahi, rbase, TCG_REG_R0));
2229        } else if (rbase != 0) {
2230            tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
2231            tcg_out32(s, STWX | SAB(datahi, rbase, addrlo));
2232            tcg_out32(s, STWX | SAB(datalo, rbase, TCG_REG_R0));
2233        } else {
2234            tcg_out32(s, STW | TAI(datahi, addrlo, 0));
2235            tcg_out32(s, STW | TAI(datalo, addrlo, 4));
2236        }
2237    } else {
2238        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2239        if (!have_isa_2_06 && insn == STDBRX) {
2240            tcg_out32(s, STWBRX | SAB(datalo, rbase, addrlo));
2241            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, addrlo, 4));
2242            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2243            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_TMP1));
2244        } else {
2245            tcg_out32(s, insn | SAB(datalo, rbase, addrlo));
2246        }
2247    }
2248
2249#ifdef CONFIG_SOFTMMU
2250    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
2251                        s->code_ptr, label_ptr);
2252#endif
2253}
2254
2255static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2256{
2257    int i;
2258    for (i = 0; i < count; ++i) {
2259        p[i] = NOP;
2260    }
2261}
2262
2263/* Parameters for function call generation, used in tcg.c.  */
2264#define TCG_TARGET_STACK_ALIGN       16
2265#define TCG_TARGET_EXTEND_ARGS       1
2266
2267#ifdef _CALL_AIX
2268# define LINK_AREA_SIZE                (6 * SZR)
2269# define LR_OFFSET                     (1 * SZR)
2270# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2271#elif defined(TCG_TARGET_CALL_DARWIN)
2272# define LINK_AREA_SIZE                (6 * SZR)
2273# define LR_OFFSET                     (2 * SZR)
2274#elif TCG_TARGET_REG_BITS == 64
2275# if defined(_CALL_ELF) && _CALL_ELF == 2
2276#  define LINK_AREA_SIZE               (4 * SZR)
2277#  define LR_OFFSET                    (1 * SZR)
2278# endif
2279#else /* TCG_TARGET_REG_BITS == 32 */
2280# if defined(_CALL_SYSV)
2281#  define LINK_AREA_SIZE               (2 * SZR)
2282#  define LR_OFFSET                    (1 * SZR)
2283# endif
2284#endif
2285#ifndef LR_OFFSET
2286# error "Unhandled abi"
2287#endif
2288#ifndef TCG_TARGET_CALL_STACK_OFFSET
2289# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2290#endif
2291
2292#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2293#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2294
2295#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2296                     + TCG_STATIC_CALL_ARGS_SIZE    \
2297                     + CPU_TEMP_BUF_SIZE            \
2298                     + REG_SAVE_SIZE                \
2299                     + TCG_TARGET_STACK_ALIGN - 1)  \
2300                    & -TCG_TARGET_STACK_ALIGN)
2301
2302#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
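
/* A sketch of the frame built by the prologue (offsets from the new R1,
 * growing upward; exact values depend on the ABI):
 *
 *   FRAME_SIZE + LR_OFFSET   LR save slot in the caller's frame
 *   REG_SAVE_BOT ...         callee-saved GPRs
 *   below REG_SAVE_BOT       CPU_TEMP_BUF, then static call-arg space
 *   0 ...                    link area (back chain, etc.)
 */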
2303
2304static void tcg_target_qemu_prologue(TCGContext *s)
2305{
2306    int i;
2307
2308#ifdef _CALL_AIX
2309    void **desc = (void **)s->code_ptr;
2310    desc[0] = desc + 2;                   /* entry point */
2311    desc[1] = 0;                          /* environment pointer */
2312    s->code_ptr = (void *)(desc + 2);     /* skip over descriptor */
2313#endif
2314
2315    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2316                  CPU_TEMP_BUF_SIZE);
2317
2318    /* Prologue */
2319    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2320    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2321              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2322
2323    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2324        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2325                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2326    }
2327    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2328
2329#ifndef CONFIG_SOFTMMU
2330    if (guest_base) {
2331        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2332        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2333    }
2334#endif
2335
2336    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2337    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2338    if (USE_REG_TB) {
2339        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
2340    }
2341    tcg_out32(s, BCCTR | BO_ALWAYS);
2342
2343    /* Epilogue */
2344    s->code_gen_epilogue = tb_ret_addr = s->code_ptr;
2345
2346    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2347    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2348        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2349                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2350    }
2351    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2352    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2353    tcg_out32(s, BCLR | BO_ALWAYS);
2354}
2355
2356static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
2357                       const int *const_args)
2358{
2359    TCGArg a0, a1, a2;
2360    int c;
2361
2362    switch (opc) {
2363    case INDEX_op_exit_tb:
2364        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
2365        tcg_out_b(s, 0, tb_ret_addr);
2366        break;
2367    case INDEX_op_goto_tb:
2368        if (s->tb_jmp_insn_offset) {
2369            /* Direct jump. */
2370            if (TCG_TARGET_REG_BITS == 64) {
2371                /* Ensure the next insns are 8-byte aligned. */
2372                if ((uintptr_t)s->code_ptr & 7) {
2373                    tcg_out32(s, NOP);
2374                }
2375                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2376                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
2377                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
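                /* This ADDIS/ADDI pair is the naturally aligned 8-byte
                   unit that tb_target_set_jmp_target patches atomically,
                   as either addis+addi or addi+b.  */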
2378            } else {
2379                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
2380                tcg_out32(s, B);
2381                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
2382                break;
2383            }
2384        } else {
2385            /* Indirect jump. */
2386            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
2387            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
2388                       (intptr_t)(s->tb_jmp_target_addr + args[0]));
2389        }
2390        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
2391        tcg_out32(s, BCCTR | BO_ALWAYS);
2392        set_jmp_reset_offset(s, args[0]);
2393        if (USE_REG_TB) {
2394            /* For the unlinked case, reset TCG_REG_TB to this TB's start.  */
2395            c = -tcg_current_code_size(s);
2396            assert(c == (int16_t)c);
2397            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
2398        }
2399        break;
2400    case INDEX_op_goto_ptr:
2401        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2402        if (USE_REG_TB) {
2403            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2404        }
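        /* Clear R3, the return value for the case where the jump target
           is the epilogue (goto_ptr returns 0 when no TB is found).  */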
2405        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2406        tcg_out32(s, BCCTR | BO_ALWAYS);
2407        break;
2408    case INDEX_op_br:
2409        {
2410            TCGLabel *l = arg_label(args[0]);
2411            uint32_t insn = B;
2412
2413            if (l->has_value) {
2414                insn |= reloc_pc24_val(s->code_ptr, l->u.value_ptr);
2415            } else {
2416                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2417            }
2418            tcg_out32(s, insn);
2419        }
2420        break;
2421    case INDEX_op_ld8u_i32:
2422    case INDEX_op_ld8u_i64:
2423        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2424        break;
2425    case INDEX_op_ld8s_i32:
2426    case INDEX_op_ld8s_i64:
2427        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2428        tcg_out32(s, EXTSB | RS(args[0]) | RA(args[0]));
2429        break;
2430    case INDEX_op_ld16u_i32:
2431    case INDEX_op_ld16u_i64:
2432        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2433        break;
2434    case INDEX_op_ld16s_i32:
2435    case INDEX_op_ld16s_i64:
2436        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2437        break;
2438    case INDEX_op_ld_i32:
2439    case INDEX_op_ld32u_i64:
2440        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2441        break;
2442    case INDEX_op_ld32s_i64:
2443        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2444        break;
2445    case INDEX_op_ld_i64:
2446        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2447        break;
2448    case INDEX_op_st8_i32:
2449    case INDEX_op_st8_i64:
2450        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2451        break;
2452    case INDEX_op_st16_i32:
2453    case INDEX_op_st16_i64:
2454        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2455        break;
2456    case INDEX_op_st_i32:
2457    case INDEX_op_st32_i64:
2458        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2459        break;
2460    case INDEX_op_st_i64:
2461        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2462        break;
2463
2464    case INDEX_op_add_i32:
2465        a0 = args[0], a1 = args[1], a2 = args[2];
2466        if (const_args[2]) {
2467        do_addi_32:
2468            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2469        } else {
2470            tcg_out32(s, ADD | TAB(a0, a1, a2));
2471        }
2472        break;
2473    case INDEX_op_sub_i32:
2474        a0 = args[0], a1 = args[1], a2 = args[2];
2475        if (const_args[1]) {
2476            if (const_args[2]) {
2477                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2478            } else {
2479                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2480            }
2481        } else if (const_args[2]) {
2482            a2 = -a2;
2483            goto do_addi_32;
2484        } else {
2485            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2486        }
2487        break;
2488
2489    case INDEX_op_and_i32:
2490        a0 = args[0], a1 = args[1], a2 = args[2];
2491        if (const_args[2]) {
2492            tcg_out_andi32(s, a0, a1, a2);
2493        } else {
2494            tcg_out32(s, AND | SAB(a1, a0, a2));
2495        }
2496        break;
2497    case INDEX_op_and_i64:
2498        a0 = args[0], a1 = args[1], a2 = args[2];
2499        if (const_args[2]) {
2500            tcg_out_andi64(s, a0, a1, a2);
2501        } else {
2502            tcg_out32(s, AND | SAB(a1, a0, a2));
2503        }
2504        break;
2505    case INDEX_op_or_i64:
2506    case INDEX_op_or_i32:
2507        a0 = args[0], a1 = args[1], a2 = args[2];
2508        if (const_args[2]) {
2509            tcg_out_ori32(s, a0, a1, a2);
2510        } else {
2511            tcg_out32(s, OR | SAB(a1, a0, a2));
2512        }
2513        break;
2514    case INDEX_op_xor_i64:
2515    case INDEX_op_xor_i32:
2516        a0 = args[0], a1 = args[1], a2 = args[2];
2517        if (const_args[2]) {
2518            tcg_out_xori32(s, a0, a1, a2);
2519        } else {
2520            tcg_out32(s, XOR | SAB(a1, a0, a2));
2521        }
2522        break;
2523    case INDEX_op_andc_i32:
2524        a0 = args[0], a1 = args[1], a2 = args[2];
2525        if (const_args[2]) {
2526            tcg_out_andi32(s, a0, a1, ~a2);
2527        } else {
2528            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2529        }
2530        break;
2531    case INDEX_op_andc_i64:
2532        a0 = args[0], a1 = args[1], a2 = args[2];
2533        if (const_args[2]) {
2534            tcg_out_andi64(s, a0, a1, ~a2);
2535        } else {
2536            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2537        }
2538        break;
2539    case INDEX_op_orc_i32:
2540        if (const_args[2]) {
2541            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2542            break;
2543        }
2544        /* FALLTHRU */
2545    case INDEX_op_orc_i64:
2546        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2547        break;
2548    case INDEX_op_eqv_i32:
2549        if (const_args[2]) {
2550            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2551            break;
2552        }
2553        /* FALLTHRU */
2554    case INDEX_op_eqv_i64:
2555        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2556        break;
2557    case INDEX_op_nand_i32:
2558    case INDEX_op_nand_i64:
2559        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2560        break;
2561    case INDEX_op_nor_i32:
2562    case INDEX_op_nor_i64:
2563        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2564        break;
2565
2566    case INDEX_op_clz_i32:
2567        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2568                      args[2], const_args[2]);
2569        break;
2570    case INDEX_op_ctz_i32:
2571        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2572                      args[2], const_args[2]);
2573        break;
2574    case INDEX_op_ctpop_i32:
2575        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2576        break;
2577
2578    case INDEX_op_clz_i64:
2579        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2580                      args[2], const_args[2]);
2581        break;
2582    case INDEX_op_ctz_i64:
2583        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2584                      args[2], const_args[2]);
2585        break;
2586    case INDEX_op_ctpop_i64:
2587        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2588        break;
2589
2590    case INDEX_op_mul_i32:
2591        a0 = args[0], a1 = args[1], a2 = args[2];
2592        if (const_args[2]) {
2593            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2594        } else {
2595            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2596        }
2597        break;
2598
2599    case INDEX_op_div_i32:
2600        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2601        break;
2602
2603    case INDEX_op_divu_i32:
2604        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2605        break;
2606
2607    case INDEX_op_shl_i32:
2608        if (const_args[2]) {
2609            /* Limit immediate shift count lest we create an illegal insn.  */
2610            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2611        } else {
2612            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2613        }
2614        break;
2615    case INDEX_op_shr_i32:
2616        if (const_args[2]) {
2617            /* Limit immediate shift count lest we create an illegal insn.  */
2618            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2619        } else {
2620            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2621        }
2622        break;
2623    case INDEX_op_sar_i32:
2624        if (const_args[2]) {
2625            /* Limit immediate shift count lest we create an illegal insn.  */
2626            tcg_out32(s, SRAWI | RS(args[1]) | RA(args[0]) | SH(args[2] & 31));
2627        } else {
2628            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2629        }
2630        break;
2631    case INDEX_op_rotl_i32:
2632        if (const_args[2]) {
2633            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2634        } else {
2635            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2636                         | MB(0) | ME(31));
2637        }
2638        break;
2639    case INDEX_op_rotr_i32:
2640        if (const_args[2]) {
2641            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2642        } else {
2643            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2644            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2645                         | MB(0) | ME(31));
2646        }
2647        break;
2648
2649    case INDEX_op_brcond_i32:
2650        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2651                       arg_label(args[3]), TCG_TYPE_I32);
2652        break;
2653    case INDEX_op_brcond_i64:
2654        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2655                       arg_label(args[3]), TCG_TYPE_I64);
2656        break;
2657    case INDEX_op_brcond2_i32:
2658        tcg_out_brcond2(s, args, const_args);
2659        break;
2660
2661    case INDEX_op_neg_i32:
2662    case INDEX_op_neg_i64:
2663        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2664        break;
2665
2666    case INDEX_op_not_i32:
2667    case INDEX_op_not_i64:
2668        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2669        break;
2670
2671    case INDEX_op_add_i64:
2672        a0 = args[0], a1 = args[1], a2 = args[2];
2673        if (const_args[2]) {
2674        do_addi_64:
2675            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2676        } else {
2677            tcg_out32(s, ADD | TAB(a0, a1, a2));
2678        }
2679        break;
2680    case INDEX_op_sub_i64:
2681        a0 = args[0], a1 = args[1], a2 = args[2];
2682        if (const_args[1]) {
2683            if (const_args[2]) {
2684                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2685            } else {
2686                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2687            }
2688        } else if (const_args[2]) {
2689            a2 = -a2;
2690            goto do_addi_64;
2691        } else {
2692            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2693        }
2694        break;
2695
2696    case INDEX_op_shl_i64:
2697        if (const_args[2]) {
2698            /* Limit immediate shift count lest we create an illegal insn.  */
2699            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2700        } else {
2701            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2702        }
2703        break;
2704    case INDEX_op_shr_i64:
2705        if (const_args[2]) {
2706            /* Limit immediate shift count lest we create an illegal insn.  */
2707            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2708        } else {
2709            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2710        }
2711        break;
2712    case INDEX_op_sar_i64:
2713        if (const_args[2]) {
2714            int sh = SH(args[2] & 0x1f) | (((args[2] >> 5) & 1) << 1);
2715            tcg_out32(s, SRADI | RA(args[0]) | RS(args[1]) | sh);
2716        } else {
2717            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2718        }
2719        break;
2720    case INDEX_op_rotl_i64:
2721        if (const_args[2]) {
2722            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2723        } else {
2724            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2725        }
2726        break;
2727    case INDEX_op_rotr_i64:
2728        if (const_args[2]) {
2729            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2730        } else {
2731            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2732            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2733        }
2734        break;
2735
2736    case INDEX_op_mul_i64:
2737        a0 = args[0], a1 = args[1], a2 = args[2];
2738        if (const_args[2]) {
2739            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2740        } else {
2741            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2742        }
2743        break;
2744    case INDEX_op_div_i64:
2745        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2746        break;
2747    case INDEX_op_divu_i64:
2748        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2749        break;
2750
2751    case INDEX_op_qemu_ld_i32:
2752        tcg_out_qemu_ld(s, args, false);
2753        break;
2754    case INDEX_op_qemu_ld_i64:
2755        tcg_out_qemu_ld(s, args, true);
2756        break;
2757    case INDEX_op_qemu_st_i32:
2758        tcg_out_qemu_st(s, args, false);
2759        break;
2760    case INDEX_op_qemu_st_i64:
2761        tcg_out_qemu_st(s, args, true);
2762        break;
2763
2764    case INDEX_op_ext8s_i32:
2765    case INDEX_op_ext8s_i64:
2766        c = EXTSB;
2767        goto gen_ext;
2768    case INDEX_op_ext16s_i32:
2769    case INDEX_op_ext16s_i64:
2770        c = EXTSH;
2771        goto gen_ext;
2772    case INDEX_op_ext_i32_i64:
2773    case INDEX_op_ext32s_i64:
2774        c = EXTSW;
2775        goto gen_ext;
2776    gen_ext:
2777        tcg_out32(s, c | RS(args[1]) | RA(args[0]));
2778        break;
2779    case INDEX_op_extu_i32_i64:
2780        tcg_out_ext32u(s, args[0], args[1]);
2781        break;
2782
2783    case INDEX_op_setcond_i32:
2784        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2785                        const_args[2]);
2786        break;
2787    case INDEX_op_setcond_i64:
2788        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2789                        const_args[2]);
2790        break;
2791    case INDEX_op_setcond2_i32:
2792        tcg_out_setcond2(s, args, const_args);
2793        break;
2794
2795    case INDEX_op_bswap16_i32:
2796    case INDEX_op_bswap16_i64:
2797        a0 = args[0], a1 = args[1];
2798        /* a1 = abcd */
2799        if (a0 != a1) {
2800            /* a0 = (a1 r<< 24) & 0xff # 000c */
2801            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2802            /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
2803            tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
2804        } else {
2805            /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
2806            tcg_out_rlw(s, RLWINM, TCG_REG_R0, a1, 8, 16, 23);
2807            /* a0 = (a1 r<< 24) & 0xff # 000c */
2808            tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
2809            /* a0 = a0 | r0 # 00dc */
2810            tcg_out32(s, OR | SAB(TCG_REG_R0, a0, a0));
2811        }
2812        break;
2813
2814    case INDEX_op_bswap32_i32:
2815    case INDEX_op_bswap32_i64:
2816        /* Stolen from gcc's builtin_bswap32 */
2817        a1 = args[1];
2818        a0 = args[0] == a1 ? TCG_REG_R0 : args[0];
2819
2820        /* a1 = args[1] # abcd */
2821        /* a0 = rotate_left (a1, 8) # bcda */
2822        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2823        /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
2824        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2825        /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
2826        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2827
2828        if (a0 == TCG_REG_R0) {
2829            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2830        }
2831        break;
2832
2833    case INDEX_op_bswap64_i64:
2834        a0 = args[0], a1 = args[1], a2 = TCG_REG_R0;
2835        if (a0 == a1) {
2836            a0 = TCG_REG_R0;
2837            a2 = a1;
2838        }
2839
2840        /* a1 = # abcd efgh */
2841        /* a0 = rl32(a1, 8) # 0000 fghe */
2842        tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
2843        /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
2844        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
2845        /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
2846        tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
2847
2848        /* a0 = rl64(a0, 32) # hgfe 0000 */
2849        /* a2 = rl64(a1, 32) # efgh abcd */
2850        tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
2851        tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
2852
2853        /* a0 = dep(a0, rl32(a2, 8), 0xffffffff)  # hgfe bcda */
2854        tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
2855        /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
2856        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
2857        /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
2858        tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
2859
2860        if (a0 == TCG_REG_R0) {
2861            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2862        }
2863        break;
2864
2865    case INDEX_op_deposit_i32:
2866        if (const_args[2]) {
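            /* The constraints accept only constant zero for the value
               operand, so a constant deposit reduces to clearing the
               field; likewise for the i64 case below.  */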
2867            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
2868            tcg_out_andi32(s, args[0], args[0], ~mask);
2869        } else {
2870            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
2871                        32 - args[3] - args[4], 31 - args[3]);
2872        }
2873        break;
2874    case INDEX_op_deposit_i64:
2875        if (const_args[2]) {
2876            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
2877            tcg_out_andi64(s, args[0], args[0], ~mask);
2878        } else {
2879            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
2880                        64 - args[3] - args[4]);
2881        }
2882        break;
2883
2884    case INDEX_op_extract_i32:
2885        tcg_out_rlw(s, RLWINM, args[0], args[1],
2886                    32 - args[2], 32 - args[3], 31);
2887        break;
2888    case INDEX_op_extract_i64:
2889        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
2890        break;
2891
2892    case INDEX_op_movcond_i32:
2893        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
2894                        args[3], args[4], const_args[2]);
2895        break;
2896    case INDEX_op_movcond_i64:
2897        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
2898                        args[3], args[4], const_args[2]);
2899        break;
2900
2901#if TCG_TARGET_REG_BITS == 64
2902    case INDEX_op_add2_i64:
2903#else
2904    case INDEX_op_add2_i32:
2905#endif
2906        /* Note that the CA bit is defined based on the word size of the
2907           environment.  So in 64-bit mode it's always carry-out of bit 63.
2908           The fallback code using deposit works just as well for 32-bit.  */
2909        a0 = args[0], a1 = args[1];
2910        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
2911            a0 = TCG_REG_R0;
2912        }
2913        if (const_args[4]) {
2914            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
2915        } else {
2916            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
2917        }
2918        if (const_args[5]) {
2919            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
2920        } else {
2921            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
2922        }
2923        if (a0 != args[0]) {
2924            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2925        }
2926        break;
2927
2928#if TCG_TARGET_REG_BITS == 64
2929    case INDEX_op_sub2_i64:
2930#else
2931    case INDEX_op_sub2_i32:
2932#endif
2933        a0 = args[0], a1 = args[1];
2934        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
2935            a0 = TCG_REG_R0;
2936        }
2937        if (const_args[2]) {
2938            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
2939        } else {
2940            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
2941        }
2942        if (const_args[3]) {
2943            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
2944        } else {
2945            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
2946        }
2947        if (a0 != args[0]) {
2948            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
2949        }
2950        break;
2951
2952    case INDEX_op_muluh_i32:
2953        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
2954        break;
2955    case INDEX_op_mulsh_i32:
2956        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
2957        break;
2958    case INDEX_op_muluh_i64:
2959        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
2960        break;
2961    case INDEX_op_mulsh_i64:
2962        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
2963        break;
2964
2965    case INDEX_op_mb:
2966        tcg_out_mb(s, args[0]);
2967        break;
2968
2969    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
2970    case INDEX_op_mov_i64:
2971    case INDEX_op_movi_i32:  /* Always emitted via tcg_out_movi.  */
2972    case INDEX_op_movi_i64:
2973    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
2974    default:
2975        tcg_abort();
2976    }
2977}
2978
2979int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2980{
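    /* Return 1 if the op is supported directly, -1 if it can be
       emulated via expansion in tcg_expand_vec_op, and 0 if not.  */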
2981    switch (opc) {
2982    case INDEX_op_and_vec:
2983    case INDEX_op_or_vec:
2984    case INDEX_op_xor_vec:
2985    case INDEX_op_andc_vec:
2986    case INDEX_op_not_vec:
2987        return 1;
2988    case INDEX_op_orc_vec:
2989        return have_isa_2_07;
2990    case INDEX_op_add_vec:
2991    case INDEX_op_sub_vec:
2992    case INDEX_op_smax_vec:
2993    case INDEX_op_smin_vec:
2994    case INDEX_op_umax_vec:
2995    case INDEX_op_umin_vec:
2996    case INDEX_op_shlv_vec:
2997    case INDEX_op_shrv_vec:
2998    case INDEX_op_sarv_vec:
2999    case INDEX_op_rotlv_vec:
3000        return vece <= MO_32 || have_isa_2_07;
3001    case INDEX_op_ssadd_vec:
3002    case INDEX_op_sssub_vec:
3003    case INDEX_op_usadd_vec:
3004    case INDEX_op_ussub_vec:
3005        return vece <= MO_32;
3006    case INDEX_op_cmp_vec:
3007    case INDEX_op_shli_vec:
3008    case INDEX_op_shri_vec:
3009    case INDEX_op_sari_vec:
3010    case INDEX_op_rotli_vec:
3011        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3012    case INDEX_op_neg_vec:
3013        return vece >= MO_32 && have_isa_3_00;
3014    case INDEX_op_mul_vec:
3015        switch (vece) {
3016        case MO_8:
3017        case MO_16:
3018            return -1;
3019        case MO_32:
3020            return have_isa_2_07 ? 1 : -1;
3021        case MO_64:
3022            return have_isa_3_10;
3023        }
3024        return 0;
3025    case INDEX_op_bitsel_vec:
3026        return have_vsx;
3027    case INDEX_op_rotrv_vec:
3028        return -1;
3029    default:
3030        return 0;
3031    }
3032}
3033
3034static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3035                            TCGReg dst, TCGReg src)
3036{
3037    tcg_debug_assert(dst >= TCG_REG_V0);
3038
3039    /* Splat from integer reg allowed via constraints for v3.00.  */
3040    if (src < TCG_REG_V0) {
3041        tcg_debug_assert(have_isa_3_00);
3042        switch (vece) {
3043        case MO_64:
3044            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3045            return true;
3046        case MO_32:
3047            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3048            return true;
3049        default:
3050            /* Fail, so that we fall back on either dupm or mov+dup.  */
3051            return false;
3052        }
3053    }
3054
3055    /*
3056     * Recall we use (or emulate) VSX integer loads, so the integer is
3057     * right justified within the left (zero-index) double-word.
3058     */
3059    switch (vece) {
3060    case MO_8:
3061        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3062        break;
3063    case MO_16:
3064        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3065        break;
3066    case MO_32:
3067        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3068        break;
3069    case MO_64:
3070        if (have_vsx) {
3071            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3072            break;
3073        }
3074        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3075        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3076        break;
3077    default:
3078        g_assert_not_reached();
3079    }
3080    return true;
3081}
3082
3083static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3084                             TCGReg out, TCGReg base, intptr_t offset)
3085{
3086    int elt;
3087
3088    tcg_debug_assert(out >= TCG_REG_V0);
3089    switch (vece) {
3090    case MO_8:
3091        if (have_isa_3_00) {
3092            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3093        } else {
3094            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3095        }
3096        elt = extract32(offset, 0, 4);
3097#ifndef HOST_WORDS_BIGENDIAN
3098        elt ^= 15;
3099#endif
3100        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3101        break;
3102    case MO_16:
3103        tcg_debug_assert((offset & 1) == 0);
3104        if (have_isa_3_00) {
3105            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3106        } else {
3107            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3108        }
3109        elt = extract32(offset, 1, 3);
3110#ifndef HOST_WORDS_BIGENDIAN
3111        elt ^= 7;
3112#endif
3113        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3114        break;
3115    case MO_32:
3116        if (have_isa_3_00) {
3117            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3118            break;
3119        }
3120        tcg_debug_assert((offset & 3) == 0);
3121        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3122        elt = extract32(offset, 2, 2);
3123#ifndef HOST_WORDS_BIGENDIAN
3124        elt ^= 3;
3125#endif
3126        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3127        break;
3128    case MO_64:
3129        if (have_vsx) {
3130            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3131            break;
3132        }
3133        tcg_debug_assert((offset & 7) == 0);
3134        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3135        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3136        elt = extract32(offset, 3, 1);
3137#ifndef HOST_WORDS_BIGENDIAN
3138        elt = !elt;
3139#endif
3140        if (elt) {
3141            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3142        } else {
3143            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3144        }
3145        break;
3146    default:
3147        g_assert_not_reached();
3148    }
3149    return true;
3150}
3151
3152static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3153                           unsigned vecl, unsigned vece,
3154                           const TCGArg *args, const int *const_args)
3155{
3156    static const uint32_t
3157        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3158        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3159        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3160        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3161        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3162        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3163        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3164        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3165        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3166        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3167        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3168        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3169        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3170        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3171        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3172        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3173        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3174        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3175        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3176        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3177        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3178        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3179        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3180        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3181        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
3182
3183    TCGType type = vecl + TCG_TYPE_V64;
3184    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3185    uint32_t insn;
3186
3187    switch (opc) {
3188    case INDEX_op_ld_vec:
3189        tcg_out_ld(s, type, a0, a1, a2);
3190        return;
3191    case INDEX_op_st_vec:
3192        tcg_out_st(s, type, a0, a1, a2);
3193        return;
3194    case INDEX_op_dupm_vec:
3195        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3196        return;
3197
3198    case INDEX_op_add_vec:
3199        insn = add_op[vece];
3200        break;
3201    case INDEX_op_sub_vec:
3202        insn = sub_op[vece];
3203        break;
3204    case INDEX_op_neg_vec:
3205        insn = neg_op[vece];
3206        a2 = a1;
3207        a1 = 0;
3208        break;
3209    case INDEX_op_mul_vec:
3210        insn = mul_op[vece];
3211        break;
3212    case INDEX_op_ssadd_vec:
3213        insn = ssadd_op[vece];
3214        break;
3215    case INDEX_op_sssub_vec:
3216        insn = sssub_op[vece];
3217        break;
3218    case INDEX_op_usadd_vec:
3219        insn = usadd_op[vece];
3220        break;
3221    case INDEX_op_ussub_vec:
3222        insn = ussub_op[vece];
3223        break;
3224    case INDEX_op_smin_vec:
3225        insn = smin_op[vece];
3226        break;
3227    case INDEX_op_umin_vec:
3228        insn = umin_op[vece];
3229        break;
3230    case INDEX_op_smax_vec:
3231        insn = smax_op[vece];
3232        break;
3233    case INDEX_op_umax_vec:
3234        insn = umax_op[vece];
3235        break;
3236    case INDEX_op_shlv_vec:
3237        insn = shlv_op[vece];
3238        break;
3239    case INDEX_op_shrv_vec:
3240        insn = shrv_op[vece];
3241        break;
3242    case INDEX_op_sarv_vec:
3243        insn = sarv_op[vece];
3244        break;
3245    case INDEX_op_and_vec:
3246        insn = VAND;
3247        break;
3248    case INDEX_op_or_vec:
3249        insn = VOR;
3250        break;
3251    case INDEX_op_xor_vec:
3252        insn = VXOR;
3253        break;
3254    case INDEX_op_andc_vec:
3255        insn = VANDC;
3256        break;
3257    case INDEX_op_not_vec:
3258        insn = VNOR;
3259        a2 = a1;
3260        break;
3261    case INDEX_op_orc_vec:
3262        insn = VORC;
3263        break;
3264
3265    case INDEX_op_cmp_vec:
3266        switch (args[3]) {
3267        case TCG_COND_EQ:
3268            insn = eq_op[vece];
3269            break;
3270        case TCG_COND_NE:
3271            insn = ne_op[vece];
3272            break;
3273        case TCG_COND_GT:
3274            insn = gts_op[vece];
3275            break;
3276        case TCG_COND_GTU:
3277            insn = gtu_op[vece];
3278            break;
3279        default:
3280            g_assert_not_reached();
3281        }
3282        break;
3283
3284    case INDEX_op_bitsel_vec:
3285        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3286        return;
3287
3288    case INDEX_op_dup2_vec:
3289        assert(TCG_TARGET_REG_BITS == 32);
3290        /* With inputs a1 = xLxx, a2 = xHxx  */
3291        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3292        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3293        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3294        return;
3295
3296    case INDEX_op_ppc_mrgh_vec:
3297        insn = mrgh_op[vece];
3298        break;
3299    case INDEX_op_ppc_mrgl_vec:
3300        insn = mrgl_op[vece];
3301        break;
3302    case INDEX_op_ppc_muleu_vec:
3303        insn = muleu_op[vece];
3304        break;
3305    case INDEX_op_ppc_mulou_vec:
3306        insn = mulou_op[vece];
3307        break;
3308    case INDEX_op_ppc_pkum_vec:
3309        insn = pkum_op[vece];
3310        break;
3311    case INDEX_op_rotlv_vec:
3312        insn = rotl_op[vece];
3313        break;
3314    case INDEX_op_ppc_msum_vec:
3315        tcg_debug_assert(vece == MO_16);
3316        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3317        return;
3318
3319    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3320    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
3321    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3322    default:
3323        g_assert_not_reached();
3324    }
3325
3326    tcg_debug_assert(insn != 0);
3327    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3328}
3329
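/*
 * Expand a shift/rotate by immediate into the vector-vector form:
 * splat the count into a temporary and emit the corresponding *v op.
 */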
3330static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3331                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3332{
3333    TCGv_vec t1 = tcg_temp_new_vec(type);
3334
3335    /* Splat the masked count as bytes, allowing xxspltib; the
           shifts use only the low bits of each element anyway.  */
3336    tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
3337    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3338              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3339    tcg_temp_free_vec(t1);
3340}
3341
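/*
 * Altivec compares set each element to all-ones or all-zeros, but the
 * only conditions provided are EQ, signed GT and unsigned GT (plus NE
 * at word size and below on ISA 3.0; doubleword compares require
 * 2.07, hence the assert below).  Derive the remaining conditions by
 * swapping the operands and/or inverting the sense of the result.
 */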
3342static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3343                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3344{
3345    bool need_swap = false, need_inv = false;
3346
3347    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3348
3349    switch (cond) {
3350    case TCG_COND_EQ:
3351    case TCG_COND_GT:
3352    case TCG_COND_GTU:
3353        break;
3354    case TCG_COND_NE:
3355        if (have_isa_3_00 && vece <= MO_32) {
3356            break;
3357        }
3358        /* fall through */
3359    case TCG_COND_LE:
3360    case TCG_COND_LEU:
3361        need_inv = true;
3362        break;
3363    case TCG_COND_LT:
3364    case TCG_COND_LTU:
3365        need_swap = true;
3366        break;
3367    case TCG_COND_GE:
3368    case TCG_COND_GEU:
3369        need_swap = need_inv = true;
3370        break;
3371    default:
3372        g_assert_not_reached();
3373    }
3374
3375    if (need_inv) {
3376        cond = tcg_invert_cond(cond);
3377    }
3378    if (need_swap) {
3379        TCGv_vec t1;
3380        t1 = v1, v1 = v2, v2 = t1;
3381        cond = tcg_swap_cond(cond);
3382    }
3383
3384    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3385              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3386
3387    if (need_inv) {
3388        tcg_gen_not_vec(vece, v0, v0);
3389    }
3390}
3391
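/*
 * Expand mul_vec for element sizes that lack a single instruction.
 * MO_8/MO_16 use the even/odd widening multiplies, re-interleaved and
 * packed back into element order; MO_32 (pre-2.07 only, later ISAs
 * have vmuluwm) is synthesized from halfword primitives.
 */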
3392static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3393                           TCGv_vec v1, TCGv_vec v2)
3394{
3395    TCGv_vec t1 = tcg_temp_new_vec(type);
3396    TCGv_vec t2 = tcg_temp_new_vec(type);
3397    TCGv_vec t3, t4;
3398
3399    switch (vece) {
3400    case MO_8:
3401    case MO_16:
3402        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3403                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3404        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3405                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3406        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3407                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3408        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3409                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3410        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3411                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3412        break;
3413
3414    case MO_32:
3415        tcg_debug_assert(!have_isa_2_07);
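        /*
         * Synthesize the 32-bit product from halfword primitives:
         *   a * b = lo16(a)*lo16(b)
         *         + ((lo16(a)*hi16(b) + hi16(a)*lo16(b)) << 16)
         * vmulouh gives the low product; vmsumuhm against a
         * half-swapped copy of b (rotate left 16) sums the two
         * cross products.
         */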
3416        t3 = tcg_temp_new_vec(type);
3417        t4 = tcg_temp_new_vec(type);
3418        tcg_gen_dupi_vec(MO_8, t4, -16);
3419        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3420                  tcgv_vec_arg(v2), tcgv_vec_arg(t4));
3421        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3422                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3423        tcg_gen_dupi_vec(MO_8, t3, 0);
3424        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t3),
3425                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(t3));
3426        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t3),
3427                  tcgv_vec_arg(t3), tcgv_vec_arg(t4));
3428        tcg_gen_add_vec(MO_32, v0, t2, t3);
3429        tcg_temp_free_vec(t3);
3430        tcg_temp_free_vec(t4);
3431        break;
3432
3433    default:
3434        g_assert_not_reached();
3435    }
3436    tcg_temp_free_vec(t1);
3437    tcg_temp_free_vec(t2);
3438}
3439
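/*
 * Entry from the middle-end for vector ops that this backend reported
 * as requiring expansion: rewrite them in terms of operations that
 * tcg_out_vec_op can emit directly.
 */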
3440void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3441                       TCGArg a0, ...)
3442{
3443    va_list va;
3444    TCGv_vec v0, v1, v2, t0;
3445    TCGArg a2;
3446
3447    va_start(va, a0);
3448    v0 = temp_tcgv_vec(arg_temp(a0));
3449    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3450    a2 = va_arg(va, TCGArg);
3451
3452    switch (opc) {
3453    case INDEX_op_shli_vec:
3454        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3455        break;
3456    case INDEX_op_shri_vec:
3457        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3458        break;
3459    case INDEX_op_sari_vec:
3460        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3461        break;
3462    case INDEX_op_rotli_vec:
3463        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3464        break;
3465    case INDEX_op_cmp_vec:
3466        v2 = temp_tcgv_vec(arg_temp(a2));
3467        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3468        break;
3469    case INDEX_op_mul_vec:
3470        v2 = temp_tcgv_vec(arg_temp(a2));
3471        expand_vec_mul(type, vece, v0, v1, v2);
3472        break;
3473    case INDEX_op_rotrv_vec:
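        /* There is no vector rotate-right; rotate left by -count. */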
3474        v2 = temp_tcgv_vec(arg_temp(a2));
3475        t0 = tcg_temp_new_vec(type);
3476        tcg_gen_neg_vec(vece, t0, v2);
3477        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3478        tcg_temp_free_vec(t0);
3479        break;
3480    default:
3481        g_assert_not_reached();
3482    }
3483    va_end(va);
3484}
3485
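/*
 * Operand constraint strings, one element per operand: "r" is any gpr
 * and "v" any vector register, "i" any immediate, "0" an alias of
 * output operand 0.  "L" and "S" are gprs restricted for the qemu_ld
 * and qemu_st helpers, and the remaining capitals accept particular
 * constants ("I" sext16, "U" uext32, "Z" zero, "M" -1, "W" the word
 * size), as decoded by this file's constraint parsing.
 */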
3486static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
3487{
3488    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
3489    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
3490    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
3491    static const TCGTargetOpDef S_S = { .args_ct_str = { "S", "S" } };
3492    static const TCGTargetOpDef r_ri = { .args_ct_str = { "r", "ri" } };
3493    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
3494    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
3495    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
3496    static const TCGTargetOpDef S_S_S = { .args_ct_str = { "S", "S", "S" } };
3497    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
3498    static const TCGTargetOpDef r_r_rI = { .args_ct_str = { "r", "r", "rI" } };
3499    static const TCGTargetOpDef r_r_rT = { .args_ct_str = { "r", "r", "rT" } };
3500    static const TCGTargetOpDef r_r_rU = { .args_ct_str = { "r", "r", "rU" } };
3501    static const TCGTargetOpDef r_rI_ri
3502        = { .args_ct_str = { "r", "rI", "ri" } };
3503    static const TCGTargetOpDef r_rI_rT
3504        = { .args_ct_str = { "r", "rI", "rT" } };
3505    static const TCGTargetOpDef r_r_rZW
3506        = { .args_ct_str = { "r", "r", "rZW" } };
3507    static const TCGTargetOpDef L_L_L_L
3508        = { .args_ct_str = { "L", "L", "L", "L" } };
3509    static const TCGTargetOpDef S_S_S_S
3510        = { .args_ct_str = { "S", "S", "S", "S" } };
3511    static const TCGTargetOpDef movc
3512        = { .args_ct_str = { "r", "r", "ri", "rZ", "rZ" } };
3513    static const TCGTargetOpDef dep
3514        = { .args_ct_str = { "r", "0", "rZ" } };
3515    static const TCGTargetOpDef br2
3516        = { .args_ct_str = { "r", "r", "ri", "ri" } };
3517    static const TCGTargetOpDef setc2
3518        = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
3519    static const TCGTargetOpDef add2
3520        = { .args_ct_str = { "r", "r", "r", "r", "rI", "rZM" } };
3521    static const TCGTargetOpDef sub2
3522        = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
3523    static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
3524    static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
3525    static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
3526    static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
3527    static const TCGTargetOpDef v_v_v_v
3528        = { .args_ct_str = { "v", "v", "v", "v" } };
3529
3530    switch (op) {
3531    case INDEX_op_goto_ptr:
3532        return &r;
3533
3534    case INDEX_op_ld8u_i32:
3535    case INDEX_op_ld8s_i32:
3536    case INDEX_op_ld16u_i32:
3537    case INDEX_op_ld16s_i32:
3538    case INDEX_op_ld_i32:
3539    case INDEX_op_st8_i32:
3540    case INDEX_op_st16_i32:
3541    case INDEX_op_st_i32:
3542    case INDEX_op_ctpop_i32:
3543    case INDEX_op_neg_i32:
3544    case INDEX_op_not_i32:
3545    case INDEX_op_ext8s_i32:
3546    case INDEX_op_ext16s_i32:
3547    case INDEX_op_bswap16_i32:
3548    case INDEX_op_bswap32_i32:
3549    case INDEX_op_extract_i32:
3550    case INDEX_op_ld8u_i64:
3551    case INDEX_op_ld8s_i64:
3552    case INDEX_op_ld16u_i64:
3553    case INDEX_op_ld16s_i64:
3554    case INDEX_op_ld32u_i64:
3555    case INDEX_op_ld32s_i64:
3556    case INDEX_op_ld_i64:
3557    case INDEX_op_st8_i64:
3558    case INDEX_op_st16_i64:
3559    case INDEX_op_st32_i64:
3560    case INDEX_op_st_i64:
3561    case INDEX_op_ctpop_i64:
3562    case INDEX_op_neg_i64:
3563    case INDEX_op_not_i64:
3564    case INDEX_op_ext8s_i64:
3565    case INDEX_op_ext16s_i64:
3566    case INDEX_op_ext32s_i64:
3567    case INDEX_op_ext_i32_i64:
3568    case INDEX_op_extu_i32_i64:
3569    case INDEX_op_bswap16_i64:
3570    case INDEX_op_bswap32_i64:
3571    case INDEX_op_bswap64_i64:
3572    case INDEX_op_extract_i64:
3573        return &r_r;
3574
3575    case INDEX_op_add_i32:
3576    case INDEX_op_and_i32:
3577    case INDEX_op_or_i32:
3578    case INDEX_op_xor_i32:
3579    case INDEX_op_andc_i32:
3580    case INDEX_op_orc_i32:
3581    case INDEX_op_eqv_i32:
3582    case INDEX_op_shl_i32:
3583    case INDEX_op_shr_i32:
3584    case INDEX_op_sar_i32:
3585    case INDEX_op_rotl_i32:
3586    case INDEX_op_rotr_i32:
3587    case INDEX_op_setcond_i32:
3588    case INDEX_op_and_i64:
3589    case INDEX_op_andc_i64:
3590    case INDEX_op_shl_i64:
3591    case INDEX_op_shr_i64:
3592    case INDEX_op_sar_i64:
3593    case INDEX_op_rotl_i64:
3594    case INDEX_op_rotr_i64:
3595    case INDEX_op_setcond_i64:
3596        return &r_r_ri;
3597    case INDEX_op_mul_i32:
3598    case INDEX_op_mul_i64:
3599        return &r_r_rI;
3600    case INDEX_op_div_i32:
3601    case INDEX_op_divu_i32:
3602    case INDEX_op_nand_i32:
3603    case INDEX_op_nor_i32:
3604    case INDEX_op_muluh_i32:
3605    case INDEX_op_mulsh_i32:
3606    case INDEX_op_orc_i64:
3607    case INDEX_op_eqv_i64:
3608    case INDEX_op_nand_i64:
3609    case INDEX_op_nor_i64:
3610    case INDEX_op_div_i64:
3611    case INDEX_op_divu_i64:
3612    case INDEX_op_mulsh_i64:
3613    case INDEX_op_muluh_i64:
3614        return &r_r_r;
3615    case INDEX_op_sub_i32:
3616        return &r_rI_ri;
3617    case INDEX_op_add_i64:
3618        return &r_r_rT;
3619    case INDEX_op_or_i64:
3620    case INDEX_op_xor_i64:
3621        return &r_r_rU;
3622    case INDEX_op_sub_i64:
3623        return &r_rI_rT;
3624    case INDEX_op_clz_i32:
3625    case INDEX_op_ctz_i32:
3626    case INDEX_op_clz_i64:
3627    case INDEX_op_ctz_i64:
3628        return &r_r_rZW;
3629
3630    case INDEX_op_brcond_i32:
3631    case INDEX_op_brcond_i64:
3632        return &r_ri;
3633
3634    case INDEX_op_movcond_i32:
3635    case INDEX_op_movcond_i64:
3636        return &movc;
3637    case INDEX_op_deposit_i32:
3638    case INDEX_op_deposit_i64:
3639        return &dep;
3640    case INDEX_op_brcond2_i32:
3641        return &br2;
3642    case INDEX_op_setcond2_i32:
3643        return &setc2;
3644    case INDEX_op_add2_i64:
3645    case INDEX_op_add2_i32:
3646        return &add2;
3647    case INDEX_op_sub2_i64:
3648    case INDEX_op_sub2_i32:
3649        return &sub2;
3650
3651    case INDEX_op_qemu_ld_i32:
3652        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3653                ? &r_L : &r_L_L);
3654    case INDEX_op_qemu_st_i32:
3655        return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
3656                ? &S_S : &S_S_S);
3657    case INDEX_op_qemu_ld_i64:
3658        return (TCG_TARGET_REG_BITS == 64 ? &r_L
3659                : TARGET_LONG_BITS == 32 ? &L_L_L : &L_L_L_L);
3660    case INDEX_op_qemu_st_i64:
3661        return (TCG_TARGET_REG_BITS == 64 ? &S_S
3662                : TARGET_LONG_BITS == 32 ? &S_S_S : &S_S_S_S);
3663
3664    case INDEX_op_add_vec:
3665    case INDEX_op_sub_vec:
3666    case INDEX_op_mul_vec:
3667    case INDEX_op_and_vec:
3668    case INDEX_op_or_vec:
3669    case INDEX_op_xor_vec:
3670    case INDEX_op_andc_vec:
3671    case INDEX_op_orc_vec:
3672    case INDEX_op_cmp_vec:
3673    case INDEX_op_ssadd_vec:
3674    case INDEX_op_sssub_vec:
3675    case INDEX_op_usadd_vec:
3676    case INDEX_op_ussub_vec:
3677    case INDEX_op_smax_vec:
3678    case INDEX_op_smin_vec:
3679    case INDEX_op_umax_vec:
3680    case INDEX_op_umin_vec:
3681    case INDEX_op_shlv_vec:
3682    case INDEX_op_shrv_vec:
3683    case INDEX_op_sarv_vec:
3684    case INDEX_op_rotlv_vec:
3685    case INDEX_op_rotrv_vec:
3686    case INDEX_op_ppc_mrgh_vec:
3687    case INDEX_op_ppc_mrgl_vec:
3688    case INDEX_op_ppc_muleu_vec:
3689    case INDEX_op_ppc_mulou_vec:
3690    case INDEX_op_ppc_pkum_vec:
3691    case INDEX_op_dup2_vec:
3692        return &v_v_v;
3693    case INDEX_op_not_vec:
3694    case INDEX_op_neg_vec:
3695        return &v_v;
3696    case INDEX_op_dup_vec:
3697        return have_isa_3_00 ? &v_vr : &v_v;
3698    case INDEX_op_ld_vec:
3699    case INDEX_op_st_vec:
3700    case INDEX_op_dupm_vec:
3701        return &v_r;
3702    case INDEX_op_bitsel_vec:
3703    case INDEX_op_ppc_msum_vec:
3704        return &v_v_v_v;
3705
3706    default:
3707        return NULL;
3708    }
3709}
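/*
 * Probe the host at startup: the ISA level and the Altivec/VSX
 * facilities are taken from the kernel's AT_HWCAP/AT_HWCAP2 auxv
 * entries.
 */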
3710
3711static void tcg_target_init(TCGContext *s)
3712{
3713    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3714    unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
3715
3716    have_isa = tcg_isa_base;
3717    if (hwcap & PPC_FEATURE_ARCH_2_06) {
3718        have_isa = tcg_isa_2_06;
3719    }
3720#ifdef PPC_FEATURE2_ARCH_2_07
3721    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
3722        have_isa = tcg_isa_2_07;
3723    }
3724#endif
3725#ifdef PPC_FEATURE2_ARCH_3_00
3726    if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
3727        have_isa = tcg_isa_3_00;
3728    }
3729#endif
3730#ifdef PPC_FEATURE2_ARCH_3_10
3731    if (hwcap2 & PPC_FEATURE2_ARCH_3_10) {
3732        have_isa = tcg_isa_3_10;
3733    }
3734#endif
3735
3736#ifdef PPC_FEATURE2_HAS_ISEL
3737    /* Prefer the kernel's explicit indication of isel support. */
3738    have_isel = (hwcap2 & PPC_FEATURE2_HAS_ISEL) != 0;
3739#else
3740    /* Fall back to knowing Power7 (2.06) has ISEL. */
3741    have_isel = have_isa_2_06;
3742#endif
3743
3744    if (hwcap & PPC_FEATURE_HAS_ALTIVEC) {
3745        have_altivec = true;
3746        /* We only care about the portion of VSX that overlaps Altivec. */
3747        if (hwcap & PPC_FEATURE_HAS_VSX) {
3748            have_vsx = true;
3749        }
3750    }
3751
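    /* All gprs are allocatable; reserved ones are carved out below. */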
3752    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3753    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3754    if (have_altivec) {
3755        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3756        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3757    }
3758
3759    tcg_target_call_clobber_regs = 0;
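    /* Mark the volatile (caller-saved) registers: r0, r2-r12, v0-v19. */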
3760    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3761    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3762    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3763    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3764    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3765    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3766    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
3767    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
3768    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
3769    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
3770    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
3771    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
3772
3773    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3774    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3775    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3776    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3777    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3778    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3779    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3780    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3781    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3782    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3783    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3784    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3785    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3786    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3787    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3788    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3789    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3790    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3791    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3792    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3793
3794    s->reserved_regs = 0;
3795    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
3796    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
3797#if defined(_CALL_SYSV)
3798    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
3799#endif
3800#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
3801    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
3802#endif
3803    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
3804    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
3805    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
3806    if (USE_REG_TB) {
3807        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
3808    }
3809}
3810
3811#ifdef __ELF__
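/*
 * DWARF unwind info for the generated-code buffer, registered with
 * the in-process GDB JIT interface by tcg_register_jit_int() so that
 * debuggers can backtrace through TCG frames.
 */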
3812typedef struct {
3813    DebugFrameCIE cie;
3814    DebugFrameFDEHeader fde;
3815    uint8_t fde_def_cfa[4];
3816    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
3817} DebugFrame;
3818
3819/* We're expecting a 2 byte uleb128 encoded value.  */
3820QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3821
3822#if TCG_TARGET_REG_BITS == 64
3823# define ELF_HOST_MACHINE EM_PPC64
3824#else
3825# define ELF_HOST_MACHINE EM_PPC
3826#endif
3827
3828static DebugFrame debug_frame = {
3829    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3830    .cie.id = -1,
3831    .cie.version = 1,
3832    .cie.code_align = 1,
3833    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
3834    .cie.return_column = 65,
3835
3836    /* Total FDE size does not include the "len" member.  */
3837    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
3838
3839    .fde_def_cfa = {
3840        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
3841        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3842        (FRAME_SIZE >> 7)
3843    },
3844    .fde_reg_ofs = {
3845        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
3846        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
3847    }
3848};
3849
3850void tcg_register_jit(void *buf, size_t buf_size)
3851{
3852    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3853    int i;
3854
3855    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3856        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3857        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3858    }
3859
3860    debug_frame.fde.func_start = (uintptr_t)buf;
3861    debug_frame.fde.func_len = buf_size;
3862
3863    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3864}
3865#endif /* __ELF__ */
3866
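/*
 * The standard PowerPC sequence for exposing newly written code:
 * write back the dcache lines (dcbst), order with sync, invalidate
 * the corresponding icache lines (icbi), then sync + isync before
 * execution.  Both loops step by the respective host cache line size.
 */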
3867void flush_icache_range(uintptr_t start, uintptr_t stop)
3868{
3869    uintptr_t p, start1, stop1;
3870    size_t dsize = qemu_dcache_linesize;
3871    size_t isize = qemu_icache_linesize;
3872
3873    start1 = start & ~(dsize - 1);
3874    stop1 = (stop + dsize - 1) & ~(dsize - 1);
3875    for (p = start1; p < stop1; p += dsize) {
3876        asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
3877    }
3878    asm volatile ("sync" : : : "memory");
3879
3880    start1 = start & ~(isize - 1);   /* realign for the icache line size */
3881    stop1 = (stop + isize - 1) & ~(isize - 1);
3882    for (p = start1; p < stop1; p += isize) {
3883        asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
3884    }
3885    asm volatile ("sync" : : : "memory");
3886    asm volatile ("isync" : : : "memory");
3887}
3888