xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 136cb9cc)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26#include "../tcg-pool.c.inc"
27#include "../tcg-ldst.c.inc"
28
29/*
30 * Standardize on the _CALL_FOO symbols used by GCC:
31 * Apple XCode does not define _CALL_DARWIN.
32 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
33 */
34#if TCG_TARGET_REG_BITS == 64
35# ifdef _CALL_AIX
36    /* ok */
37# elif defined(_CALL_ELF) && _CALL_ELF == 1
38#  define _CALL_AIX
39# elif defined(_CALL_ELF) && _CALL_ELF == 2
40    /* ok */
41# else
42#  error "Unknown ABI"
43# endif
44#else
45# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
46    /* ok */
47# elif defined(__APPLE__)
48#  define _CALL_DARWIN
49# elif defined(__ELF__)
50#  define _CALL_SYSV
51# else
52#  error "Unknown ABI"
53# endif
54#endif
55
56#if TCG_TARGET_REG_BITS == 64
57# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
58# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
59#else
60# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
61# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
62#endif
63#ifdef _CALL_SYSV
64# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
65# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
66#else
67# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
68# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
69#endif
70
71/* For some memory operations, we need a scratch that isn't R0.  For the AIX
72   calling convention, we can re-use the TOC register since we'll be reloading
73   it at every call.  Otherwise R12 will do nicely as neither a call-saved
74   register nor a parameter register.  */
75#ifdef _CALL_AIX
76# define TCG_REG_TMP1   TCG_REG_R2
77#else
78# define TCG_REG_TMP1   TCG_REG_R12
79#endif
80#define TCG_REG_TMP2    TCG_REG_R11
81
82#define TCG_VEC_TMP1    TCG_REG_V0
83#define TCG_VEC_TMP2    TCG_REG_V1
84
85#define TCG_REG_TB     TCG_REG_R31
86#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)
87
88/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
89#define SZP  ((int)sizeof(void *))
90
91/* Shorthand for size of a register.  */
92#define SZR  (TCG_TARGET_REG_BITS / 8)
93
94#define TCG_CT_CONST_S16  0x100
95#define TCG_CT_CONST_S32  0x400
96#define TCG_CT_CONST_U32  0x800
97#define TCG_CT_CONST_ZERO 0x1000
98#define TCG_CT_CONST_MONE 0x2000
99#define TCG_CT_CONST_WSZ  0x4000
100
101#define ALL_GENERAL_REGS  0xffffffffu
102#define ALL_VECTOR_REGS   0xffffffff00000000ull
103
104#define have_isel  (cpuinfo & CPUINFO_ISEL)
105
106#ifndef CONFIG_SOFTMMU
107#define TCG_GUEST_BASE_REG 30
108#endif
109
#ifdef CONFIG_DEBUG_TCG
/* Register names used only for debug dumps; indexed by TCGReg. */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
122
/*
 * Register allocation preference order: call-saved GPRs first (survive
 * helper calls), then call-clobbered non-argument GPRs, then argument
 * GPRs in reverse order, then the call-clobbered vector registers.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
175
/* Registers used to pass integer call arguments, in slot order. */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
186
/*
 * Return the register holding call-return SLOT (0 or 1): R3 for the
 * low part, R4 for the second slot of a two-register return.
 */
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}
193
/* Registers saved/restored by the prologue/epilogue. */
static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
217
/* Return true if TARGET fits the signed 26-bit displacement of B/BL. */
static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
222
/*
 * Compute the 24-bit branch displacement (LI field) for a B insn at PC
 * targeting TARGET.  Asserts the target is within direct-branch range.
 */
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
			       const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}
230
231static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
232{
233    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
234    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
235
236    if (in_range_b(disp)) {
237        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
238        return true;
239    }
240    return false;
241}
242
/*
 * Compute the 14-bit conditional-branch displacement (BD field) for a
 * BC insn at PC targeting TARGET.  Asserts the displacement fits.
 */
static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
			       const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}
250
251static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
252{
253    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
254    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
255
256    if (disp == (int16_t) disp) {
257        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
258        return true;
259    }
260    return false;
261}
262
263/* test if a constant matches the constraint */
264static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
265{
266    if (ct & TCG_CT_CONST) {
267        return 1;
268    }
269
270    /* The only 32-bit constraint we use aside from
271       TCG_CT_CONST is TCG_CT_CONST_S16.  */
272    if (type == TCG_TYPE_I32) {
273        val = (int32_t)val;
274    }
275
276    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
277        return 1;
278    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
279        return 1;
280    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
281        return 1;
282    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
283        return 1;
284    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
285        return 1;
286    } else if ((ct & TCG_CT_CONST_WSZ)
287               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
288        return 1;
289    }
290    return 0;
291}
292
293#define OPCD(opc) ((opc)<<26)
294#define XO19(opc) (OPCD(19)|((opc)<<1))
295#define MD30(opc) (OPCD(30)|((opc)<<2))
296#define MDS30(opc) (OPCD(30)|((opc)<<1))
297#define XO31(opc) (OPCD(31)|((opc)<<1))
298#define XO58(opc) (OPCD(58)|(opc))
299#define XO62(opc) (OPCD(62)|(opc))
300#define VX4(opc)  (OPCD(4)|(opc))
301
302#define B      OPCD( 18)
303#define BC     OPCD( 16)
304
305#define LBZ    OPCD( 34)
306#define LHZ    OPCD( 40)
307#define LHA    OPCD( 42)
308#define LWZ    OPCD( 32)
309#define LWZUX  XO31( 55)
310#define LD     XO58(  0)
311#define LDX    XO31( 21)
312#define LDU    XO58(  1)
313#define LDUX   XO31( 53)
314#define LWA    XO58(  2)
315#define LWAX   XO31(341)
316#define LQ     OPCD( 56)
317
318#define STB    OPCD( 38)
319#define STH    OPCD( 44)
320#define STW    OPCD( 36)
321#define STD    XO62(  0)
322#define STDU   XO62(  1)
323#define STDX   XO31(149)
324#define STQ    XO62(  2)
325
326#define ADDIC  OPCD( 12)
327#define ADDI   OPCD( 14)
328#define ADDIS  OPCD( 15)
329#define ORI    OPCD( 24)
330#define ORIS   OPCD( 25)
331#define XORI   OPCD( 26)
332#define XORIS  OPCD( 27)
333#define ANDI   OPCD( 28)
334#define ANDIS  OPCD( 29)
335#define MULLI  OPCD(  7)
336#define CMPLI  OPCD( 10)
337#define CMPI   OPCD( 11)
338#define SUBFIC OPCD( 8)
339
340#define LWZU   OPCD( 33)
341#define STWU   OPCD( 37)
342
343#define RLWIMI OPCD( 20)
344#define RLWINM OPCD( 21)
345#define RLWNM  OPCD( 23)
346
347#define RLDICL MD30(  0)
348#define RLDICR MD30(  1)
349#define RLDIMI MD30(  3)
350#define RLDCL  MDS30( 8)
351
352#define BCLR   XO19( 16)
353#define BCCTR  XO19(528)
354#define CRAND  XO19(257)
355#define CRANDC XO19(129)
356#define CRNAND XO19(225)
357#define CROR   XO19(449)
358#define CRNOR  XO19( 33)
359
360#define EXTSB  XO31(954)
361#define EXTSH  XO31(922)
362#define EXTSW  XO31(986)
363#define ADD    XO31(266)
364#define ADDE   XO31(138)
365#define ADDME  XO31(234)
366#define ADDZE  XO31(202)
367#define ADDC   XO31( 10)
368#define AND    XO31( 28)
369#define SUBF   XO31( 40)
370#define SUBFC  XO31(  8)
371#define SUBFE  XO31(136)
372#define SUBFME XO31(232)
373#define SUBFZE XO31(200)
374#define OR     XO31(444)
375#define XOR    XO31(316)
376#define MULLW  XO31(235)
377#define MULHW  XO31( 75)
378#define MULHWU XO31( 11)
379#define DIVW   XO31(491)
380#define DIVWU  XO31(459)
381#define MODSW  XO31(779)
382#define MODUW  XO31(267)
383#define CMP    XO31(  0)
384#define CMPL   XO31( 32)
385#define LHBRX  XO31(790)
386#define LWBRX  XO31(534)
387#define LDBRX  XO31(532)
388#define STHBRX XO31(918)
389#define STWBRX XO31(662)
390#define STDBRX XO31(660)
391#define MFSPR  XO31(339)
392#define MTSPR  XO31(467)
393#define SRAWI  XO31(824)
394#define NEG    XO31(104)
395#define MFCR   XO31( 19)
396#define MFOCRF (MFCR | (1u << 20))
397#define NOR    XO31(124)
398#define CNTLZW XO31( 26)
399#define CNTLZD XO31( 58)
400#define CNTTZW XO31(538)
401#define CNTTZD XO31(570)
402#define CNTPOPW XO31(378)
403#define CNTPOPD XO31(506)
404#define ANDC   XO31( 60)
405#define ORC    XO31(412)
406#define EQV    XO31(284)
407#define NAND   XO31(476)
408#define ISEL   XO31( 15)
409
410#define MULLD  XO31(233)
411#define MULHD  XO31( 73)
412#define MULHDU XO31(  9)
413#define DIVD   XO31(489)
414#define DIVDU  XO31(457)
415#define MODSD  XO31(777)
416#define MODUD  XO31(265)
417
418#define LBZX   XO31( 87)
419#define LHZX   XO31(279)
420#define LHAX   XO31(343)
421#define LWZX   XO31( 23)
422#define STBX   XO31(215)
423#define STHX   XO31(407)
424#define STWX   XO31(151)
425
426#define EIEIO  XO31(854)
427#define HWSYNC XO31(598)
428#define LWSYNC (HWSYNC | (1u << 21))
429
430#define SPR(a, b) ((((a)<<5)|(b))<<11)
431#define LR     SPR(8, 0)
432#define CTR    SPR(9, 0)
433
434#define SLW    XO31( 24)
435#define SRW    XO31(536)
436#define SRAW   XO31(792)
437
438#define SLD    XO31( 27)
439#define SRD    XO31(539)
440#define SRAD   XO31(794)
441#define SRADI  XO31(413<<1)
442
443#define BRH    XO31(219)
444#define BRW    XO31(155)
445#define BRD    XO31(187)
446
447#define TW     XO31( 4)
448#define TRAP   (TW | TO(31))
449
450#define NOP    ORI  /* ori 0,0,0 */
451
452#define LVX        XO31(103)
453#define LVEBX      XO31(7)
454#define LVEHX      XO31(39)
455#define LVEWX      XO31(71)
456#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
457#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
458#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
459#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
460#define LXSD       (OPCD(57) | 2)   /* v3.00 */
461#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
462
463#define STVX       XO31(231)
464#define STVEWX     XO31(199)
465#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
466#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
467#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
468#define STXSD      (OPCD(61) | 2)   /* v3.00 */
469
470#define VADDSBS    VX4(768)
471#define VADDUBS    VX4(512)
472#define VADDUBM    VX4(0)
473#define VADDSHS    VX4(832)
474#define VADDUHS    VX4(576)
475#define VADDUHM    VX4(64)
476#define VADDSWS    VX4(896)
477#define VADDUWS    VX4(640)
478#define VADDUWM    VX4(128)
479#define VADDUDM    VX4(192)       /* v2.07 */
480
481#define VSUBSBS    VX4(1792)
482#define VSUBUBS    VX4(1536)
483#define VSUBUBM    VX4(1024)
484#define VSUBSHS    VX4(1856)
485#define VSUBUHS    VX4(1600)
486#define VSUBUHM    VX4(1088)
487#define VSUBSWS    VX4(1920)
488#define VSUBUWS    VX4(1664)
489#define VSUBUWM    VX4(1152)
490#define VSUBUDM    VX4(1216)      /* v2.07 */
491
492#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
493#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
494
495#define VMAXSB     VX4(258)
496#define VMAXSH     VX4(322)
497#define VMAXSW     VX4(386)
498#define VMAXSD     VX4(450)       /* v2.07 */
499#define VMAXUB     VX4(2)
500#define VMAXUH     VX4(66)
501#define VMAXUW     VX4(130)
502#define VMAXUD     VX4(194)       /* v2.07 */
503#define VMINSB     VX4(770)
504#define VMINSH     VX4(834)
505#define VMINSW     VX4(898)
506#define VMINSD     VX4(962)       /* v2.07 */
507#define VMINUB     VX4(514)
508#define VMINUH     VX4(578)
509#define VMINUW     VX4(642)
510#define VMINUD     VX4(706)       /* v2.07 */
511
512#define VCMPEQUB   VX4(6)
513#define VCMPEQUH   VX4(70)
514#define VCMPEQUW   VX4(134)
515#define VCMPEQUD   VX4(199)       /* v2.07 */
516#define VCMPGTSB   VX4(774)
517#define VCMPGTSH   VX4(838)
518#define VCMPGTSW   VX4(902)
519#define VCMPGTSD   VX4(967)       /* v2.07 */
520#define VCMPGTUB   VX4(518)
521#define VCMPGTUH   VX4(582)
522#define VCMPGTUW   VX4(646)
523#define VCMPGTUD   VX4(711)       /* v2.07 */
524#define VCMPNEB    VX4(7)         /* v3.00 */
525#define VCMPNEH    VX4(71)        /* v3.00 */
526#define VCMPNEW    VX4(135)       /* v3.00 */
527
528#define VSLB       VX4(260)
529#define VSLH       VX4(324)
530#define VSLW       VX4(388)
531#define VSLD       VX4(1476)      /* v2.07 */
532#define VSRB       VX4(516)
533#define VSRH       VX4(580)
534#define VSRW       VX4(644)
535#define VSRD       VX4(1732)      /* v2.07 */
536#define VSRAB      VX4(772)
537#define VSRAH      VX4(836)
538#define VSRAW      VX4(900)
539#define VSRAD      VX4(964)       /* v2.07 */
540#define VRLB       VX4(4)
541#define VRLH       VX4(68)
542#define VRLW       VX4(132)
543#define VRLD       VX4(196)       /* v2.07 */
544
545#define VMULEUB    VX4(520)
546#define VMULEUH    VX4(584)
547#define VMULEUW    VX4(648)       /* v2.07 */
548#define VMULOUB    VX4(8)
549#define VMULOUH    VX4(72)
550#define VMULOUW    VX4(136)       /* v2.07 */
551#define VMULUWM    VX4(137)       /* v2.07 */
552#define VMULLD     VX4(457)       /* v3.10 */
553#define VMSUMUHM   VX4(38)
554
555#define VMRGHB     VX4(12)
556#define VMRGHH     VX4(76)
557#define VMRGHW     VX4(140)
558#define VMRGLB     VX4(268)
559#define VMRGLH     VX4(332)
560#define VMRGLW     VX4(396)
561
562#define VPKUHUM    VX4(14)
563#define VPKUWUM    VX4(78)
564
565#define VAND       VX4(1028)
566#define VANDC      VX4(1092)
567#define VNOR       VX4(1284)
568#define VOR        VX4(1156)
569#define VXOR       VX4(1220)
570#define VEQV       VX4(1668)      /* v2.07 */
571#define VNAND      VX4(1412)      /* v2.07 */
572#define VORC       VX4(1348)      /* v2.07 */
573
574#define VSPLTB     VX4(524)
575#define VSPLTH     VX4(588)
576#define VSPLTW     VX4(652)
577#define VSPLTISB   VX4(780)
578#define VSPLTISH   VX4(844)
579#define VSPLTISW   VX4(908)
580
581#define VSLDOI     VX4(44)
582
583#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
584#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
585#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
586
587#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
588#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
589#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
590#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
591#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
592#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
593
594#define RT(r) ((r)<<21)
595#define RS(r) ((r)<<21)
596#define RA(r) ((r)<<16)
597#define RB(r) ((r)<<11)
598#define TO(t) ((t)<<21)
599#define SH(s) ((s)<<11)
600#define MB(b) ((b)<<6)
601#define ME(e) ((e)<<1)
602#define BO(o) ((o)<<21)
603#define MB64(b) ((b)<<5)
604#define FXM(b) (1 << (19 - (b)))
605
606#define VRT(r)  (((r) & 31) << 21)
607#define VRA(r)  (((r) & 31) << 16)
608#define VRB(r)  (((r) & 31) << 11)
609#define VRC(r)  (((r) & 31) <<  6)
610
611#define LK    1
612
613#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
614#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
615#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
616#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
617
618#define BF(n)    ((n)<<23)
619#define BI(n, c) (((c)+((n)*4))<<16)
620#define BT(n, c) (((c)+((n)*4))<<21)
621#define BA(n, c) (((c)+((n)*4))<<16)
622#define BB(n, c) (((c)+((n)*4))<<11)
623#define BC_(n, c) (((c)+((n)*4))<<6)
624
625#define BO_COND_TRUE  BO(12)
626#define BO_COND_FALSE BO( 4)
627#define BO_ALWAYS     BO(20)
628
/* Bit positions within a CR field, as used in BI/BA/BB operands. */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
635
/* Map a TCG comparison condition to a BC insn testing CR field 7. */
static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
648
/*
 * Map a TCG comparison condition to an ISEL insn testing CR field 7.
 * The low bit here is set if the RA and RB fields must be inverted.
 */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};
662
/*
 * Resolve relocation TYPE at CODE_PTR against VALUE + ADDEND.
 * REL14/REL24 are pc-relative branch fields; ADDR16 and ADDR32 are
 * repurposed for constant-pool addressing as described inline.
 * Returns false when the value does not fit the field.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
709
710static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
711                             TCGReg base, tcg_target_long offset);
712
/*
 * Emit a register move of ARG to RET, between any combination of
 * general and vector registers.  Returns false for gpr<->vector
 * moves the ISA cannot do directly (no mfvsr/mtvsr before v2.07).
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /* gpr -> gpr: canonical "mr" is OR rt,ra,ra.  */
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                /* vector -> gpr via mfvsrwz/mfvsrd (v2.07).  */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                /* gpr -> vector via mtvsrwz/mtvsrd (v2.07).  */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        /* vector -> vector: VOR vt,va,va.  */
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
756
/* Emit a 64-bit rotate insn OP (RLDICL et al), splitting the 6-bit
   SH and MB values into their split encoded field positions.  */
static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}
765
/* Emit a 32-bit rotate-and-mask insn OP (RLWINM/RLWIMI/RLWNM).  */
static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}
771
/* Sign-extend the low 8 bits of SRC into DST (extsb).  */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

/* Zero-extend the low 8 bits of SRC into DST (andi. with 0xff).  */
static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

/* Sign-extend the low 16 bits of SRC into DST (extsh).  */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

/* Zero-extend the low 16 bits of SRC into DST (andi. with 0xffff).  */
static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

/* Sign-extend the low 32 bits of SRC into DST (extsw); 64-bit only.  */
static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

/* Zero-extend the low 32 bits of SRC into DST (rldicl 0,32).  */
static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

/* Sign-extend an i32 value in SRC to an i64 in DST.  */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

/* Zero-extend an i32 value in SRC to an i64 in DST.  */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

/* Extract the low 32 bits of RN into RD; a plain move suffices.  */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
819
/* 32-bit shift left by constant C, via rlwinm.  */
static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

/* 64-bit shift left by constant C, via rldicr.  */
static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

/* 32-bit arithmetic shift right by constant C.  */
static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

/* 32-bit logical shift right by constant C, via rlwinm.  */
static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

/* 64-bit logical shift right by constant C, via rldicl.  */
static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

/* 64-bit arithmetic shift right by constant C.  SRADI splits the
   6-bit count: bits 0-4 in the SH field, bit 5 at insn bit 1.  */
static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
850
/*
 * Byte-swap the low 16 bits of SRC into DST, honoring the TCG_BSWAP
 * input/output extension FLAGS.  Uses BRH on ISA v3.10, otherwise a
 * two-insn rotate/insert sequence through a scratch (R0 if dst==src).
 */
static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}
882
/*
 * Byte-swap the low 32 bits of SRC into DST, honoring the TCG_BSWAP
 * extension FLAGS.  Uses BRW on ISA v3.10, otherwise a three-insn
 * rotate/insert sequence through a scratch (R0 if dst==src).
 */
static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}
917
/*
 * Byte-swap all 64 bits of SRC into DST.  Uses BRD on ISA v3.10,
 * otherwise swaps each 32-bit half with the rlwinm/rlwimi pattern
 * and recombines via 32-bit rotates of the doubleword.
 * Note the corrected rotate amount in the first recombine step:
 * the insn rotates t1 by 8, not 24.
 */
static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}
955
/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    /* li: any sign-extended 16-bit value.  */
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    /* lis: any sign-extended 32-bit value with zero low 16 bits.  */
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}
969
/*
 * Load constant ARG into RET using as few insns as possible.
 * Strategies are tried in order: single insn, TB-relative ADDI,
 * two-insn 32-bit forms, masked/shifted forms, a 2-3 insn
 * TB-relative address, the constant pool, and finally the full
 * up-to-5-insn 64-bit build.  IN_PROLOGUE disables the TB-relative
 * and pool forms (TCG_REG_TB is not valid yet there).
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = tcg_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       tcg_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }

    /* Full build: high 32 bits, shift, then OR in the low halves.  */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1059
/*
 * Load the constant VAL, replicated per VECE element size, into vector
 * register RET.  Prefer single splat-immediate insns; otherwise fall
 * back to a load from the constant pool.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        /* vspltisb covers 5-bit signed immediates.  */
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        /* ISA 3.0 xxspltib covers any byte.  */
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = tcg_tbrel_diff(s, NULL);
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        /* No VSX: lvx needs a full 16-byte pool entry (value doubled).  */
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    /* Emit the to-be-relocated pool address into TMP1, then the load.  */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
1135
1136static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1137                         tcg_target_long arg)
1138{
1139    switch (type) {
1140    case TCG_TYPE_I32:
1141    case TCG_TYPE_I64:
1142        tcg_debug_assert(ret < TCG_REG_V0);
1143        tcg_out_movi_int(s, type, ret, arg, false);
1144        break;
1145
1146    default:
1147        g_assert_not_reached();
1148    }
1149}
1150
/* Register-register exchange is not implemented for this target.  */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1155
/* Add a constant to a pointer register (rd = rs + imm).  */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1162
1163static bool mask_operand(uint32_t c, int *mb, int *me)
1164{
1165    uint32_t lsb, test;
1166
1167    /* Accept a bit pattern like:
1168           0....01....1
1169           1....10....0
1170           0..01..10..0
1171       Keep track of the transitions.  */
1172    if (c == 0 || c == -1) {
1173        return false;
1174    }
1175    test = c;
1176    lsb = test & -test;
1177    test += lsb;
1178    if (test & (test - 1)) {
1179        return false;
1180    }
1181
1182    *me = clz32(lsb);
1183    *mb = test ? clz32(test & -test) + 1 : 0;
1184    return true;
1185}
1186
1187static bool mask64_operand(uint64_t c, int *mb, int *me)
1188{
1189    uint64_t lsb;
1190
1191    if (c == 0) {
1192        return false;
1193    }
1194
1195    lsb = c & -c;
1196    /* Accept 1..10..0.  */
1197    if (c == -lsb) {
1198        *mb = 0;
1199        *me = clz64(lsb);
1200        return true;
1201    }
1202    /* Accept 0..01..1.  */
1203    if (lsb == 1 && (c & (c + 1)) == 0) {
1204        *mb = clz64(c + 1) + 1;
1205        *me = 63;
1206        return true;
1207    }
1208    return false;
1209}
1210
1211static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1212{
1213    int mb, me;
1214
1215    if (mask_operand(c, &mb, &me)) {
1216        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1217    } else if ((c & 0xffff) == c) {
1218        tcg_out32(s, ANDI | SAI(src, dst, c));
1219        return;
1220    } else if ((c & 0xffff0000) == c) {
1221        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1222        return;
1223    } else {
1224        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1225        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1226    }
1227}
1228
1229static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1230{
1231    int mb, me;
1232
1233    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1234    if (mask64_operand(c, &mb, &me)) {
1235        if (mb == 0) {
1236            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1237        } else {
1238            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1239        }
1240    } else if ((c & 0xffff) == c) {
1241        tcg_out32(s, ANDI | SAI(src, dst, c));
1242        return;
1243    } else if ((c & 0xffff0000) == c) {
1244        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1245        return;
1246    } else {
1247        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1248        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1249    }
1250}
1251
1252static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1253                           int op_lo, int op_hi)
1254{
1255    if (c >> 16) {
1256        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1257        src = dst;
1258    }
1259    if (c & 0xffff) {
1260        tcg_out32(s, op_lo | SAI(src, dst, c));
1261        src = dst;
1262    }
1263}
1264
/* Emit dst = src | c, via ori/oris as needed.  */
static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}
1269
/* Emit dst = src ^ c, via xori/xoris as needed.  */
static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
1274
1275static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
1276{
1277    ptrdiff_t disp = tcg_pcrel_diff(s, target);
1278    if (in_range_b(disp)) {
1279        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
1280    } else {
1281        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
1282        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
1283        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
1284    }
1285}
1286
/*
 * Emit a memory access with a possibly-large displacement.  OPI is the
 * D-form (immediate-offset) opcode, OPX the X-form (indexed) opcode;
 * OPI == 0 means only the indexed form exists.  Oversized or misaligned
 * offsets are synthesized using a scratch register.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    /* Determine the D-form displacement alignment requirement, and
       whether RT itself may double as the scratch register.  */
    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            /* A gpr destination other than R0 can be its own scratch.  */
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        /* A store must not clobber its own data register.  */
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    /* Split the 32-bit offset into sign-extended low/high halfwords.  */
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    /* Avoid a sign-flip of a non-negative offset: split the high part
       into two addis insns of 0x4000 each.  */
    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    /* Elide a no-op "addi rt, rt, 0".  */
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
1351
/* Emit vsldoi: shift the VA:VB byte concatenation left SHB bytes
   and place the leftmost 16 bytes in RET.  */
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
1357
/*
 * Load a value of the given TYPE from BASE+OFFSET into RET, handling
 * both gpr and vector destinations.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        /* No lxsiwzx: lvewx loads the word into the lane selected by
           the address; rotate it to the expected lane if needed.  */
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        /* No VSX: load the containing quadword, then rotate the wanted
           doubleword into position if it is the odd one.  */
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
1410
1411static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1412                              TCGReg base, intptr_t offset)
1413{
1414    int shift;
1415
1416    switch (type) {
1417    case TCG_TYPE_I32:
1418        if (arg < TCG_REG_V0) {
1419            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1420            break;
1421        }
1422        if (have_isa_2_07 && have_vsx) {
1423            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1424            break;
1425        }
1426        assert((offset & 3) == 0);
1427        tcg_debug_assert((offset & 3) == 0);
1428        shift = (offset - 4) & 0xc;
1429        if (shift) {
1430            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1431            arg = TCG_VEC_TMP1;
1432        }
1433        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1434        break;
1435    case TCG_TYPE_I64:
1436        if (arg < TCG_REG_V0) {
1437            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1438            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1439            break;
1440        }
1441        /* fallthru */
1442    case TCG_TYPE_V64:
1443        tcg_debug_assert(arg >= TCG_REG_V0);
1444        if (have_vsx) {
1445            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1446                             STXSDX, arg, base, offset);
1447            break;
1448        }
1449        tcg_debug_assert((offset & 7) == 0);
1450        if (offset & 8) {
1451            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1452            arg = TCG_VEC_TMP1;
1453        }
1454        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1455        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1456        break;
1457    case TCG_TYPE_V128:
1458        tcg_debug_assert(arg >= TCG_REG_V0);
1459        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1460                         STVX, arg, base, offset);
1461        break;
1462    default:
1463        g_assert_not_reached();
1464    }
1465}
1466
/* Store a constant directly to memory: not supported on this target.  */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1472
/*
 * Emit a comparison of ARG1 against ARG2 (a constant if CONST_ARG2)
 * into condition-register field CR, choosing signed/unsigned and
 * immediate/register forms according to COND and the operand value.
 */
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* Equality may use either the signed or unsigned immediate.  */
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        /* Signed compare: only the signed immediate form applies.  */
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        /* Unsigned compare: only the unsigned immediate form applies.  */
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        g_assert_not_reached();
    }
    /* Select the CR field and the 32- vs 64-bit (L) bit.  */
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            /* Out-of-range constant: materialize it in R0.  */
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}
1549
1550static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
1551                                TCGReg dst, TCGReg src)
1552{
1553    if (type == TCG_TYPE_I32) {
1554        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
1555        tcg_out_shri32(s, dst, dst, 5);
1556    } else {
1557        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
1558        tcg_out_shri64(s, dst, dst, 6);
1559    }
1560}
1561
1562static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
1563{
1564    /* X != 0 implies X + -1 generates a carry.  Extra addition
1565       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
1566    if (dst != src) {
1567        tcg_out32(s, ADDIC | TAI(dst, src, -1));
1568        tcg_out32(s, SUBFE | TAB(dst, dst, src));
1569    } else {
1570        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1571        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1572    }
1573}
1574
1575static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1576                                  bool const_arg2)
1577{
1578    if (const_arg2) {
1579        if ((uint32_t)arg2 == arg2) {
1580            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1581        } else {
1582            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1583            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1584        }
1585    } else {
1586        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1587    }
1588    return TCG_REG_R0;
1589}
1590
/*
 * Emit arg0 = (arg1 COND arg2), with arg2 possibly constant.
 * Uses special sequences for compares against zero, ISEL when
 * available, and otherwise a CR-field extraction via MFOCRF.
 */
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
                            int const_arg2)
{
    int crop, sh;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1);
            return;
        case TCG_COND_NE:
            if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
                tcg_out_ext32u(s, TCG_REG_R0, arg1);
                arg1 = TCG_REG_R0;
            }
            tcg_out_setcond_ne0(s, arg0, arg1);
            return;
        case TCG_COND_GE:
            /* x >= 0 is !(x < 0); invert and fall through.  */
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                tcg_out_shri32(s, arg0, arg1, 31);
            } else {
                tcg_out_shri64(s, arg0, arg1, 63);
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    switch (cond) {
    case TCG_COND_EQ:
        /* Reduce to a compare against zero via XOR.  */
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1);
        return;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        /* Discard the high bits only once, rather than both inputs.  */
        if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, TCG_REG_R0, arg1);
            arg1 = TCG_REG_R0;
        }
        tcg_out_setcond_ne0(s, arg0, arg1);
        return;

    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30;
        crop = 0;
        goto crtest;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29;
        crop = 0;
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
        goto crtest;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        sh = 31;
        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
    crtest:
        /* Compare into CR7, optionally combine CR bits, then rotate
           the wanted bit out of the MFOCRF result.  */
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        if (crop) {
            tcg_out32(s, crop);
        }
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        break;

    default:
        g_assert_not_reached();
    }
}
1709
1710static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1711{
1712    if (l->has_value) {
1713        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1714    } else {
1715        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1716    }
1717    tcg_out32(s, bc);
1718}
1719
/* Emit a conditional branch to L: compare into CR7, then bc.  */
static void tcg_out_brcond(TCGContext *s, TCGCond cond,
                           TCGArg arg1, TCGArg arg2, int const_arg2,
                           TCGLabel *l, TCGType type)
{
    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
    tcg_out_bc(s, tcg_to_bc[cond], l);
}
1727
/*
 * Emit dest = (c1 COND c2 ? v1 : v2), using ISEL when available and a
 * short branch-over sequence otherwise.
 */
static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
                            TCGArg v2, bool const_c2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        /* No ISEL: move one value in, then conditionally skip the
           move of the other.  */
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out32(s, tcg_to_bc[cond] | 8);
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}
1775
/*
 * Emit count-leading/trailing-zeros (OPC) of A1 into A0, with A2 as
 * the value to substitute when A1 == 0.  The hardware insn already
 * returns the type width for zero input, so that case is free.
 */
static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            /* Count into R0, then select a2 vs the count on A1 == 0.  */
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            /* A0 already holds a2: skip the count when A1 == 0.  */
            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            /* Count, then branch over the substitute for A1 != 0.  */
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
1801
/*
 * Emit a 64-bit comparison of the 32-bit pairs (args[1]:args[0]) and
 * (args[3]:args[2]) under condition args[4], leaving the result in
 * CR7.EQ.  Used on ppc32 for i64 setcond/brcond.
 */
static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
                         const int *const_args)
{
    /* Per condition: the CR bit tested on the high-part compare and
       the CR bit tested on the (unsigned) low-part compare.  */
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond = args[4], cond2;
    TCGArg al, ah, bl, bh;
    int blconst, bhconst;
    int op, bit1, bit2;

    al = args[0];
    ah = args[1];
    bl = args[2];
    bh = args[3];
    blconst = const_args[2];
    bhconst = const_args[3];

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        /* Equal iff both halves are equal: combine the two EQ bits.  */
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        /* The low halves always compare unsigned.  */
        cond2 = tcg_unsigned_cond(cond);

        /* Result = high-part strictly decides, or high parts equal
           and the low-part compare decides.  */
        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}
1863
/* setcond for a 32-bit register pair (ppc32 i64): compare via
   tcg_out_cmp2 into CR7.EQ, then extract that bit into args[0].  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args + 1, const_args + 1);
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
}
1871
/* brcond for a 32-bit register pair (ppc32 i64): compare via
   tcg_out_cmp2, then branch on CR7.EQ to the label in args[5].  */
static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    tcg_out_cmp2(s, args, const_args);
    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
}
1878
1879static void tcg_out_mb(TCGContext *s, TCGArg a0)
1880{
1881    uint32_t insn;
1882
1883    if (a0 & TCG_MO_ST_LD) {
1884        insn = HWSYNC;
1885    } else {
1886        insn = LWSYNC;
1887    }
1888
1889    tcg_out32(s, insn);
1890}
1891
/*
 * Emit a (possibly linking, per LK) call to TARGET, honoring the
 * function-descriptor and TOC conventions of the compiled-for ABI.
 */
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range, and we
       don't have to spend too much effort on building the toc.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        /* Load the code address and new TOC from the descriptor,
           then branch indirect via CTR.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        /* NOTE(review): elsewhere mtspr uses RS(); RA() here yields the
           same encoding only because R0 encodes as 0 — confirm.  */
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    /* SysV / Darwin: a plain branch suffices.  */
    tcg_out_b(s, lk, target);
#endif
}
1942
/* Emit a linking call to TARGET; INFO is unused on this target.  */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}
1948
/* Indexed-form load opcodes by MemOp size/sign/byteswap.
   Unlisted combinations are zero (no direct insn).  */
static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};
1961
/* Indexed-form store opcodes by MemOp size/byteswap.  */
static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
1972
1973static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
1974{
1975    if (arg < 0) {
1976        arg = TCG_REG_TMP1;
1977    }
1978    tcg_out32(s, MFSPR | RT(arg) | LR);
1979    return arg;
1980}
1981
/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,     /* loads the return address from LR */
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};
1991
/* Emit the out-of-line slow path for a guest load described by LB:
   patch the fast-path branch here, call the helper, move the result
   into place, and branch back.  Returns false if patching failed.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    /* Resolve the fast path's conditional branch to this point.  */
    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    /* Return to the insn following the fast-path access.  */
    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2007
/*
 * Emit the out-of-line slow path for a guest store: resolve the
 * conditional branch from the fast path, call the size-appropriate
 * store helper, then jump back.  Returns false if the branch
 * displacement cannot be encoded.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    /* Patch the fast path's 14-bit conditional branch to point here. */
    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    /* Return to the code following the fast-path memory operation. */
    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2022
/*
 * Host addressing for a guest access: effective address is
 * base + index (base may be 0), plus the atomicity/alignment
 * requirements resolved for the operation.
 */
typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;
2028
2029bool tcg_target_has_memory_bswap(MemOp memop)
2030{
2031    TCGAtomAlign aa;
2032
2033    if ((memop & MO_SIZE) <= MO_64) {
2034        return true;
2035    }
2036
2037    /*
2038     * Reject 16-byte memop with 16-byte atomicity,
2039     * but do allow a pair of 64-bit operations.
2040     */
2041    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2042    return aa.atom <= MO_64;
2043}
2044
/*
 * We expect to use a 16-bit negative offset from ENV.
 * NOTE(review): presumably this bounds tlb_mask_table_ofs() so the
 * mask/table loads fit a signed 16-bit displacement — confirm in tcg.c.
 */
#define MIN_TLB_MASK_TABLE_OFS  -32768
2047
2048/*
2049 * For softmmu, perform the TLB load and compare.
2050 * For useronly, perform any required alignment tests.
2051 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2052 * is required and fill in @h with the host address for the fast path.
2053 */
2054static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2055                                           TCGReg addrlo, TCGReg addrhi,
2056                                           MemOpIdx oi, bool is_ld)
2057{
2058    TCGType addr_type = s->addr_type;
2059    TCGLabelQemuLdst *ldst = NULL;
2060    MemOp opc = get_memop(oi);
2061    MemOp a_bits, s_bits;
2062
2063    /*
2064     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2065     *
2066     * Before 3.0, "An access that is not atomic is performed as a set of
2067     * smaller disjoint atomic accesses. In general, the number and alignment
2068     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2069     *
2070     * As of 3.0, "the non-atomic access is performed as described in
2071     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2072     */
2073    s_bits = opc & MO_SIZE;
2074    h->aa = atom_and_align_for_opc(s, opc,
2075                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2076                                                 : MO_ATOM_IFALIGN,
2077                                   s_bits == MO_128);
2078    a_bits = h->aa.align;
2079
2080#ifdef CONFIG_SOFTMMU
2081    int mem_index = get_mmuidx(oi);
2082    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2083                        : offsetof(CPUTLBEntry, addr_write);
2084    int fast_off = tlb_mask_table_ofs(s, mem_index);
2085    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2086    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2087
2088    ldst = new_ldst_label(s);
2089    ldst->is_ld = is_ld;
2090    ldst->oi = oi;
2091    ldst->addrlo_reg = addrlo;
2092    ldst->addrhi_reg = addrhi;
2093
2094    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2095    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2096    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2097
2098    /* Extract the page index, shifted into place for tlb index.  */
2099    if (TCG_TARGET_REG_BITS == 32) {
2100        tcg_out_shri32(s, TCG_REG_R0, addrlo,
2101                       s->page_bits - CPU_TLB_ENTRY_BITS);
2102    } else {
2103        tcg_out_shri64(s, TCG_REG_R0, addrlo,
2104                       s->page_bits - CPU_TLB_ENTRY_BITS);
2105    }
2106    tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2107
2108    /*
2109     * Load the (low part) TLB comparator into TMP2.
2110     * For 64-bit host, always load the entire 64-bit slot for simplicity.
2111     * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2112     */
2113    if (TCG_TARGET_REG_BITS == 64) {
2114        if (cmp_off == 0) {
2115            tcg_out32(s, LDUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2116        } else {
2117            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2118            tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
2119        }
2120    } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2121        tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
2122    } else {
2123        tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2124        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2125                   cmp_off + 4 * HOST_BIG_ENDIAN);
2126    }
2127
2128    /*
2129     * Load the TLB addend for use on the fast path.
2130     * Do this asap to minimize any load use delay.
2131     */
2132    if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2133        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2134                   offsetof(CPUTLBEntry, addend));
2135    }
2136
2137    /* Clear the non-page, non-alignment bits from the address in R0. */
2138    if (TCG_TARGET_REG_BITS == 32) {
2139        /*
2140         * We don't support unaligned accesses on 32-bits.
2141         * Preserve the bottom bits and thus trigger a comparison
2142         * failure on unaligned accesses.
2143         */
2144        if (a_bits < s_bits) {
2145            a_bits = s_bits;
2146        }
2147        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2148                    (32 - a_bits) & 31, 31 - s->page_bits);
2149    } else {
2150        TCGReg t = addrlo;
2151
2152        /*
2153         * If the access is unaligned, we need to make sure we fail if we
2154         * cross a page boundary.  The trick is to add the access size-1
2155         * to the address before masking the low bits.  That will make the
2156         * address overflow to the next page if we cross a page boundary,
2157         * which will then force a mismatch of the TLB compare.
2158         */
2159        if (a_bits < s_bits) {
2160            unsigned a_mask = (1 << a_bits) - 1;
2161            unsigned s_mask = (1 << s_bits) - 1;
2162            tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2163            t = TCG_REG_R0;
2164        }
2165
2166        /* Mask the address for the requested alignment.  */
2167        if (addr_type == TCG_TYPE_I32) {
2168            tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2169                        (32 - a_bits) & 31, 31 - s->page_bits);
2170        } else if (a_bits == 0) {
2171            tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2172        } else {
2173            tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2174                        64 - s->page_bits, s->page_bits - a_bits);
2175            tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2176        }
2177    }
2178
2179    if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2180        /* Low part comparison into cr7. */
2181        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2182                    0, 7, TCG_TYPE_I32);
2183
2184        /* Load the high part TLB comparator into TMP2.  */
2185        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2186                   cmp_off + 4 * !HOST_BIG_ENDIAN);
2187
2188        /* Load addend, deferred for this case. */
2189        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2190                   offsetof(CPUTLBEntry, addend));
2191
2192        /* High part comparison into cr6. */
2193        tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
2194
2195        /* Combine comparisons into cr7. */
2196        tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2197    } else {
2198        /* Full comparison into cr7. */
2199        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 7, addr_type);
2200    }
2201
2202    /* Load a pointer into the current opcode w/conditional branch-link. */
2203    ldst->label_ptr[0] = s->code_ptr;
2204    tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2205
2206    h->base = TCG_REG_TMP1;
2207#else
2208    if (a_bits) {
2209        ldst = new_ldst_label(s);
2210        ldst->is_ld = is_ld;
2211        ldst->oi = oi;
2212        ldst->addrlo_reg = addrlo;
2213        ldst->addrhi_reg = addrhi;
2214
2215        /* We are expecting a_bits to max out at 7, much lower than ANDI. */
2216        tcg_debug_assert(a_bits < 16);
2217        tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2218
2219        ldst->label_ptr[0] = s->code_ptr;
2220        tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2221    }
2222
2223    h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2224#endif
2225
2226    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2227        /* Zero-extend the guest address for use in the host address. */
2228        tcg_out_ext32u(s, TCG_REG_R0, addrlo);
2229        h->index = TCG_REG_R0;
2230    } else {
2231        h->index = addrlo;
2232    }
2233
2234    return ldst;
2235}
2236
/*
 * Emit the fast path of a guest load into datalo (and datahi for a
 * 64-bit value on a 32-bit host), recording a slow-path label if
 * prepare_host_addr required one.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addrlo, TCGReg addrhi,
                            MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);

    /* 64-bit load on a 32-bit host: two 32-bit loads. */
    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        if (opc & MO_BSWAP) {
            /* Byte-reversed halves; R0 addresses the second word. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
        } else if (h.index == datahi) {
            /* Address overlaps datahi: load datalo first. */
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!have_isa_2_06 && insn == LDBRX) {
            /* No LDBRX before ISA 2.06: two LWBRX + merge via RLDIMI. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
        } else {
            /* No single insn for this sign/swap combo: load then extend. */
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
            tcg_out_movext(s, TCG_TYPE_REG, datalo,
                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2287
/*
 * Emit the fast path of a guest store from datalo (and datahi for a
 * 64-bit value on a 32-bit host), recording a slow-path label if
 * prepare_host_addr required one.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addrlo, TCGReg addrhi,
                            MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);

    /* 64-bit store on a 32-bit host: two 32-bit stores. */
    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        if (opc & MO_BSWAP) {
            /* Byte-reversed halves; R0 addresses the second word. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, h.index, 0));
            tcg_out32(s, STW | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!have_isa_2_06 && insn == STDBRX) {
            /* No STDBRX before ISA 2.06: two STWBRX of the halves. */
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
        } else {
            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2330
/*
 * Emit a 128-bit guest load or store.  When 16-byte atomicity is
 * required, use LQ/STQ on an even/odd register pair; otherwise use a
 * pair of 64-bit accesses (possibly byte-reversed).
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    bool need_bswap;
    uint32_t insn;
    TCGReg index;

    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);

    /* Compose the final address, as LQ/STQ have no indexing. */
    index = h.index;
    if (h.base != 0) {
        index = TCG_REG_TMP1;
        tcg_out32(s, ADD | TAB(index, h.base, h.index));
    }
    need_bswap = get_memop(oi) & MO_BSWAP;

    if (h.aa.atom == MO_128) {
        /* LQ/STQ address an even/odd pair through the even register. */
        tcg_debug_assert(!need_bswap);
        tcg_debug_assert(datalo & 1);
        tcg_debug_assert(datahi == datalo - 1);
        insn = is_ld ? LQ : STQ;
        tcg_out32(s, insn | TAI(datahi, index, 0));
    } else {
        TCGReg d1, d2;

        /* Select which half goes at the lower address. */
        if (HOST_BIG_ENDIAN ^ need_bswap) {
            d1 = datahi, d2 = datalo;
        } else {
            d1 = datalo, d2 = datahi;
        }

        if (need_bswap) {
            /* X-form only: materialize the second-half offset in R0. */
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
            insn = is_ld ? LDBRX : STDBRX;
            tcg_out32(s, insn | TAB(d1, 0, index));
            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
        } else {
            insn = is_ld ? LD : STD;
            tcg_out32(s, insn | TAI(d1, index, 0));
            tcg_out32(s, insn | TAI(d2, index, 8));
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2384
2385static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2386{
2387    int i;
2388    for (i = 0; i < count; ++i) {
2389        p[i] = NOP;
2390    }
2391}
2392
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16

/*
 * Per-ABI stack frame layout: LINK_AREA_SIZE is the caller-reserved
 * area at the bottom of the frame, LR_OFFSET the slot (relative to the
 * caller's SP) where the saved LR is stored.
 */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/* Total prologue frame size, rounded up to the stack alignment. */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* Offset of the callee-saved register area within the frame. */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2432
/*
 * Generate the prologue that enters translated code (env in the first
 * argument register, TB start address in the second) and the shared
 * epilogue that returns to the caller.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    /* AIX-style ABIs call through a function descriptor. */
    const void **desc = (const void **)s->code_ptr;
    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
    desc[1] = 0;                            /* environment pointer */
    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    /* Save LR and allocate the frame with a store-with-update of SP. */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    /* Save all callee-saved registers used by tcg. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

#ifndef CONFIG_SOFTMMU
    if (guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* Jump into translated code: env -> AREG0, tb start -> CTR. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    if (USE_REG_TB) {
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
    }
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);

    /* Restore LR and callee-saved registers, pop frame, return. */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}
2484
/*
 * Leave the translated code, returning @arg to the caller via R3
 * (the ABI return-value register) and branching to the epilogue.
 */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
    tcg_out_b(s, 0, tcg_code_gen_epilogue);
}
2490
/*
 * Emit the chained-TB jump for slot @which, loading the destination
 * from the jump cache so the branch can be retargeted at run time.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    uintptr_t ptr = get_jmp_target_addr(s, which);

    if (USE_REG_TB) {
        /* Load the target address TB-relative through TCG_REG_TB. */
        ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
        tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);

        /* TODO: Use direct branches when possible. */
        set_jmp_insn_offset(s, which);
        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);

        tcg_out32(s, BCCTR | BO_ALWAYS);

        /* For the unlinked case, need to reset TCG_REG_TB.  */
        set_jmp_reset_offset(s, which);
        tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
                         -tcg_current_code_size(s));
    } else {
        /* Direct branch will be patched by tb_target_set_jmp_target. */
        set_jmp_insn_offset(s, which);
        tcg_out32(s, NOP);

        /* When branch is out of range, fall through to indirect. */
        /* High part of ptr, chosen so adding the sign-extended low
           16 bits as a load displacement reconstructs ptr exactly. */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS);
        set_jmp_reset_offset(s, which);
    }
}
2522
2523void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2524                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2525{
2526    uintptr_t addr = tb->jmp_target_addr[n];
2527    intptr_t diff = addr - jmp_rx;
2528    tcg_insn_unit insn;
2529
2530    if (USE_REG_TB) {
2531        return;
2532    }
2533
2534    if (in_range_b(diff)) {
2535        insn = B | (diff & 0x3fffffc);
2536    } else {
2537        insn = NOP;
2538    }
2539
2540    qatomic_set((uint32_t *)jmp_rw, insn);
2541    flush_idcache_range(jmp_rx, jmp_rw, 4);
2542}
2543
2544static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2545                       const TCGArg args[TCG_MAX_OP_ARGS],
2546                       const int const_args[TCG_MAX_OP_ARGS])
2547{
2548    TCGArg a0, a1, a2;
2549
2550    switch (opc) {
2551    case INDEX_op_goto_ptr:
2552        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2553        if (USE_REG_TB) {
2554            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
2555        }
2556        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2557        tcg_out32(s, BCCTR | BO_ALWAYS);
2558        break;
2559    case INDEX_op_br:
2560        {
2561            TCGLabel *l = arg_label(args[0]);
2562            uint32_t insn = B;
2563
2564            if (l->has_value) {
2565                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2566                                       l->u.value_ptr);
2567            } else {
2568                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2569            }
2570            tcg_out32(s, insn);
2571        }
2572        break;
2573    case INDEX_op_ld8u_i32:
2574    case INDEX_op_ld8u_i64:
2575        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2576        break;
2577    case INDEX_op_ld8s_i32:
2578    case INDEX_op_ld8s_i64:
2579        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2580        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2581        break;
2582    case INDEX_op_ld16u_i32:
2583    case INDEX_op_ld16u_i64:
2584        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2585        break;
2586    case INDEX_op_ld16s_i32:
2587    case INDEX_op_ld16s_i64:
2588        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2589        break;
2590    case INDEX_op_ld_i32:
2591    case INDEX_op_ld32u_i64:
2592        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2593        break;
2594    case INDEX_op_ld32s_i64:
2595        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2596        break;
2597    case INDEX_op_ld_i64:
2598        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2599        break;
2600    case INDEX_op_st8_i32:
2601    case INDEX_op_st8_i64:
2602        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2603        break;
2604    case INDEX_op_st16_i32:
2605    case INDEX_op_st16_i64:
2606        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2607        break;
2608    case INDEX_op_st_i32:
2609    case INDEX_op_st32_i64:
2610        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2611        break;
2612    case INDEX_op_st_i64:
2613        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2614        break;
2615
2616    case INDEX_op_add_i32:
2617        a0 = args[0], a1 = args[1], a2 = args[2];
2618        if (const_args[2]) {
2619        do_addi_32:
2620            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2621        } else {
2622            tcg_out32(s, ADD | TAB(a0, a1, a2));
2623        }
2624        break;
2625    case INDEX_op_sub_i32:
2626        a0 = args[0], a1 = args[1], a2 = args[2];
2627        if (const_args[1]) {
2628            if (const_args[2]) {
2629                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2630            } else {
2631                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2632            }
2633        } else if (const_args[2]) {
2634            a2 = -a2;
2635            goto do_addi_32;
2636        } else {
2637            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2638        }
2639        break;
2640
2641    case INDEX_op_and_i32:
2642        a0 = args[0], a1 = args[1], a2 = args[2];
2643        if (const_args[2]) {
2644            tcg_out_andi32(s, a0, a1, a2);
2645        } else {
2646            tcg_out32(s, AND | SAB(a1, a0, a2));
2647        }
2648        break;
2649    case INDEX_op_and_i64:
2650        a0 = args[0], a1 = args[1], a2 = args[2];
2651        if (const_args[2]) {
2652            tcg_out_andi64(s, a0, a1, a2);
2653        } else {
2654            tcg_out32(s, AND | SAB(a1, a0, a2));
2655        }
2656        break;
2657    case INDEX_op_or_i64:
2658    case INDEX_op_or_i32:
2659        a0 = args[0], a1 = args[1], a2 = args[2];
2660        if (const_args[2]) {
2661            tcg_out_ori32(s, a0, a1, a2);
2662        } else {
2663            tcg_out32(s, OR | SAB(a1, a0, a2));
2664        }
2665        break;
2666    case INDEX_op_xor_i64:
2667    case INDEX_op_xor_i32:
2668        a0 = args[0], a1 = args[1], a2 = args[2];
2669        if (const_args[2]) {
2670            tcg_out_xori32(s, a0, a1, a2);
2671        } else {
2672            tcg_out32(s, XOR | SAB(a1, a0, a2));
2673        }
2674        break;
2675    case INDEX_op_andc_i32:
2676        a0 = args[0], a1 = args[1], a2 = args[2];
2677        if (const_args[2]) {
2678            tcg_out_andi32(s, a0, a1, ~a2);
2679        } else {
2680            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2681        }
2682        break;
2683    case INDEX_op_andc_i64:
2684        a0 = args[0], a1 = args[1], a2 = args[2];
2685        if (const_args[2]) {
2686            tcg_out_andi64(s, a0, a1, ~a2);
2687        } else {
2688            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2689        }
2690        break;
2691    case INDEX_op_orc_i32:
2692        if (const_args[2]) {
2693            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2694            break;
2695        }
2696        /* FALLTHRU */
2697    case INDEX_op_orc_i64:
2698        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2699        break;
2700    case INDEX_op_eqv_i32:
2701        if (const_args[2]) {
2702            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2703            break;
2704        }
2705        /* FALLTHRU */
2706    case INDEX_op_eqv_i64:
2707        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2708        break;
2709    case INDEX_op_nand_i32:
2710    case INDEX_op_nand_i64:
2711        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2712        break;
2713    case INDEX_op_nor_i32:
2714    case INDEX_op_nor_i64:
2715        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2716        break;
2717
2718    case INDEX_op_clz_i32:
2719        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2720                      args[2], const_args[2]);
2721        break;
2722    case INDEX_op_ctz_i32:
2723        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2724                      args[2], const_args[2]);
2725        break;
2726    case INDEX_op_ctpop_i32:
2727        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2728        break;
2729
2730    case INDEX_op_clz_i64:
2731        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2732                      args[2], const_args[2]);
2733        break;
2734    case INDEX_op_ctz_i64:
2735        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2736                      args[2], const_args[2]);
2737        break;
2738    case INDEX_op_ctpop_i64:
2739        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2740        break;
2741
2742    case INDEX_op_mul_i32:
2743        a0 = args[0], a1 = args[1], a2 = args[2];
2744        if (const_args[2]) {
2745            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2746        } else {
2747            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2748        }
2749        break;
2750
2751    case INDEX_op_div_i32:
2752        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
2753        break;
2754
2755    case INDEX_op_divu_i32:
2756        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
2757        break;
2758
2759    case INDEX_op_rem_i32:
2760        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
2761        break;
2762
2763    case INDEX_op_remu_i32:
2764        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
2765        break;
2766
2767    case INDEX_op_shl_i32:
2768        if (const_args[2]) {
2769            /* Limit immediate shift count lest we create an illegal insn.  */
2770            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
2771        } else {
2772            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
2773        }
2774        break;
2775    case INDEX_op_shr_i32:
2776        if (const_args[2]) {
2777            /* Limit immediate shift count lest we create an illegal insn.  */
2778            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
2779        } else {
2780            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
2781        }
2782        break;
2783    case INDEX_op_sar_i32:
2784        if (const_args[2]) {
2785            tcg_out_sari32(s, args[0], args[1], args[2]);
2786        } else {
2787            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
2788        }
2789        break;
2790    case INDEX_op_rotl_i32:
2791        if (const_args[2]) {
2792            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
2793        } else {
2794            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
2795                         | MB(0) | ME(31));
2796        }
2797        break;
2798    case INDEX_op_rotr_i32:
2799        if (const_args[2]) {
2800            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
2801        } else {
2802            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
2803            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
2804                         | MB(0) | ME(31));
2805        }
2806        break;
2807
2808    case INDEX_op_brcond_i32:
2809        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2810                       arg_label(args[3]), TCG_TYPE_I32);
2811        break;
2812    case INDEX_op_brcond_i64:
2813        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
2814                       arg_label(args[3]), TCG_TYPE_I64);
2815        break;
2816    case INDEX_op_brcond2_i32:
2817        tcg_out_brcond2(s, args, const_args);
2818        break;
2819
2820    case INDEX_op_neg_i32:
2821    case INDEX_op_neg_i64:
2822        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
2823        break;
2824
2825    case INDEX_op_not_i32:
2826    case INDEX_op_not_i64:
2827        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
2828        break;
2829
2830    case INDEX_op_add_i64:
2831        a0 = args[0], a1 = args[1], a2 = args[2];
2832        if (const_args[2]) {
2833        do_addi_64:
2834            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
2835        } else {
2836            tcg_out32(s, ADD | TAB(a0, a1, a2));
2837        }
2838        break;
2839    case INDEX_op_sub_i64:
2840        a0 = args[0], a1 = args[1], a2 = args[2];
2841        if (const_args[1]) {
2842            if (const_args[2]) {
2843                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
2844            } else {
2845                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2846            }
2847        } else if (const_args[2]) {
2848            a2 = -a2;
2849            goto do_addi_64;
2850        } else {
2851            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2852        }
2853        break;
2854
2855    case INDEX_op_shl_i64:
2856        if (const_args[2]) {
2857            /* Limit immediate shift count lest we create an illegal insn.  */
2858            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
2859        } else {
2860            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
2861        }
2862        break;
2863    case INDEX_op_shr_i64:
2864        if (const_args[2]) {
2865            /* Limit immediate shift count lest we create an illegal insn.  */
2866            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
2867        } else {
2868            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
2869        }
2870        break;
2871    case INDEX_op_sar_i64:
2872        if (const_args[2]) {
2873            tcg_out_sari64(s, args[0], args[1], args[2]);
2874        } else {
2875            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
2876        }
2877        break;
2878    case INDEX_op_rotl_i64:
2879        if (const_args[2]) {
2880            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
2881        } else {
2882            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
2883        }
2884        break;
2885    case INDEX_op_rotr_i64:
2886        if (const_args[2]) {
2887            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
2888        } else {
2889            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
2890            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
2891        }
2892        break;
2893
2894    case INDEX_op_mul_i64:
2895        a0 = args[0], a1 = args[1], a2 = args[2];
2896        if (const_args[2]) {
2897            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2898        } else {
2899            tcg_out32(s, MULLD | TAB(a0, a1, a2));
2900        }
2901        break;
2902    case INDEX_op_div_i64:
2903        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
2904        break;
2905    case INDEX_op_divu_i64:
2906        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
2907        break;
2908    case INDEX_op_rem_i64:
2909        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
2910        break;
2911    case INDEX_op_remu_i64:
2912        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
2913        break;
2914
2915    case INDEX_op_qemu_ld_a64_i32:
2916        if (TCG_TARGET_REG_BITS == 32) {
2917            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
2918                            args[3], TCG_TYPE_I32);
2919            break;
2920        }
2921        /* fall through */
2922    case INDEX_op_qemu_ld_a32_i32:
2923        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
2924        break;
2925    case INDEX_op_qemu_ld_a32_i64:
2926        if (TCG_TARGET_REG_BITS == 64) {
2927            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
2928                            args[2], TCG_TYPE_I64);
2929        } else {
2930            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
2931                            args[3], TCG_TYPE_I64);
2932        }
2933        break;
2934    case INDEX_op_qemu_ld_a64_i64:
2935        if (TCG_TARGET_REG_BITS == 64) {
2936            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
2937                            args[2], TCG_TYPE_I64);
2938        } else {
2939            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
2940                            args[4], TCG_TYPE_I64);
2941        }
2942        break;
2943    case INDEX_op_qemu_ld_a32_i128:
2944    case INDEX_op_qemu_ld_a64_i128:
2945        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2946        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
2947        break;
2948
2949    case INDEX_op_qemu_st_a64_i32:
2950        if (TCG_TARGET_REG_BITS == 32) {
2951            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
2952                            args[3], TCG_TYPE_I32);
2953            break;
2954        }
2955        /* fall through */
2956    case INDEX_op_qemu_st_a32_i32:
2957        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
2958        break;
2959    case INDEX_op_qemu_st_a32_i64:
2960        if (TCG_TARGET_REG_BITS == 64) {
2961            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
2962                            args[2], TCG_TYPE_I64);
2963        } else {
2964            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
2965                            args[3], TCG_TYPE_I64);
2966        }
2967        break;
2968    case INDEX_op_qemu_st_a64_i64:
2969        if (TCG_TARGET_REG_BITS == 64) {
2970            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
2971                            args[2], TCG_TYPE_I64);
2972        } else {
2973            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
2974                            args[4], TCG_TYPE_I64);
2975        }
2976        break;
2977    case INDEX_op_qemu_st_a32_i128:
2978    case INDEX_op_qemu_st_a64_i128:
2979        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
2980        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
2981        break;
2982
2983    case INDEX_op_setcond_i32:
2984        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
2985                        const_args[2]);
2986        break;
2987    case INDEX_op_setcond_i64:
2988        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
2989                        const_args[2]);
2990        break;
2991    case INDEX_op_setcond2_i32:
2992        tcg_out_setcond2(s, args, const_args);
2993        break;
2994
2995    case INDEX_op_bswap16_i32:
2996    case INDEX_op_bswap16_i64:
2997        tcg_out_bswap16(s, args[0], args[1], args[2]);
2998        break;
2999    case INDEX_op_bswap32_i32:
3000        tcg_out_bswap32(s, args[0], args[1], 0);
3001        break;
3002    case INDEX_op_bswap32_i64:
3003        tcg_out_bswap32(s, args[0], args[1], args[2]);
3004        break;
3005    case INDEX_op_bswap64_i64:
3006        tcg_out_bswap64(s, args[0], args[1]);
3007        break;
3008
3009    case INDEX_op_deposit_i32:
3010        if (const_args[2]) {
3011            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3012            tcg_out_andi32(s, args[0], args[0], ~mask);
3013        } else {
3014            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3015                        32 - args[3] - args[4], 31 - args[3]);
3016        }
3017        break;
3018    case INDEX_op_deposit_i64:
3019        if (const_args[2]) {
3020            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3021            tcg_out_andi64(s, args[0], args[0], ~mask);
3022        } else {
3023            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3024                        64 - args[3] - args[4]);
3025        }
3026        break;
3027
3028    case INDEX_op_extract_i32:
3029        tcg_out_rlw(s, RLWINM, args[0], args[1],
3030                    32 - args[2], 32 - args[3], 31);
3031        break;
3032    case INDEX_op_extract_i64:
3033        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3034        break;
3035
3036    case INDEX_op_movcond_i32:
3037        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3038                        args[3], args[4], const_args[2]);
3039        break;
3040    case INDEX_op_movcond_i64:
3041        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3042                        args[3], args[4], const_args[2]);
3043        break;
3044
3045#if TCG_TARGET_REG_BITS == 64
3046    case INDEX_op_add2_i64:
3047#else
3048    case INDEX_op_add2_i32:
3049#endif
3050        /* Note that the CA bit is defined based on the word size of the
3051           environment.  So in 64-bit mode it's always carry-out of bit 63.
3052           The fallback code using deposit works just as well for 32-bit.  */
3053        a0 = args[0], a1 = args[1];
3054        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3055            a0 = TCG_REG_R0;
3056        }
3057        if (const_args[4]) {
3058            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3059        } else {
3060            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3061        }
3062        if (const_args[5]) {
3063            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3064        } else {
3065            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3066        }
3067        if (a0 != args[0]) {
3068            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3069        }
3070        break;
3071
3072#if TCG_TARGET_REG_BITS == 64
3073    case INDEX_op_sub2_i64:
3074#else
3075    case INDEX_op_sub2_i32:
3076#endif
3077        a0 = args[0], a1 = args[1];
3078        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3079            a0 = TCG_REG_R0;
3080        }
3081        if (const_args[2]) {
3082            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3083        } else {
3084            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3085        }
3086        if (const_args[3]) {
3087            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3088        } else {
3089            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3090        }
3091        if (a0 != args[0]) {
3092            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3093        }
3094        break;
3095
3096    case INDEX_op_muluh_i32:
3097        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3098        break;
3099    case INDEX_op_mulsh_i32:
3100        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3101        break;
3102    case INDEX_op_muluh_i64:
3103        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3104        break;
3105    case INDEX_op_mulsh_i64:
3106        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3107        break;
3108
3109    case INDEX_op_mb:
3110        tcg_out_mb(s, args[0]);
3111        break;
3112
3113    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3114    case INDEX_op_mov_i64:
3115    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3116    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3117    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3118    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3119    case INDEX_op_ext8s_i64:
3120    case INDEX_op_ext8u_i32:
3121    case INDEX_op_ext8u_i64:
3122    case INDEX_op_ext16s_i32:
3123    case INDEX_op_ext16s_i64:
3124    case INDEX_op_ext16u_i32:
3125    case INDEX_op_ext16u_i64:
3126    case INDEX_op_ext32s_i64:
3127    case INDEX_op_ext32u_i64:
3128    case INDEX_op_ext_i32_i64:
3129    case INDEX_op_extu_i32_i64:
3130    case INDEX_op_extrl_i64_i32:
3131    default:
3132        g_assert_not_reached();
3133    }
3134}
3135
3136int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3137{
3138    switch (opc) {
3139    case INDEX_op_and_vec:
3140    case INDEX_op_or_vec:
3141    case INDEX_op_xor_vec:
3142    case INDEX_op_andc_vec:
3143    case INDEX_op_not_vec:
3144    case INDEX_op_nor_vec:
3145    case INDEX_op_eqv_vec:
3146    case INDEX_op_nand_vec:
3147        return 1;
3148    case INDEX_op_orc_vec:
3149        return have_isa_2_07;
3150    case INDEX_op_add_vec:
3151    case INDEX_op_sub_vec:
3152    case INDEX_op_smax_vec:
3153    case INDEX_op_smin_vec:
3154    case INDEX_op_umax_vec:
3155    case INDEX_op_umin_vec:
3156    case INDEX_op_shlv_vec:
3157    case INDEX_op_shrv_vec:
3158    case INDEX_op_sarv_vec:
3159    case INDEX_op_rotlv_vec:
3160        return vece <= MO_32 || have_isa_2_07;
3161    case INDEX_op_ssadd_vec:
3162    case INDEX_op_sssub_vec:
3163    case INDEX_op_usadd_vec:
3164    case INDEX_op_ussub_vec:
3165        return vece <= MO_32;
3166    case INDEX_op_cmp_vec:
3167    case INDEX_op_shli_vec:
3168    case INDEX_op_shri_vec:
3169    case INDEX_op_sari_vec:
3170    case INDEX_op_rotli_vec:
3171        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
3172    case INDEX_op_neg_vec:
3173        return vece >= MO_32 && have_isa_3_00;
3174    case INDEX_op_mul_vec:
3175        switch (vece) {
3176        case MO_8:
3177        case MO_16:
3178            return -1;
3179        case MO_32:
3180            return have_isa_2_07 ? 1 : -1;
3181        case MO_64:
3182            return have_isa_3_10;
3183        }
3184        return 0;
3185    case INDEX_op_bitsel_vec:
3186        return have_vsx;
3187    case INDEX_op_rotrv_vec:
3188        return -1;
3189    default:
3190        return 0;
3191    }
3192}
3193
/*
 * Duplicate (splat) the element of size VECE in register SRC across all
 * elements of vector register DST.  SRC may be an integer register only
 * on ISA 3.00 hosts.  Returns false when the combination cannot be
 * handled here, so the caller falls back on dupm or mov+dup.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00.  */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            /* mtvsrdd with RA == RB places the GPR in both dwords.  */
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            /* mtvsrws splats the word across the vector.  */
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup.  */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        /* Splat lane 7: the low byte of the left doubleword.  */
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        /* Splat lane 3: the low halfword of the left doubleword.  */
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        /* Splat lane 1: the low word of the left doubleword.  */
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* xxpermdi with identical sources replicates dword 0.  */
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        /* No VSX: replicate the left dword with two 8-byte shifts.  */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3242
/*
 * Duplicate (splat) the element of size VECE located at BASE+OFFSET in
 * memory across all elements of vector register OUT.  The element is
 * first loaded into some lane of OUT, then splatted from that lane;
 * the lane index is derived from the low bits of OFFSET, flipped on
 * little-endian hosts where lanes number from the opposite end.
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            /* Load the aligned quadword containing the byte.  */
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            /* lvebx loads the byte into its natural lane.  */
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        /* Byte lane within the quadword.  */
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        /* Halfword lane within the quadword.  */
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            /* lxvwsx loads and splats the word in one instruction.  */
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        /* Word lane within the quadword.  */
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* lxvdsx loads and splats the doubleword directly.  */
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        /* Load the aligned quadword, then pick which dword to pair.  */
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3311
/*
 * Emit one vector opcode.  Most operations use the common
 * VRT,VRA,VRB encoding and are looked up in the per-element-size
 * tables below; a zero table entry means "not available at this
 * element size" and is caught by the assert before emission.  The
 * remaining operations are special-cased in the switch.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /* Insn tables indexed by vece (MO_8, MO_16, MO_32, MO_64).  */
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        /* vneg* takes its operand in VRB with VRA zero.  */
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        insn = VAND;
        break;
    case INDEX_op_or_vec:
        insn = VOR;
        break;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        insn = VANDC;
        break;
    case INDEX_op_not_vec:
        /* NOT x == x NOR x.  */
        insn = VNOR;
        a2 = a1;
        break;
    case INDEX_op_orc_vec:
        insn = VORC;
        break;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        /* Only conditions with direct hardware support arrive here;
           the rest are rewritten by expand_vec_cmp.  */
        switch (args[3]) {
        case TCG_COND_EQ:
            insn = eq_op[vece];
            break;
        case TCG_COND_NE:
            insn = ne_op[vece];
            break;
        case TCG_COND_GT:
            insn = gts_op[vece];
            break;
        case TCG_COND_GTU:
            insn = gtu_op[vece];
            break;
        default:
            g_assert_not_reached();
        }
        break;

    case INDEX_op_bitsel_vec:
        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
        return;

    case INDEX_op_dup2_vec:
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        /* Four-operand VRT,VRA,VRB,VRC encoding; emit directly.  */
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    /* A zero table entry means the size/op combination is invalid.  */
    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}
3498
3499static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3500                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3501{
3502    TCGv_vec t1;
3503
3504    if (vece == MO_32) {
3505        /*
3506         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3507         * So using negative numbers gets us the 4th bit easily.
3508         */
3509        imm = sextract32(imm, 0, 5);
3510    } else {
3511        imm &= (8 << vece) - 1;
3512    }
3513
3514    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
3515    t1 = tcg_constant_vec(type, MO_8, imm);
3516    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3517              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3518}
3519
/*
 * Expand cmp_vec: rewrite COND into one of the comparisons with
 * direct hardware support (EQ/GT/GTU, plus NE where available),
 * using operand swapping and/or result inversion for the rest.
 */
static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
    bool need_swap = false, need_inv = false;

    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        /* Directly supported.  */
        break;
    case TCG_COND_NE:
        /* vcmpne* exists for byte/half/word on ISA 3.00 hosts.  */
        if (have_isa_3_00 && vece <= MO_32) {
            break;
        }
        /* fall through */
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /* NE == !EQ, LE == !GT, LEU == !GTU.  */
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        /* a < b == b > a.  */
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        /* a >= b == !(b > a).  */
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGv_vec t1;
        t1 = v1, v1 = v2, v2 = t1;
        cond = tcg_swap_cond(cond);
    }

    /* Emit the now-supported comparison.  */
    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);

    /* Complete an inverted condition by NOT-ing the result mask.  */
    if (need_inv) {
        tcg_gen_not_vec(vece, v0, v0);
    }
}
3569
3570static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3571                           TCGv_vec v1, TCGv_vec v2)
3572{
3573    TCGv_vec t1 = tcg_temp_new_vec(type);
3574    TCGv_vec t2 = tcg_temp_new_vec(type);
3575    TCGv_vec c0, c16;
3576
3577    switch (vece) {
3578    case MO_8:
3579    case MO_16:
3580        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3581                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3582        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3583                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3584        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3585                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3586        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3587                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3588        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3589                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3590	break;
3591
3592    case MO_32:
3593        tcg_debug_assert(!have_isa_2_07);
3594        /*
3595         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3596         * So using -16 is a quick way to represent 16.
3597         */
3598        c16 = tcg_constant_vec(type, MO_8, -16);
3599        c0 = tcg_constant_vec(type, MO_8, 0);
3600
3601        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3602                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3603        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3604                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3605        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3606                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3607        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3608                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3609        tcg_gen_add_vec(MO_32, v0, t1, t2);
3610        break;
3611
3612    default:
3613        g_assert_not_reached();
3614    }
3615    tcg_temp_free_vec(t1);
3616    tcg_temp_free_vec(t2);
3617}
3618
3619void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3620                       TCGArg a0, ...)
3621{
3622    va_list va;
3623    TCGv_vec v0, v1, v2, t0;
3624    TCGArg a2;
3625
3626    va_start(va, a0);
3627    v0 = temp_tcgv_vec(arg_temp(a0));
3628    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3629    a2 = va_arg(va, TCGArg);
3630
3631    switch (opc) {
3632    case INDEX_op_shli_vec:
3633        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3634        break;
3635    case INDEX_op_shri_vec:
3636        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3637        break;
3638    case INDEX_op_sari_vec:
3639        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3640        break;
3641    case INDEX_op_rotli_vec:
3642        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3643        break;
3644    case INDEX_op_cmp_vec:
3645        v2 = temp_tcgv_vec(arg_temp(a2));
3646        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3647        break;
3648    case INDEX_op_mul_vec:
3649        v2 = temp_tcgv_vec(arg_temp(a2));
3650        expand_vec_mul(type, vece, v0, v1, v2);
3651        break;
3652    case INDEX_op_rotlv_vec:
3653        v2 = temp_tcgv_vec(arg_temp(a2));
3654        t0 = tcg_temp_new_vec(type);
3655        tcg_gen_neg_vec(vece, t0, v2);
3656        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3657        tcg_temp_free_vec(t0);
3658        break;
3659    default:
3660        g_assert_not_reached();
3661    }
3662    va_end(va);
3663}
3664
3665static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3666{
3667    switch (op) {
3668    case INDEX_op_goto_ptr:
3669        return C_O0_I1(r);
3670
3671    case INDEX_op_ld8u_i32:
3672    case INDEX_op_ld8s_i32:
3673    case INDEX_op_ld16u_i32:
3674    case INDEX_op_ld16s_i32:
3675    case INDEX_op_ld_i32:
3676    case INDEX_op_ctpop_i32:
3677    case INDEX_op_neg_i32:
3678    case INDEX_op_not_i32:
3679    case INDEX_op_ext8s_i32:
3680    case INDEX_op_ext16s_i32:
3681    case INDEX_op_bswap16_i32:
3682    case INDEX_op_bswap32_i32:
3683    case INDEX_op_extract_i32:
3684    case INDEX_op_ld8u_i64:
3685    case INDEX_op_ld8s_i64:
3686    case INDEX_op_ld16u_i64:
3687    case INDEX_op_ld16s_i64:
3688    case INDEX_op_ld32u_i64:
3689    case INDEX_op_ld32s_i64:
3690    case INDEX_op_ld_i64:
3691    case INDEX_op_ctpop_i64:
3692    case INDEX_op_neg_i64:
3693    case INDEX_op_not_i64:
3694    case INDEX_op_ext8s_i64:
3695    case INDEX_op_ext16s_i64:
3696    case INDEX_op_ext32s_i64:
3697    case INDEX_op_ext_i32_i64:
3698    case INDEX_op_extu_i32_i64:
3699    case INDEX_op_bswap16_i64:
3700    case INDEX_op_bswap32_i64:
3701    case INDEX_op_bswap64_i64:
3702    case INDEX_op_extract_i64:
3703        return C_O1_I1(r, r);
3704
3705    case INDEX_op_st8_i32:
3706    case INDEX_op_st16_i32:
3707    case INDEX_op_st_i32:
3708    case INDEX_op_st8_i64:
3709    case INDEX_op_st16_i64:
3710    case INDEX_op_st32_i64:
3711    case INDEX_op_st_i64:
3712        return C_O0_I2(r, r);
3713
3714    case INDEX_op_add_i32:
3715    case INDEX_op_and_i32:
3716    case INDEX_op_or_i32:
3717    case INDEX_op_xor_i32:
3718    case INDEX_op_andc_i32:
3719    case INDEX_op_orc_i32:
3720    case INDEX_op_eqv_i32:
3721    case INDEX_op_shl_i32:
3722    case INDEX_op_shr_i32:
3723    case INDEX_op_sar_i32:
3724    case INDEX_op_rotl_i32:
3725    case INDEX_op_rotr_i32:
3726    case INDEX_op_setcond_i32:
3727    case INDEX_op_and_i64:
3728    case INDEX_op_andc_i64:
3729    case INDEX_op_shl_i64:
3730    case INDEX_op_shr_i64:
3731    case INDEX_op_sar_i64:
3732    case INDEX_op_rotl_i64:
3733    case INDEX_op_rotr_i64:
3734    case INDEX_op_setcond_i64:
3735        return C_O1_I2(r, r, ri);
3736
3737    case INDEX_op_mul_i32:
3738    case INDEX_op_mul_i64:
3739        return C_O1_I2(r, r, rI);
3740
3741    case INDEX_op_div_i32:
3742    case INDEX_op_divu_i32:
3743    case INDEX_op_rem_i32:
3744    case INDEX_op_remu_i32:
3745    case INDEX_op_nand_i32:
3746    case INDEX_op_nor_i32:
3747    case INDEX_op_muluh_i32:
3748    case INDEX_op_mulsh_i32:
3749    case INDEX_op_orc_i64:
3750    case INDEX_op_eqv_i64:
3751    case INDEX_op_nand_i64:
3752    case INDEX_op_nor_i64:
3753    case INDEX_op_div_i64:
3754    case INDEX_op_divu_i64:
3755    case INDEX_op_rem_i64:
3756    case INDEX_op_remu_i64:
3757    case INDEX_op_mulsh_i64:
3758    case INDEX_op_muluh_i64:
3759        return C_O1_I2(r, r, r);
3760
3761    case INDEX_op_sub_i32:
3762        return C_O1_I2(r, rI, ri);
3763    case INDEX_op_add_i64:
3764        return C_O1_I2(r, r, rT);
3765    case INDEX_op_or_i64:
3766    case INDEX_op_xor_i64:
3767        return C_O1_I2(r, r, rU);
3768    case INDEX_op_sub_i64:
3769        return C_O1_I2(r, rI, rT);
3770    case INDEX_op_clz_i32:
3771    case INDEX_op_ctz_i32:
3772    case INDEX_op_clz_i64:
3773    case INDEX_op_ctz_i64:
3774        return C_O1_I2(r, r, rZW);
3775
3776    case INDEX_op_brcond_i32:
3777    case INDEX_op_brcond_i64:
3778        return C_O0_I2(r, ri);
3779
3780    case INDEX_op_movcond_i32:
3781    case INDEX_op_movcond_i64:
3782        return C_O1_I4(r, r, ri, rZ, rZ);
3783    case INDEX_op_deposit_i32:
3784    case INDEX_op_deposit_i64:
3785        return C_O1_I2(r, 0, rZ);
3786    case INDEX_op_brcond2_i32:
3787        return C_O0_I4(r, r, ri, ri);
3788    case INDEX_op_setcond2_i32:
3789        return C_O1_I4(r, r, r, ri, ri);
3790    case INDEX_op_add2_i64:
3791    case INDEX_op_add2_i32:
3792        return C_O2_I4(r, r, r, r, rI, rZM);
3793    case INDEX_op_sub2_i64:
3794    case INDEX_op_sub2_i32:
3795        return C_O2_I4(r, r, rI, rZM, r, r);
3796
3797    case INDEX_op_qemu_ld_a32_i32:
3798        return C_O1_I1(r, r);
3799    case INDEX_op_qemu_ld_a64_i32:
3800        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
3801    case INDEX_op_qemu_ld_a32_i64:
3802        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
3803    case INDEX_op_qemu_ld_a64_i64:
3804        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
3805
3806    case INDEX_op_qemu_st_a32_i32:
3807        return C_O0_I2(r, r);
3808    case INDEX_op_qemu_st_a64_i32:
3809        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
3810    case INDEX_op_qemu_st_a32_i64:
3811        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
3812    case INDEX_op_qemu_st_a64_i64:
3813        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
3814
3815    case INDEX_op_qemu_ld_a32_i128:
3816    case INDEX_op_qemu_ld_a64_i128:
3817        return C_O2_I1(o, m, r);
3818    case INDEX_op_qemu_st_a32_i128:
3819    case INDEX_op_qemu_st_a64_i128:
3820        return C_O0_I3(o, m, r);
3821
3822    case INDEX_op_add_vec:
3823    case INDEX_op_sub_vec:
3824    case INDEX_op_mul_vec:
3825    case INDEX_op_and_vec:
3826    case INDEX_op_or_vec:
3827    case INDEX_op_xor_vec:
3828    case INDEX_op_andc_vec:
3829    case INDEX_op_orc_vec:
3830    case INDEX_op_nor_vec:
3831    case INDEX_op_eqv_vec:
3832    case INDEX_op_nand_vec:
3833    case INDEX_op_cmp_vec:
3834    case INDEX_op_ssadd_vec:
3835    case INDEX_op_sssub_vec:
3836    case INDEX_op_usadd_vec:
3837    case INDEX_op_ussub_vec:
3838    case INDEX_op_smax_vec:
3839    case INDEX_op_smin_vec:
3840    case INDEX_op_umax_vec:
3841    case INDEX_op_umin_vec:
3842    case INDEX_op_shlv_vec:
3843    case INDEX_op_shrv_vec:
3844    case INDEX_op_sarv_vec:
3845    case INDEX_op_rotlv_vec:
3846    case INDEX_op_rotrv_vec:
3847    case INDEX_op_ppc_mrgh_vec:
3848    case INDEX_op_ppc_mrgl_vec:
3849    case INDEX_op_ppc_muleu_vec:
3850    case INDEX_op_ppc_mulou_vec:
3851    case INDEX_op_ppc_pkum_vec:
3852    case INDEX_op_dup2_vec:
3853        return C_O1_I2(v, v, v);
3854
3855    case INDEX_op_not_vec:
3856    case INDEX_op_neg_vec:
3857        return C_O1_I1(v, v);
3858
3859    case INDEX_op_dup_vec:
3860        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
3861
3862    case INDEX_op_ld_vec:
3863    case INDEX_op_dupm_vec:
3864        return C_O1_I1(v, r);
3865
3866    case INDEX_op_st_vec:
3867        return C_O0_I2(v, r);
3868
3869    case INDEX_op_bitsel_vec:
3870    case INDEX_op_ppc_msum_vec:
3871        return C_O1_I3(v, v, v, v);
3872
3873    default:
3874        g_assert_not_reached();
3875    }
3876}
3877
/*
 * One-time initialization of the register allocator's view of the host:
 * which registers exist per TCG type, which are clobbered across calls,
 * and which are reserved and never handed out.
 */
static void tcg_target_init(TCGContext *s)
{
    /* All 32 GPRs (regset bits 0-31) hold both 32- and 64-bit values. */
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        /* Vector registers occupy the upper half (bits 32-63) of the regset. */
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    /* GPRs r0, r2-r12 are not preserved across calls.  */
    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    /* Vector registers v0-v19 are not preserved across calls.  */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    /* Registers the allocator must never use.  */
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}
3939
3940#ifdef __ELF__
/*
 * DWARF unwind info handed to the JIT debug interface:
 * a CIE, an FDE header, the CFA definition, and a register-offset
 * area sized for one 2-byte DW_CFA_offset pair per callee-saved
 * register plus the 3-byte LR entry (see debug_frame below).
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;
3947
3948/* We're expecting a 2 byte uleb128 encoded value.  */
3949QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3950
3951#if TCG_TARGET_REG_BITS == 64
3952# define ELF_HOST_MACHINE EM_PPC64
3953#else
3954# define ELF_HOST_MACHINE EM_PPC
3955#endif
3956
/*
 * Template unwind record.  The per-register save offsets beyond
 * fde_reg_ofs[3] and the function bounds are filled in at runtime
 * by tcg_register_jit().
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,                 /* DWARF column for lr */

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
3978
3979void tcg_register_jit(const void *buf, size_t buf_size)
3980{
3981    uint8_t *p = &debug_frame.fde_reg_ofs[3];
3982    int i;
3983
3984    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
3985        p[0] = 0x80 + tcg_target_callee_save_regs[i];
3986        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
3987    }
3988
3989    debug_frame.fde.func_start = (uintptr_t)buf;
3990    debug_frame.fde.func_len = buf_size;
3991
3992    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3993}
3994#endif /* __ELF__ */
3995#undef VMULEUB
3996#undef VMULEUH
3997#undef VMULEUW
3998#undef VMULOUB
3999#undef VMULOUH
4000#undef VMULOUW
4001#undef VMSUMUHM
4002