/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "elf.h"
#include "../tcg-pool.c.inc"
#include "../tcg-ldst.c.inc"

/*
 * Standardize on the _CALL_FOO symbols used by GCC:
 * Apple XCode does not define _CALL_DARWIN.
 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
 */
#if TCG_TARGET_REG_BITS == 64
# ifdef _CALL_AIX
    /* ok */
# elif defined(_CALL_ELF) && _CALL_ELF == 1
#  define _CALL_AIX
# elif defined(_CALL_ELF) && _CALL_ELF == 2
    /* ok */
# else
#  error "Unknown ABI"
# endif
#else
# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
    /* ok */
# elif defined(__APPLE__)
#  define _CALL_DARWIN
# elif defined(__ELF__)
#  define _CALL_SYSV
# else
#  error "Unknown ABI"
# endif
#endif
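
/*
 * Thus, for example, powerpc64le-linux (ELFv2) arrives here with
 * _CALL_ELF == 2 and is used as-is; big-endian ELFv1 and AIX both
 * take the _CALL_AIX path; and 32-bit ELF targets that predefine
 * neither symbol are mapped to _CALL_SYSV via __ELF__.
 */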

#if TCG_TARGET_REG_BITS == 64
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
#endif
#ifdef _CALL_SYSV
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
#else
# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
#endif

/* For some memory operations, we need a scratch that isn't R0.  For the AIX
   calling convention, we can re-use the TOC register since we'll be reloading
   it at every call.  Otherwise R12 will do nicely as neither a call-saved
   register nor a parameter register.  */
#ifdef _CALL_AIX
# define TCG_REG_TMP1   TCG_REG_R2
#else
# define TCG_REG_TMP1   TCG_REG_R12
#endif
#define TCG_REG_TMP2    TCG_REG_R11

#define TCG_VEC_TMP1    TCG_REG_V0
#define TCG_VEC_TMP2    TCG_REG_V1

#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))

/* Shorthand for size of a register.  */
#define SZR  (TCG_TARGET_REG_BITS / 8)

#define TCG_CT_CONST_S16  0x100
#define TCG_CT_CONST_S32  0x400
#define TCG_CT_CONST_U32  0x800
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
#define TCG_CT_CONST_WSZ  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifndef R_PPC64_PCREL34
#define R_PPC64_PCREL34  132
#endif

#define have_isel  (cpuinfo & CPUINFO_ISEL)

#define TCG_GUEST_BASE_REG  TCG_REG_R30

#ifdef CONFIG_DEBUG_TCG
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_R3 + slot;
}

static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};

/* For PPC, we use TB+4 instead of TB as the base. */
static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
{
    return tcg_tbrel_diff(s, target) - 4;
}

static inline bool in_range_b(tcg_target_long target)
{
    return target == sextract64(target, 0, 26);
}
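
/*
 * An I-form branch carries a 26-bit signed byte displacement (LI << 2,
 * sign-extended), i.e. targets within [-0x2000000, +0x1fffffc] of the
 * branch: in_range_b(0x1fffffc) holds, while in_range_b(0x2000000)
 * does not.
 */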

static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(in_range_b(disp));
    return disp & 0x3fffffc;
}

static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (in_range_b(disp)) {
        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
        return true;
    }
    return false;
}

static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
                               const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
    tcg_debug_assert(disp == (int16_t) disp);
    return disp & 0xfffc;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == (int16_t) disp) {
        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
        return true;
    }
    return false;
}

static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);

    if (disp == sextract64(disp, 0, 34)) {
        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
        return true;
    }
    return false;
}
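
/*
 * The 34-bit displacement of a prefixed insn is split across the two
 * words: the high 18 bits live in the prefix word and the low 16 in
 * the suffix, for a reach of +/- 8GiB from the prefix.
 */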

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* The only 32-bit constraint we use aside from
       TCG_CT_CONST is TCG_CT_CONST_S16.  */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    } else if ((ct & TCG_CT_CONST_WSZ)
               && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#define OPCD(opc) ((opc)<<26)
#define XO19(opc) (OPCD(19)|((opc)<<1))
#define MD30(opc) (OPCD(30)|((opc)<<2))
#define MDS30(opc) (OPCD(30)|((opc)<<1))
#define XO31(opc) (OPCD(31)|((opc)<<1))
#define XO58(opc) (OPCD(58)|(opc))
#define XO62(opc) (OPCD(62)|(opc))
#define VX4(opc)  (OPCD(4)|(opc))

#define B      OPCD( 18)
#define BC     OPCD( 16)

#define LBZ    OPCD( 34)
#define LHZ    OPCD( 40)
#define LHA    OPCD( 42)
#define LWZ    OPCD( 32)
#define LWZUX  XO31( 55)
#define LD     XO58(  0)
#define LDX    XO31( 21)
#define LDU    XO58(  1)
#define LDUX   XO31( 53)
#define LWA    XO58(  2)
#define LWAX   XO31(341)
#define LQ     OPCD( 56)

#define STB    OPCD( 38)
#define STH    OPCD( 44)
#define STW    OPCD( 36)
#define STD    XO62(  0)
#define STDU   XO62(  1)
#define STDX   XO31(149)
#define STQ    XO62(  2)

#define PLWA   OPCD( 41)
#define PLD    OPCD( 57)
#define PLXSD  OPCD( 42)
#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */

#define PSTD   OPCD( 61)
#define PSTXSD OPCD( 46)
#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */

#define ADDIC  OPCD( 12)
#define ADDI   OPCD( 14)
#define ADDIS  OPCD( 15)
#define ORI    OPCD( 24)
#define ORIS   OPCD( 25)
#define XORI   OPCD( 26)
#define XORIS  OPCD( 27)
#define ANDI   OPCD( 28)
#define ANDIS  OPCD( 29)
#define MULLI  OPCD(  7)
#define CMPLI  OPCD( 10)
#define CMPI   OPCD( 11)
#define SUBFIC OPCD( 8)

#define LWZU   OPCD( 33)
#define STWU   OPCD( 37)

#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
#define RLWNM  OPCD( 23)

#define RLDICL MD30(  0)
#define RLDICR MD30(  1)
#define RLDIMI MD30(  3)
#define RLDCL  MDS30( 8)

#define BCLR   XO19( 16)
#define BCCTR  XO19(528)
#define CRAND  XO19(257)
#define CRANDC XO19(129)
#define CRNAND XO19(225)
#define CROR   XO19(449)
#define CRNOR  XO19( 33)
#define ADDPCIS XO19( 2)

#define EXTSB  XO31(954)
#define EXTSH  XO31(922)
#define EXTSW  XO31(986)
#define ADD    XO31(266)
#define ADDE   XO31(138)
#define ADDME  XO31(234)
#define ADDZE  XO31(202)
#define ADDC   XO31( 10)
#define AND    XO31( 28)
#define SUBF   XO31( 40)
#define SUBFC  XO31(  8)
#define SUBFE  XO31(136)
#define SUBFME XO31(232)
#define SUBFZE XO31(200)
#define OR     XO31(444)
#define XOR    XO31(316)
#define MULLW  XO31(235)
#define MULHW  XO31( 75)
#define MULHWU XO31( 11)
#define DIVW   XO31(491)
#define DIVWU  XO31(459)
#define MODSW  XO31(779)
#define MODUW  XO31(267)
#define CMP    XO31(  0)
#define CMPL   XO31( 32)
#define LHBRX  XO31(790)
#define LWBRX  XO31(534)
#define LDBRX  XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
#define STDBRX XO31(660)
#define MFSPR  XO31(339)
#define MTSPR  XO31(467)
#define SRAWI  XO31(824)
#define NEG    XO31(104)
#define MFCR   XO31( 19)
#define MFOCRF (MFCR | (1u << 20))
#define NOR    XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
#define CNTTZW XO31(538)
#define CNTTZD XO31(570)
#define CNTPOPW XO31(378)
#define CNTPOPD XO31(506)
#define ANDC   XO31( 60)
#define ORC    XO31(412)
#define EQV    XO31(284)
#define NAND   XO31(476)
#define ISEL   XO31( 15)

#define MULLD  XO31(233)
#define MULHD  XO31( 73)
#define MULHDU XO31(  9)
#define DIVD   XO31(489)
#define DIVDU  XO31(457)
#define MODSD  XO31(777)
#define MODUD  XO31(265)

#define LBZX   XO31( 87)
#define LHZX   XO31(279)
#define LHAX   XO31(343)
#define LWZX   XO31( 23)
#define STBX   XO31(215)
#define STHX   XO31(407)
#define STWX   XO31(151)

#define EIEIO  XO31(854)
#define HWSYNC XO31(598)
#define LWSYNC (HWSYNC | (1u << 21))

#define SPR(a, b) ((((a)<<5)|(b))<<11)
#define LR     SPR(8, 0)
#define CTR    SPR(9, 0)

#define SLW    XO31( 24)
#define SRW    XO31(536)
#define SRAW   XO31(792)

#define SLD    XO31( 27)
#define SRD    XO31(539)
#define SRAD   XO31(794)
#define SRADI  XO31(413<<1)

#define BRH    XO31(219)
#define BRW    XO31(155)
#define BRD    XO31(187)

#define TW     XO31( 4)
#define TRAP   (TW | TO(31))

#define SETBC    XO31(384)  /* v3.10 */
#define SETBCR   XO31(416)  /* v3.10 */
#define SETNBC   XO31(448)  /* v3.10 */
#define SETNBCR  XO31(480)  /* v3.10 */

#define NOP    ORI  /* ori 0,0,0 */

#define LVX        XO31(103)
#define LVEBX      XO31(7)
#define LVEHX      XO31(39)
#define LVEWX      XO31(71)
#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
#define LXSD       (OPCD(57) | 2)   /* v3.00 */
#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */

#define STVX       XO31(231)
#define STVEWX     XO31(199)
#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
#define STXSD      (OPCD(61) | 2)   /* v3.00 */

#define VADDSBS    VX4(768)
#define VADDUBS    VX4(512)
#define VADDUBM    VX4(0)
#define VADDSHS    VX4(832)
#define VADDUHS    VX4(576)
#define VADDUHM    VX4(64)
#define VADDSWS    VX4(896)
#define VADDUWS    VX4(640)
#define VADDUWM    VX4(128)
#define VADDUDM    VX4(192)       /* v2.07 */

#define VSUBSBS    VX4(1792)
#define VSUBUBS    VX4(1536)
#define VSUBUBM    VX4(1024)
#define VSUBSHS    VX4(1856)
#define VSUBUHS    VX4(1600)
#define VSUBUHM    VX4(1088)
#define VSUBSWS    VX4(1920)
#define VSUBUWS    VX4(1664)
#define VSUBUWM    VX4(1152)
#define VSUBUDM    VX4(1216)      /* v2.07 */

#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */

#define VMAXSB     VX4(258)
#define VMAXSH     VX4(322)
#define VMAXSW     VX4(386)
#define VMAXSD     VX4(450)       /* v2.07 */
#define VMAXUB     VX4(2)
#define VMAXUH     VX4(66)
#define VMAXUW     VX4(130)
#define VMAXUD     VX4(194)       /* v2.07 */
#define VMINSB     VX4(770)
#define VMINSH     VX4(834)
#define VMINSW     VX4(898)
#define VMINSD     VX4(962)       /* v2.07 */
#define VMINUB     VX4(514)
#define VMINUH     VX4(578)
#define VMINUW     VX4(642)
#define VMINUD     VX4(706)       /* v2.07 */

#define VCMPEQUB   VX4(6)
#define VCMPEQUH   VX4(70)
#define VCMPEQUW   VX4(134)
#define VCMPEQUD   VX4(199)       /* v2.07 */
#define VCMPGTSB   VX4(774)
#define VCMPGTSH   VX4(838)
#define VCMPGTSW   VX4(902)
#define VCMPGTSD   VX4(967)       /* v2.07 */
#define VCMPGTUB   VX4(518)
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)
#define VCMPGTUD   VX4(711)       /* v2.07 */
#define VCMPNEB    VX4(7)         /* v3.00 */
#define VCMPNEH    VX4(71)        /* v3.00 */
#define VCMPNEW    VX4(135)       /* v3.00 */

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSLD       VX4(1476)      /* v2.07 */
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRD       VX4(1732)      /* v2.07 */
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)
#define VSRAD      VX4(964)       /* v2.07 */
#define VRLB       VX4(4)
#define VRLH       VX4(68)
#define VRLW       VX4(132)
#define VRLD       VX4(196)       /* v2.07 */

#define VMULEUB    VX4(520)
#define VMULEUH    VX4(584)
#define VMULEUW    VX4(648)       /* v2.07 */
#define VMULOUB    VX4(8)
#define VMULOUH    VX4(72)
#define VMULOUW    VX4(136)       /* v2.07 */
#define VMULUWM    VX4(137)       /* v2.07 */
#define VMULLD     VX4(457)       /* v3.10 */
#define VMSUMUHM   VX4(38)

#define VMRGHB     VX4(12)
#define VMRGHH     VX4(76)
#define VMRGHW     VX4(140)
#define VMRGLB     VX4(268)
#define VMRGLH     VX4(332)
#define VMRGLW     VX4(396)

#define VPKUHUM    VX4(14)
#define VPKUWUM    VX4(78)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
#define VOR        VX4(1156)
#define VXOR       VX4(1220)
#define VEQV       VX4(1668)      /* v2.07 */
#define VNAND      VX4(1412)      /* v2.07 */
#define VORC       VX4(1348)      /* v2.07 */

#define VSPLTB     VX4(524)
#define VSPLTH     VX4(588)
#define VSPLTW     VX4(652)
#define VSPLTISB   VX4(780)
#define VSPLTISH   VX4(844)
#define VSPLTISW   VX4(908)

#define VSLDOI     VX4(44)

#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */

#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */

#define RT(r) ((r)<<21)
#define RS(r) ((r)<<21)
#define RA(r) ((r)<<16)
#define RB(r) ((r)<<11)
#define TO(t) ((t)<<21)
#define SH(s) ((s)<<11)
#define MB(b) ((b)<<6)
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
#define FXM(b) (1 << (19 - (b)))

#define VRT(r)  (((r) & 31) << 21)
#define VRA(r)  (((r) & 31) << 16)
#define VRB(r)  (((r) & 31) << 11)
#define VRC(r)  (((r) & 31) <<  6)

#define LK    1

#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))

#define BF(n)    ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
#define BC_(n, c) (((c)+((n)*4))<<6)

#define BO_COND_TRUE  BO(12)
#define BO_COND_FALSE BO( 4)
#define BO_ALWAYS     BO(20)

enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};

static const uint32_t tcg_to_bc[] = {
    [TCG_COND_EQ]  = BC | BI(7, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(7, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(7, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(7, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(7, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(7, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(7, CR_GT) | BO_COND_TRUE,
};
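
/*
 * For example, tcg_to_bc[TCG_COND_EQ] assembles as
 * "bc 12, 4*cr7+eq, target": BO = 12 (branch if CR bit set) and
 * BI = 4 * 7 + CR_EQ = 30, the EQ bit of CR field 7, which is the
 * field tcg_out_cmp targets below.
 */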

/* The low bit here is set if the RA and RB fields must be inverted.  */
static const uint32_t tcg_to_isel[] = {
    [TCG_COND_EQ]  = ISEL | BC_(7, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(7, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(7, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(7, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};
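
/*
 * E.g. TCG_COND_GE has no CR bit of its own: it is ISEL on CR7[LT]
 * with the low bit set, telling the emitter to swap the two source
 * operands so the opposite value is selected when LT is set.
 */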

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
static bool tcg_out_need_prefix_align(TCGContext *s)
{
    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
}

static void tcg_out_prefix_align(TCGContext *s)
{
    if (tcg_out_need_prefix_align(s)) {
        tcg_out32(s, NOP);
    }
}

static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
{
    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}

/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
                          unsigned ra, tcg_target_long imm, bool r)
{
    tcg_insn_unit p, i;

    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
    i = opc | TAI(rt, ra, imm);

    tcg_out_prefix_align(s);
    tcg_out32(s, p);
    tcg_out32(s, i);
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset);

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb);
}
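
/*
 * MD-form insns split their 6-bit SH and MB fields: sh[0:4] sits in
 * the usual SH position with sh[5] in insn bit 1, and the 6-bit mb
 * value is stored rotated left by one so its high bit becomes the low
 * bit of the field.  E.g. sh = 33 packs as SH(1) | 2.
 */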

static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                               int sh, int mb, int me)
{
    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSB | RA(dst) | RS(src));
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    tcg_out32(s, EXTSH | RA(dst) | RS(src));
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out32(s, EXTSW | RA(dst) | RS(src));
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32s(s, dst, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
{
    tcg_out_ext32u(s, dst, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
}

static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}

static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
}

static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
}

static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
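
/*
 * SRADI likewise splits its 6-bit shift count: the low five bits go
 * in SH and bit 5 lands in insn bit 1 via (c >> 4) & 2; e.g. c = 63
 * packs as SH(31) | 2.
 */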

static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}
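
/*
 * ADDPCIS scatters its 16-bit immediate D across the insn as d0 (10
 * bits), d1 (5 bits) and d2 (1 bit).  Worked example: imm =
 * 0x12340000 gives D = 0x1234, hence d2 = 0, d1 = 0x1a, d0 = 0x48.
 */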

static void tcg_out_bswap16(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap32(TCGContext *s, TCGReg dst, TCGReg src, int flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static void tcg_out_bswap64(TCGContext *s, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);
    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

/* Emit a move into ret of arg, if it can be done in one insn.  */
static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
{
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        return true;
    }
    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        return true;
    }
    return false;
}
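
/*
 * That is, one insn covers "addi ret,0,val" for sign-extended 16-bit
 * values and "lis ret,val@h" for 32-bit values with a zero low half;
 * e.g. 0x12340000 emits ADDIS | TAI(ret, 0, 0x1234).
 */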

static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(ret < TCG_REG_V0);
        tcg_out_movi_int(s, type, ret, arg, false);
        break;

    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
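
/*
 * Worked example: c = 0x00ffff00 gives lsb = 0x100 and test =
 * 0x01000000, a power of two, so the pattern is accepted with mb = 8
 * and me = 23: the big-endian bit numbers of the mask's first and
 * last set bits, as RLWINM expects.
 */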

static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
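
/*
 * E.g. c = 0xffffffff00000000 matches the 1..10..0 form: lsb is
 * 1ull << 32 and c == -lsb, so mb = 0 and me = clz64(lsb) = 31.
 */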

static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    int mb, me;

    if (mask_operand(c, &mb, &me)) {
        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
{
    int mb, me;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    if (mask64_operand(c, &mb, &me)) {
        if (mb == 0) {
            tcg_out_rld(s, RLDICR, dst, src, 0, me);
        } else {
            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
        }
    } else if ((c & 0xffff) == c) {
        tcg_out32(s, ANDI | SAI(src, dst, c));
        return;
    } else if ((c & 0xffff0000) == c) {
        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
        return;
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
    }
}

static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
                           int op_lo, int op_hi)
{
    if (c >> 16) {
        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
        src = dst;
    }
    if (c & 0xffff) {
        tcg_out32(s, op_lo | SAI(src, dst, c));
        src = dst;
    }
}

static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}

static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}

static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}

static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
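
/*
 * In the final fallback above, the offset splits into a high ADDIS
 * part and a low 16-bit displacement.  E.g. offset = 0x12345678 gives
 * l0 = 0x5678 and l1 = 0x1234, emitting "addis rs,base,0x1234" and
 * then the operation with displacement 0x5678.  When sign-extension
 * of l0 would flip the sign of a non-negative offset (l1 < 0 with
 * orig >= 0), the high part is emitted as two ADDIS steps, the second
 * adding 0x4000 << 16.
 */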

static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (arg < TCG_REG_V0) {
            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        break;
    case TCG_TYPE_I64:
        if (arg < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(arg >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
                             STXSDX, arg, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        if (offset & 8) {
            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
            arg = TCG_VEC_TMP1;
        }
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(arg >= TCG_REG_V0);
        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
                         STVX, arg, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int cr, TCGType type)
{
    int imm;
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Simplify the comparisons below wrt CMPI.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            } else if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            if ((int16_t) arg2 == arg2) {
                op = CMPI;
                imm = 1;
                break;
            }
        }
        op = CMP;
        imm = 0;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            if ((uint16_t) arg2 == arg2) {
                op = CMPLI;
                imm = 1;
                break;
            }
        }
        op = CMPL;
        imm = 0;
        break;

    default:
        g_assert_not_reached();
    }
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);

    if (imm) {
        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
    } else {
        if (const_arg2) {
            tcg_out_movi(s, type, TCG_REG_R0, arg2);
            arg2 = TCG_REG_R0;
        }
        tcg_out32(s, op | RA(arg1) | RB(arg2));
    }
}

static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}
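
/*
 * The count-leading-zeros path works because cntlzw returns 32 only
 * for src == 0 and 32 >> 5 == 1, while any nonzero source yields a
 * count below 32 that shifts to 0 (likewise cntlzd with a count of 64
 * and a shift of 6).  The addic/subfe pair instead computes -1 + CA,
 * which the comment above unfolds.
 */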
1775
1776static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
1777                                TCGReg dst, TCGReg src, bool neg)
1778{
1779    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
1780        /*
1781         * X != 0 implies X + -1 generates a carry.  SUBFE then computes
1782         * R = ~(X-1) + X + CA = -X + X + CA = CA.
1783         */
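        /*
         * E.g. with dst=r3, src=r4 (illustrative):
         *     addic r0, r4, -1       # r0 = r4 - 1, CA = (r4 != 0)
         *     subfe r3, r0, r4       # r3 = ~(r4-1) + r4 + CA = CA
         */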
1784        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
1785        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
1786        return;
1787    }
1788    tcg_out_setcond_eq0(s, type, dst, src, false);
1789    if (neg) {
1790        tcg_out32(s, ADDI | TAI(dst, dst, -1));
1791    } else {
1792        tcg_out_xori32(s, dst, dst, 1);
1793    }
1794}
1795
1796static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
1797                                  bool const_arg2)
1798{
1799    if (const_arg2) {
1800        if ((uint32_t)arg2 == arg2) {
1801            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
1802        } else {
1803            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
1804            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
1805        }
1806    } else {
1807        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
1808    }
1809    return TCG_REG_R0;
1810}
1811
1812static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
1813                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
1814                            int const_arg2, bool neg)
1815{
1816    int sh;
1817    bool inv;
1818
1819    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1820
1821    /* Ignore high bits of a potential constant arg2.  */
1822    if (type == TCG_TYPE_I32) {
1823        arg2 = (uint32_t)arg2;
1824    }
1825
1826    /* With SETBC/SETBCR, we can always implement with 2 insns. */
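    /*
     * E.g. setcond EQ then becomes, roughly:
     *     cmpd  cr7, arg1, arg2
     *     setbc arg0, 4*cr7+eq      (setnbc when neg)
     */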
1827    if (have_isa_3_10) {
1828        tcg_insn_unit bi, opc;
1829
1830        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1831
1832        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
1833        bi = tcg_to_bc[cond] & (0x1f << 16);
1834        if (tcg_to_bc[cond] & BO(8)) {
1835            opc = neg ? SETNBC : SETBC;
1836        } else {
1837            opc = neg ? SETNBCR : SETBCR;
1838        }
1839        tcg_out32(s, opc | RT(arg0) | bi);
1840        return;
1841    }
1842
1843    /* Handle common and trivial cases before handling anything else.  */
1844    if (arg2 == 0) {
1845        switch (cond) {
1846        case TCG_COND_EQ:
1847            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1848            return;
1849        case TCG_COND_NE:
1850            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1851            return;
1852        case TCG_COND_GE:
1853            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
1854            arg1 = arg0;
1855            /* FALLTHRU */
1856        case TCG_COND_LT:
1857            /* Extract the sign bit.  */
1858            if (type == TCG_TYPE_I32) {
1859                if (neg) {
1860                    tcg_out_sari32(s, arg0, arg1, 31);
1861                } else {
1862                    tcg_out_shri32(s, arg0, arg1, 31);
1863                }
1864            } else {
1865                if (neg) {
1866                    tcg_out_sari64(s, arg0, arg1, 63);
1867                } else {
1868                    tcg_out_shri64(s, arg0, arg1, 63);
1869                }
1870            }
1871            return;
1872        default:
1873            break;
1874        }
1875    }
1876
1877    /* If we have ISEL, we can implement everything with 3 or 4 insns.
1878       All other cases below are also at least 3 insns, so speed up the
1879       code generator by not considering them and always using ISEL.  */
1880    if (have_isel) {
1881        int isel, tab;
1882
1883        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1884
1885        isel = tcg_to_isel[cond];
1886
1887        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
1888        if (isel & 1) {
1889            /* arg0 = (bc ? 0 : 1 or -1) */
1890            tab = TAB(arg0, 0, arg0);
1891            isel &= ~1;
1892        } else {
1893            /* arg0 = (bc ? 1 or -1 : 0) */
1894            tcg_out_movi(s, type, TCG_REG_R0, 0);
1895            tab = TAB(arg0, arg0, TCG_REG_R0);
1896        }
1897        tcg_out32(s, isel | tab);
1898        return;
1899    }
1900
1901    inv = false;
1902    switch (cond) {
1903    case TCG_COND_EQ:
1904        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1905        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
1906        break;
1907
1908    case TCG_COND_NE:
1909        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
1910        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
1911        break;
1912
1913    case TCG_COND_LE:
1914    case TCG_COND_LEU:
1915        inv = true;
1916        /* fall through */
1917    case TCG_COND_GT:
1918    case TCG_COND_GTU:
1919        sh = 30; /* CR7 CR_GT */
1920        goto crtest;
1921
1922    case TCG_COND_GE:
1923    case TCG_COND_GEU:
1924        inv = true;
1925        /* fall through */
1926    case TCG_COND_LT:
1927    case TCG_COND_LTU:
1928        sh = 29; /* CR7 CR_LT */
1929        goto crtest;
1930
1931    crtest:
1932        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1933        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
1934        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
1935        if (neg && inv) {
1936            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
1937        } else if (neg) {
1938            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
1939        } else if (inv) {
1940            tcg_out_xori32(s, arg0, arg0, 1);
1941        }
1942        break;
1943
1944    default:
1945        g_assert_not_reached();
1946    }
1947}
1948
1949static void tcg_out_bc(TCGContext *s, int bc, TCGLabel *l)
1950{
1951    if (l->has_value) {
1952        bc |= reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
1953    } else {
1954        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
1955    }
1956    tcg_out32(s, bc);
1957}
1958
1959static void tcg_out_brcond(TCGContext *s, TCGCond cond,
1960                           TCGArg arg1, TCGArg arg2, int const_arg2,
1961                           TCGLabel *l, TCGType type)
1962{
1963    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
1964    tcg_out_bc(s, tcg_to_bc[cond], l);
1965}
1966
1967static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1968                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
1969                            TCGArg v2, bool const_c2)
1970{
1971    /* If for some reason both inputs are zero, don't produce bad code.  */
1972    if (v1 == 0 && v2 == 0) {
1973        tcg_out_movi(s, type, dest, 0);
1974        return;
1975    }
1976
1977    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
1978
1979    if (have_isel) {
1980        int isel = tcg_to_isel[cond];
1981
1982        /* Swap the V operands if the operation indicates inversion.  */
1983        if (isel & 1) {
1984            int t = v1;
1985            v1 = v2;
1986            v2 = t;
1987            isel &= ~1;
1988        }
1989        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
1990        if (v2 == 0) {
1991            tcg_out_movi(s, type, TCG_REG_R0, 0);
1992        }
1993        tcg_out32(s, isel | TAB(dest, v1, v2));
1994    } else {
1995        if (dest == v2) {
1996            cond = tcg_invert_cond(cond);
1997            v2 = v1;
1998        } else if (dest != v1) {
1999            if (v1 == 0) {
2000                tcg_out_movi(s, type, dest, 0);
2001            } else {
2002                tcg_out_mov(s, type, dest, v1);
2003            }
2004        }
2005        /* Branch forward over one insn */
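        /* (The literal 8 is the BD byte displacement: target = pc + 8,
           i.e. skip exactly one 4-byte insn.) */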
2006        tcg_out32(s, tcg_to_bc[cond] | 8);
2007        if (v2 == 0) {
2008            tcg_out_movi(s, type, dest, 0);
2009        } else {
2010            tcg_out_mov(s, type, dest, v2);
2011        }
2012    }
2013}
2014
2015static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
2016                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
2017{
2018    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2019        tcg_out32(s, opc | RA(a0) | RS(a1));
2020    } else {
2021        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 7, type);
2022        /* Note that the only other valid constant for a2 is 0.  */
2023        if (have_isel) {
2024            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
2025            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
2026        } else if (!const_a2 && a0 == a2) {
2027            tcg_out32(s, tcg_to_bc[TCG_COND_EQ] | 8);
2028            tcg_out32(s, opc | RA(a0) | RS(a1));
2029        } else {
2030            tcg_out32(s, opc | RA(a0) | RS(a1));
2031            tcg_out32(s, tcg_to_bc[TCG_COND_NE] | 8);
2032            if (const_a2) {
2033                tcg_out_movi(s, type, a0, 0);
2034            } else {
2035                tcg_out_mov(s, type, a0, a2);
2036            }
2037        }
2038    }
2039}
2040
2041static void tcg_out_cmp2(TCGContext *s, const TCGArg *args,
2042                         const int *const_args)
2043{
2044    static const struct { uint8_t bit1, bit2; } bits[] = {
2045        [TCG_COND_LT ] = { CR_LT, CR_LT },
2046        [TCG_COND_LE ] = { CR_LT, CR_GT },
2047        [TCG_COND_GT ] = { CR_GT, CR_GT },
2048        [TCG_COND_GE ] = { CR_GT, CR_LT },
2049        [TCG_COND_LTU] = { CR_LT, CR_LT },
2050        [TCG_COND_LEU] = { CR_LT, CR_GT },
2051        [TCG_COND_GTU] = { CR_GT, CR_GT },
2052        [TCG_COND_GEU] = { CR_GT, CR_LT },
2053    };
2054
2055    TCGCond cond = args[4], cond2;
2056    TCGArg al, ah, bl, bh;
2057    int blconst, bhconst;
2058    int op, bit1, bit2;
2059
2060    al = args[0];
2061    ah = args[1];
2062    bl = args[2];
2063    bh = args[3];
2064    blconst = const_args[2];
2065    bhconst = const_args[3];
2066
2067    switch (cond) {
2068    case TCG_COND_EQ:
2069        op = CRAND;
2070        goto do_equality;
2071    case TCG_COND_NE:
2072        op = CRNAND;
2073    do_equality:
2074        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
2075        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
2076        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2077        break;
2078
2079    case TCG_COND_LT:
2080    case TCG_COND_LE:
2081    case TCG_COND_GT:
2082    case TCG_COND_GE:
2083    case TCG_COND_LTU:
2084    case TCG_COND_LEU:
2085    case TCG_COND_GTU:
2086    case TCG_COND_GEU:
2087        bit1 = bits[cond].bit1;
2088        bit2 = bits[cond].bit2;
2089        op = (bit1 != bit2 ? CRANDC : CRAND);
2090        cond2 = tcg_unsigned_cond(cond);
2091
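        /*
         * Sketch of the combine: for (ah:al) OP (bh:bl),
         *   result = (ah OP' bh) || (ah == bh && al OPu bl)
         * with OP' the strict form of OP and OPu its unsigned form.
         * cr6 holds the high-part compare, cr7 the unsigned low-part
         * compare; the final result lands in cr7[eq].
         */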
2092        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
2093        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
2094        tcg_out32(s, op | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
2095        tcg_out32(s, CROR | BT(7, CR_EQ) | BA(6, bit1) | BB(7, CR_EQ));
2096        break;
2097
2098    default:
2099        g_assert_not_reached();
2100    }
2101}
2102
2103static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
2104                             const int *const_args)
2105{
2106    tcg_out_cmp2(s, args + 1, const_args + 1);
2107    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
2108    tcg_out_rlw(s, RLWINM, args[0], TCG_REG_R0, 31, 31, 31);
2109}
2110
2111static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
2112                            const int *const_args)
2113{
2114    tcg_out_cmp2(s, args, const_args);
2115    tcg_out_bc(s, BC | BI(7, CR_EQ) | BO_COND_TRUE, arg_label(args[5]));
2116}
2117
2118static void tcg_out_mb(TCGContext *s, TCGArg a0)
2119{
2120    uint32_t insn;
2121
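    /*
     * Only store-before-load ordering requires the full hwsync;
     * lwsync orders load/load, load/store and store/store and is cheaper.
     */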
2122    if (a0 & TCG_MO_ST_LD) {
2123        insn = HWSYNC;
2124    } else {
2125        insn = LWSYNC;
2126    }
2127
2128    tcg_out32(s, insn);
2129}
2130
2131static void tcg_out_call_int(TCGContext *s, int lk,
2132                             const tcg_insn_unit *target)
2133{
2134#ifdef _CALL_AIX
2135    /* Look through the descriptor.  Branch directly if the target is in
2136       range and the TOC is cheap to rebuild; else load via the descriptor. */
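    /*
     * An AIX/ELFv1 function descriptor is assumed to be laid out as:
     *   [0] code entry point, [1] TOC base, [2] environment (unused here).
     */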
2137    const void *tgt = ((const void * const *)target)[0];
2138    uintptr_t toc = ((const uintptr_t *)target)[1];
2139    intptr_t diff = tcg_pcrel_diff(s, tgt);
2140
2141    if (in_range_b(diff) && toc == (uint32_t)toc) {
2142        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
2143        tcg_out_b(s, lk, tgt);
2144    } else {
2145        /* Fold the low bits of the constant into the addresses below.  */
2146        intptr_t arg = (intptr_t)target;
2147        int ofs = (int16_t)arg;
2148
2149        if (ofs + 8 < 0x8000) {
2150            arg -= ofs;
2151        } else {
2152            ofs = 0;
2153        }
2154        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
2155        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
2156        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
2157        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
2158        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2159    }
2160#elif defined(_CALL_ELF) && _CALL_ELF == 2
2161    intptr_t diff;
2162
2163    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
2164       address, which the callee uses to compute its TOC address.  */
2165    /* FIXME: when the branch is in range, we could avoid r12 load if we
2166       knew that the destination uses the same TOC, and what its local
2167       entry point offset is.  */
2168    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);
2169
2170    diff = tcg_pcrel_diff(s, target);
2171    if (in_range_b(diff)) {
2172        tcg_out_b(s, lk, target);
2173    } else {
2174        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
2175        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
2176    }
2177#else
2178    tcg_out_b(s, lk, target);
2179#endif
2180}
2181
2182static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
2183                         const TCGHelperInfo *info)
2184{
2185    tcg_out_call_int(s, LK, target);
2186}
2187
2188static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
2189    [MO_UB] = LBZX,
2190    [MO_UW] = LHZX,
2191    [MO_UL] = LWZX,
2192    [MO_UQ] = LDX,
2193    [MO_SW] = LHAX,
2194    [MO_SL] = LWAX,
2195    [MO_BSWAP | MO_UB] = LBZX,
2196    [MO_BSWAP | MO_UW] = LHBRX,
2197    [MO_BSWAP | MO_UL] = LWBRX,
2198    [MO_BSWAP | MO_UQ] = LDBRX,
2199};
2200
2201static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
2202    [MO_UB] = STBX,
2203    [MO_UW] = STHX,
2204    [MO_UL] = STWX,
2205    [MO_UQ] = STDX,
2206    [MO_BSWAP | MO_UB] = STBX,
2207    [MO_BSWAP | MO_UW] = STHBRX,
2208    [MO_BSWAP | MO_UL] = STWBRX,
2209    [MO_BSWAP | MO_UQ] = STDBRX,
2210};
2211
2212static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
2213{
2214    if (arg < 0) {
2215        arg = TCG_REG_TMP1;
2216    }
2217    tcg_out32(s, MFSPR | RT(arg) | LR);
2218    return arg;
2219}
2220
2221/*
2222 * For ppc32, sorting 4 input registers into 4 argument registers can,
2223 * in the worst case, require 3 temporaries.
2224 */
2225static const TCGLdstHelperParam ldst_helper_param = {
2226    .ra_gen = ldst_ra_gen,
2227    .ntmp = 3,
2228    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
2229};
2230
2231static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2232{
2233    MemOp opc = get_memop(lb->oi);
2234
2235    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2236        return false;
2237    }
2238
2239    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
2240    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
2241    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
2242
2243    tcg_out_b(s, 0, lb->raddr);
2244    return true;
2245}
2246
2247static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
2248{
2249    MemOp opc = get_memop(lb->oi);
2250
2251    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
2252        return false;
2253    }
2254
2255    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
2256    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
2257
2258    tcg_out_b(s, 0, lb->raddr);
2259    return true;
2260}
2261
2262typedef struct {
2263    TCGReg base;
2264    TCGReg index;
2265    TCGAtomAlign aa;
2266} HostAddress;
2267
2268bool tcg_target_has_memory_bswap(MemOp memop)
2269{
2270    TCGAtomAlign aa;
2271
2272    if ((memop & MO_SIZE) <= MO_64) {
2273        return true;
2274    }
2275
2276    /*
2277     * Reject 16-byte memop with 16-byte atomicity,
2278     * but do allow a pair of 64-bit operations.
2279     */
2280    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2281    return aa.atom <= MO_64;
2282}
2283
2284/* We expect to use a 16-bit negative offset from ENV.  */
2285#define MIN_TLB_MASK_TABLE_OFS  -32768
2286
2287/*
2288 * For system-mode, perform the TLB load and compare.
2289 * For user-mode, perform any required alignment tests.
2290 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2291 * is required and fill in @h with the host address for the fast path.
2292 */
2293static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
2294                                           TCGReg addrlo, TCGReg addrhi,
2295                                           MemOpIdx oi, bool is_ld)
2296{
2297    TCGType addr_type = s->addr_type;
2298    TCGLabelQemuLdst *ldst = NULL;
2299    MemOp opc = get_memop(oi);
2300    MemOp a_bits, s_bits;
2301
2302    /*
2303     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
2304     *
2305     * Before 3.0, "An access that is not atomic is performed as a set of
2306     * smaller disjoint atomic accesses. In general, the number and alignment
2307     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
2308     *
2309     * As of 3.0, "the non-atomic access is performed as described in
2310     * the corresponding list", which matches MO_ATOM_SUBALIGN.
2311     */
2312    s_bits = opc & MO_SIZE;
2313    h->aa = atom_and_align_for_opc(s, opc,
2314                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
2315                                                 : MO_ATOM_IFALIGN,
2316                                   s_bits == MO_128);
2317    a_bits = h->aa.align;
2318
2319    if (tcg_use_softmmu) {
2320        int mem_index = get_mmuidx(oi);
2321        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
2322                            : offsetof(CPUTLBEntry, addr_write);
2323        int fast_off = tlb_mask_table_ofs(s, mem_index);
2324        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
2325        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
2326
2327        ldst = new_ldst_label(s);
2328        ldst->is_ld = is_ld;
2329        ldst->oi = oi;
2330        ldst->addrlo_reg = addrlo;
2331        ldst->addrhi_reg = addrhi;
2332
2333        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
2334        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
2335        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
2336
2337        /* Extract the page index, shifted into place for tlb index.  */
2338        if (TCG_TARGET_REG_BITS == 32) {
2339            tcg_out_shri32(s, TCG_REG_R0, addrlo,
2340                           s->page_bits - CPU_TLB_ENTRY_BITS);
2341        } else {
2342            tcg_out_shri64(s, TCG_REG_R0, addrlo,
2343                           s->page_bits - CPU_TLB_ENTRY_BITS);
2344        }
2345        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
2346
2347        /*
2348         * Load the (low part) TLB comparator into TMP2.
2349         * For 64-bit host, always load the entire 64-bit slot for simplicity.
2350         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
2351         */
2352        if (TCG_TARGET_REG_BITS == 64) {
2353            if (cmp_off == 0) {
2354                tcg_out32(s, LDUX | TAB(TCG_REG_TMP2,
2355                                        TCG_REG_TMP1, TCG_REG_TMP2));
2356            } else {
2357                tcg_out32(s, ADD | TAB(TCG_REG_TMP1,
2358                                       TCG_REG_TMP1, TCG_REG_TMP2));
2359                tcg_out_ld(s, TCG_TYPE_I64, TCG_REG_TMP2,
2360                           TCG_REG_TMP1, cmp_off);
2361            }
2362        } else if (cmp_off == 0 && !HOST_BIG_ENDIAN) {
2363            tcg_out32(s, LWZUX | TAB(TCG_REG_TMP2,
2364                                     TCG_REG_TMP1, TCG_REG_TMP2));
2365        } else {
2366            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
2367            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2368                       cmp_off + 4 * HOST_BIG_ENDIAN);
2369        }
2370
2371        /*
2372         * Load the TLB addend for use on the fast path.
2373         * Do this asap to minimize any load use delay.
2374         */
2375        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
2376            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2377                       offsetof(CPUTLBEntry, addend));
2378        }
2379
2380        /* Clear the non-page, non-alignment bits from the address into R0. */
2381        if (TCG_TARGET_REG_BITS == 32) {
2382            /*
2383             * We don't support unaligned accesses on 32-bits.
2384             * Preserve the bottom bits and thus trigger a comparison
2385             * failure on unaligned accesses.
2386             */
2387            if (a_bits < s_bits) {
2388                a_bits = s_bits;
2389            }
2390            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
2391                        (32 - a_bits) & 31, 31 - s->page_bits);
2392        } else {
2393            TCGReg t = addrlo;
2394
2395            /*
2396             * If the access is unaligned, we need to make sure we fail if we
2397             * cross a page boundary.  The trick is to add the access size-1
2398             * to the address before masking the low bits.  That will make the
2399             * address overflow to the next page if we cross a page boundary,
2400             * which will then force a mismatch of the TLB compare.
2401             */
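            /*
             * Worked example (illustrative): for a 4-byte access with
             * a_bits == 0 we add s_mask - a_mask = 3; an address 1..3
             * bytes before a page boundary then carries into the page
             * number, the compare fails, and we take the slow path.
             */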
2402            if (a_bits < s_bits) {
2403                unsigned a_mask = (1 << a_bits) - 1;
2404                unsigned s_mask = (1 << s_bits) - 1;
2405                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
2406                t = TCG_REG_R0;
2407            }
2408
2409            /* Mask the address for the requested alignment.  */
2410            if (addr_type == TCG_TYPE_I32) {
2411                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
2412                            (32 - a_bits) & 31, 31 - s->page_bits);
2413            } else if (a_bits == 0) {
2414                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
2415            } else {
2416                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
2417                            64 - s->page_bits, s->page_bits - a_bits);
2418                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
2419            }
2420        }
2421
2422        if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
2423            /* Low part comparison into cr7. */
2424            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2425                        0, 7, TCG_TYPE_I32);
2426
2427            /* Load the high part TLB comparator into TMP2.  */
2428            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
2429                       cmp_off + 4 * !HOST_BIG_ENDIAN);
2430
2431            /* Load addend, deferred for this case. */
2432            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
2433                       offsetof(CPUTLBEntry, addend));
2434
2435            /* High part comparison into cr6. */
2436            tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2,
2437                        0, 6, TCG_TYPE_I32);
2438
2439            /* Combine comparisons into cr7. */
2440            tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
2441        } else {
2442            /* Full comparison into cr7. */
2443            tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
2444                        0, 7, addr_type);
2445        }
2446
2447        /* Record this insn for patching; cond. branch-and-link to slow path. */
2448        ldst->label_ptr[0] = s->code_ptr;
2449        tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
2450
2451        h->base = TCG_REG_TMP1;
2452    } else {
2453        if (a_bits) {
2454            ldst = new_ldst_label(s);
2455            ldst->is_ld = is_ld;
2456            ldst->oi = oi;
2457            ldst->addrlo_reg = addrlo;
2458            ldst->addrhi_reg = addrhi;
2459
2460            /* We expect a_bits to max out at 7, well within ANDI's immediate. */
2461            tcg_debug_assert(a_bits < 16);
2462            tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
2463
2464            ldst->label_ptr[0] = s->code_ptr;
2465            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
2466        }
2467
2468        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
2469    }
2470
2471    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
2472        /* Zero-extend the guest address for use in the host address. */
2473        tcg_out_ext32u(s, TCG_REG_R0, addrlo);
2474        h->index = TCG_REG_R0;
2475    } else {
2476        h->index = addrlo;
2477    }
2478
2479    return ldst;
2480}
2481
2482static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
2483                            TCGReg addrlo, TCGReg addrhi,
2484                            MemOpIdx oi, TCGType data_type)
2485{
2486    MemOp opc = get_memop(oi);
2487    TCGLabelQemuLdst *ldst;
2488    HostAddress h;
2489
2490    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
2491
2492    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2493        if (opc & MO_BSWAP) {
2494            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2495            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2496            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
2497        } else if (h.base != 0) {
2498            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2499            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
2500            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
2501        } else if (h.index == datahi) {
2502            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2503            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2504        } else {
2505            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
2506            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
2507        }
2508    } else {
2509        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
2510        if (!have_isa_2_06 && insn == LDBRX) {
2511            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2512            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
2513            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
2514            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
2515        } else if (insn) {
2516            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2517        } else {
2518            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
2519            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
2520            tcg_out_movext(s, TCG_TYPE_REG, datalo,
2521                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
2522        }
2523    }
2524
2525    if (ldst) {
2526        ldst->type = data_type;
2527        ldst->datalo_reg = datalo;
2528        ldst->datahi_reg = datahi;
2529        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2530    }
2531}
2532
2533static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
2534                            TCGReg addrlo, TCGReg addrhi,
2535                            MemOpIdx oi, TCGType data_type)
2536{
2537    MemOp opc = get_memop(oi);
2538    TCGLabelQemuLdst *ldst;
2539    HostAddress h;
2540
2541    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
2542
2543    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
2544        if (opc & MO_BSWAP) {
2545            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2546            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2547            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
2548        } else if (h.base != 0) {
2549            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
2550            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
2551            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
2552        } else {
2553            tcg_out32(s, STW | TAI(datahi, h.index, 0));
2554            tcg_out32(s, STW | TAI(datalo, h.index, 4));
2555        }
2556    } else {
2557        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
2558        if (!have_isa_2_06 && insn == STDBRX) {
2559            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
2560            tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, h.index, 4));
2561            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
2562            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP1));
2563        } else {
2564            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
2565        }
2566    }
2567
2568    if (ldst) {
2569        ldst->type = data_type;
2570        ldst->datalo_reg = datalo;
2571        ldst->datahi_reg = datahi;
2572        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2573    }
2574}
2575
2576static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
2577                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
2578{
2579    TCGLabelQemuLdst *ldst;
2580    HostAddress h;
2581    bool need_bswap;
2582    uint32_t insn;
2583    TCGReg index;
2584
2585    ldst = prepare_host_addr(s, &h, addr_reg, -1, oi, is_ld);
2586
2587    /* Compose the final address, as LQ/STQ have no indexing. */
2588    index = h.index;
2589    if (h.base != 0) {
2590        index = TCG_REG_TMP1;
2591        tcg_out32(s, ADD | TAB(index, h.base, h.index));
2592    }
2593    need_bswap = get_memop(oi) & MO_BSWAP;
2594
2595    if (h.aa.atom == MO_128) {
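        /*
         * lq/stq operate on an even/odd register pair and encode the even
         * register; the asserts restate the pairing assumed to have been
         * established by the register constraints.
         */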
2596        tcg_debug_assert(!need_bswap);
2597        tcg_debug_assert(datalo & 1);
2598        tcg_debug_assert(datahi == datalo - 1);
2599        tcg_debug_assert(!is_ld || datahi != index);
2600        insn = is_ld ? LQ : STQ;
2601        tcg_out32(s, insn | TAI(datahi, index, 0));
2602    } else {
2603        TCGReg d1, d2;
2604
2605        if (HOST_BIG_ENDIAN ^ need_bswap) {
2606            d1 = datahi, d2 = datalo;
2607        } else {
2608            d1 = datalo, d2 = datahi;
2609        }
2610
2611        if (need_bswap) {
2612            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
2613            insn = is_ld ? LDBRX : STDBRX;
2614            tcg_out32(s, insn | TAB(d1, 0, index));
2615            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
2616        } else {
2617            insn = is_ld ? LD : STD;
2618            tcg_out32(s, insn | TAI(d1, index, 0));
2619            tcg_out32(s, insn | TAI(d2, index, 8));
2620        }
2621    }
2622
2623    if (ldst) {
2624        ldst->type = TCG_TYPE_I128;
2625        ldst->datalo_reg = datalo;
2626        ldst->datahi_reg = datahi;
2627        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2628    }
2629}
2630
2631static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2632{
2633    int i;
2634    for (i = 0; i < count; ++i) {
2635        p[i] = NOP;
2636    }
2637}
2638
2639/* Parameters for function call generation, used in tcg.c.  */
2640#define TCG_TARGET_STACK_ALIGN       16
2641
2642#ifdef _CALL_AIX
2643# define LINK_AREA_SIZE                (6 * SZR)
2644# define LR_OFFSET                     (1 * SZR)
2645# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
2646#elif defined(_CALL_DARWIN)
2647# define LINK_AREA_SIZE                (6 * SZR)
2648# define LR_OFFSET                     (2 * SZR)
2649#elif TCG_TARGET_REG_BITS == 64
2650# if defined(_CALL_ELF) && _CALL_ELF == 2
2651#  define LINK_AREA_SIZE               (4 * SZR)
2652#  define LR_OFFSET                    (1 * SZR)
2653# endif
2654#else /* TCG_TARGET_REG_BITS == 32 */
2655# if defined(_CALL_SYSV)
2656#  define LINK_AREA_SIZE               (2 * SZR)
2657#  define LR_OFFSET                    (1 * SZR)
2658# endif
2659#endif
2660#ifndef LR_OFFSET
2661# error "Unhandled ABI"
2662#endif
2663#ifndef TCG_TARGET_CALL_STACK_OFFSET
2664# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
2665#endif
2666
2667#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2668#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
2669
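/*
 * The "+ TCG_TARGET_STACK_ALIGN - 1 ... & -TCG_TARGET_STACK_ALIGN" idiom
 * below rounds the frame size up to the next 16-byte multiple.
 */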
2670#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
2671                     + TCG_STATIC_CALL_ARGS_SIZE    \
2672                     + CPU_TEMP_BUF_SIZE            \
2673                     + REG_SAVE_SIZE                \
2674                     + TCG_TARGET_STACK_ALIGN - 1)  \
2675                    & -TCG_TARGET_STACK_ALIGN)
2676
2677#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2678
2679static void tcg_target_qemu_prologue(TCGContext *s)
2680{
2681    int i;
2682
2683#ifdef _CALL_AIX
2684    const void **desc = (const void **)s->code_ptr;
2685    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
2686    desc[1] = 0;                            /* environment pointer */
2687    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
2688#endif
2689
2690    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
2691                  CPU_TEMP_BUF_SIZE);
2692
2693    /* Prologue */
2694    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
2695    tcg_out32(s, (SZR == 8 ? STDU : STWU)
2696              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
2697
2698    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2699        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2700                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2701    }
2702    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2703
2704    if (!tcg_use_softmmu && guest_base) {
2705        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
2706        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2707    }
2708
2709    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2710    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
2711    tcg_out32(s, BCCTR | BO_ALWAYS);
2712
2713    /* Epilogue */
2714    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2715
2716    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
2717    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
2718        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2719                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
2720    }
2721    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
2722    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
2723    tcg_out32(s, BCLR | BO_ALWAYS);
2724}
2725
2726static void tcg_out_tb_start(TCGContext *s)
2727{
2728    /* Load TCG_REG_TB. */
2729    if (USE_REG_TB) {
2730        if (have_isa_3_00) {
2731            /* lnia REG_TB */
2732            tcg_out_addpcis(s, TCG_REG_TB, 0);
2733        } else {
2734            /* bcl 20,31,$+4 (preferred form for getting nia) */
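            /* (BO=20/BI=31 with LK is assumed to be special-cased by the
               hardware so it does not unbalance the link-stack predictor.) */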
2735            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
2736            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
2737        }
2738    }
2739}
2740
2741static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
2742{
2743    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
2744    tcg_out_b(s, 0, tcg_code_gen_epilogue);
2745}
2746
2747static void tcg_out_goto_tb(TCGContext *s, int which)
2748{
2749    uintptr_t ptr = get_jmp_target_addr(s, which);
2750    int16_t lo;
2751
2752    /* Direct branch will be patched by tb_target_set_jmp_target. */
2753    set_jmp_insn_offset(s, which);
2754    tcg_out32(s, NOP);
2755
2756    /* When the branch is out of range, fall through to the indirect path. */
2757    if (USE_REG_TB) {
2758        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
2759        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
2760    } else if (have_isa_3_10) {
2761        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
2762        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
2763    } else if (have_isa_3_00) {
2764        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
2765        lo = offset;
2766        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
2767        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2768    } else {
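        /* Split the address so its low 16 bits fold into the load's
           displacement: ptr == (ptr - lo) + lo, with lo sign-extended. */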
2769        lo = ptr;
2770        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
2771        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
2772    }
2773
2774    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
2775    tcg_out32(s, BCCTR | BO_ALWAYS);
2776    set_jmp_reset_offset(s, which);
2777}
2778
2779void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2780                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2781{
2782    uintptr_t addr = tb->jmp_target_addr[n];
2783    intptr_t diff = addr - jmp_rx;
2784    tcg_insn_unit insn;
2785
2786    if (in_range_b(diff)) {
2787        insn = B | (diff & 0x3fffffc);
2788    } else {
2789        insn = NOP;
2790    }
2791
2792    qatomic_set((uint32_t *)jmp_rw, insn);
2793    flush_idcache_range(jmp_rx, jmp_rw, 4);
2794}
2795
2796static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2797                       const TCGArg args[TCG_MAX_OP_ARGS],
2798                       const int const_args[TCG_MAX_OP_ARGS])
2799{
2800    TCGArg a0, a1, a2;
2801
2802    switch (opc) {
2803    case INDEX_op_goto_ptr:
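        /*
         * Clear r3 before the indirect jump: if the pointer is the
         * epilogue, tcg_qemu_tb_exec() then returns 0 (the assumed
         * "no TB found" convention).
         */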
2804        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
2805        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
2806        tcg_out32(s, BCCTR | BO_ALWAYS);
2807        break;
2808    case INDEX_op_br:
2809        {
2810            TCGLabel *l = arg_label(args[0]);
2811            uint32_t insn = B;
2812
2813            if (l->has_value) {
2814                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
2815                                       l->u.value_ptr);
2816            } else {
2817                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
2818            }
2819            tcg_out32(s, insn);
2820        }
2821        break;
2822    case INDEX_op_ld8u_i32:
2823    case INDEX_op_ld8u_i64:
2824        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2825        break;
2826    case INDEX_op_ld8s_i32:
2827    case INDEX_op_ld8s_i64:
2828        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
2829        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
2830        break;
2831    case INDEX_op_ld16u_i32:
2832    case INDEX_op_ld16u_i64:
2833        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
2834        break;
2835    case INDEX_op_ld16s_i32:
2836    case INDEX_op_ld16s_i64:
2837        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
2838        break;
2839    case INDEX_op_ld_i32:
2840    case INDEX_op_ld32u_i64:
2841        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
2842        break;
2843    case INDEX_op_ld32s_i64:
2844        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
2845        break;
2846    case INDEX_op_ld_i64:
2847        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
2848        break;
2849    case INDEX_op_st8_i32:
2850    case INDEX_op_st8_i64:
2851        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
2852        break;
2853    case INDEX_op_st16_i32:
2854    case INDEX_op_st16_i64:
2855        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
2856        break;
2857    case INDEX_op_st_i32:
2858    case INDEX_op_st32_i64:
2859        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
2860        break;
2861    case INDEX_op_st_i64:
2862        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
2863        break;
2864
2865    case INDEX_op_add_i32:
2866        a0 = args[0], a1 = args[1], a2 = args[2];
2867        if (const_args[2]) {
2868        do_addi_32:
2869            tcg_out_mem_long(s, ADDI, ADD, a0, a1, (int32_t)a2);
2870        } else {
2871            tcg_out32(s, ADD | TAB(a0, a1, a2));
2872        }
2873        break;
2874    case INDEX_op_sub_i32:
2875        a0 = args[0], a1 = args[1], a2 = args[2];
2876        if (const_args[1]) {
2877            if (const_args[2]) {
2878                tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
2879            } else {
2880                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
2881            }
2882        } else if (const_args[2]) {
2883            a2 = -a2;
2884            goto do_addi_32;
2885        } else {
2886            tcg_out32(s, SUBF | TAB(a0, a2, a1));
2887        }
2888        break;
2889
2890    case INDEX_op_and_i32:
2891        a0 = args[0], a1 = args[1], a2 = args[2];
2892        if (const_args[2]) {
2893            tcg_out_andi32(s, a0, a1, a2);
2894        } else {
2895            tcg_out32(s, AND | SAB(a1, a0, a2));
2896        }
2897        break;
2898    case INDEX_op_and_i64:
2899        a0 = args[0], a1 = args[1], a2 = args[2];
2900        if (const_args[2]) {
2901            tcg_out_andi64(s, a0, a1, a2);
2902        } else {
2903            tcg_out32(s, AND | SAB(a1, a0, a2));
2904        }
2905        break;
2906    case INDEX_op_or_i64:
2907    case INDEX_op_or_i32:
2908        a0 = args[0], a1 = args[1], a2 = args[2];
2909        if (const_args[2]) {
2910            tcg_out_ori32(s, a0, a1, a2);
2911        } else {
2912            tcg_out32(s, OR | SAB(a1, a0, a2));
2913        }
2914        break;
2915    case INDEX_op_xor_i64:
2916    case INDEX_op_xor_i32:
2917        a0 = args[0], a1 = args[1], a2 = args[2];
2918        if (const_args[2]) {
2919            tcg_out_xori32(s, a0, a1, a2);
2920        } else {
2921            tcg_out32(s, XOR | SAB(a1, a0, a2));
2922        }
2923        break;
2924    case INDEX_op_andc_i32:
2925        a0 = args[0], a1 = args[1], a2 = args[2];
2926        if (const_args[2]) {
2927            tcg_out_andi32(s, a0, a1, ~a2);
2928        } else {
2929            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2930        }
2931        break;
2932    case INDEX_op_andc_i64:
2933        a0 = args[0], a1 = args[1], a2 = args[2];
2934        if (const_args[2]) {
2935            tcg_out_andi64(s, a0, a1, ~a2);
2936        } else {
2937            tcg_out32(s, ANDC | SAB(a1, a0, a2));
2938        }
2939        break;
2940    case INDEX_op_orc_i32:
2941        if (const_args[2]) {
2942            tcg_out_ori32(s, args[0], args[1], ~args[2]);
2943            break;
2944        }
2945        /* FALLTHRU */
2946    case INDEX_op_orc_i64:
2947        tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
2948        break;
2949    case INDEX_op_eqv_i32:
2950        if (const_args[2]) {
2951            tcg_out_xori32(s, args[0], args[1], ~args[2]);
2952            break;
2953        }
2954        /* FALLTHRU */
2955    case INDEX_op_eqv_i64:
2956        tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
2957        break;
2958    case INDEX_op_nand_i32:
2959    case INDEX_op_nand_i64:
2960        tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
2961        break;
2962    case INDEX_op_nor_i32:
2963    case INDEX_op_nor_i64:
2964        tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
2965        break;
2966
2967    case INDEX_op_clz_i32:
2968        tcg_out_cntxz(s, TCG_TYPE_I32, CNTLZW, args[0], args[1],
2969                      args[2], const_args[2]);
2970        break;
2971    case INDEX_op_ctz_i32:
2972        tcg_out_cntxz(s, TCG_TYPE_I32, CNTTZW, args[0], args[1],
2973                      args[2], const_args[2]);
2974        break;
2975    case INDEX_op_ctpop_i32:
2976        tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
2977        break;
2978
2979    case INDEX_op_clz_i64:
2980        tcg_out_cntxz(s, TCG_TYPE_I64, CNTLZD, args[0], args[1],
2981                      args[2], const_args[2]);
2982        break;
2983    case INDEX_op_ctz_i64:
2984        tcg_out_cntxz(s, TCG_TYPE_I64, CNTTZD, args[0], args[1],
2985                      args[2], const_args[2]);
2986        break;
2987    case INDEX_op_ctpop_i64:
2988        tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
2989        break;
2990
2991    case INDEX_op_mul_i32:
2992        a0 = args[0], a1 = args[1], a2 = args[2];
2993        if (const_args[2]) {
2994            tcg_out32(s, MULLI | TAI(a0, a1, a2));
2995        } else {
2996            tcg_out32(s, MULLW | TAB(a0, a1, a2));
2997        }
2998        break;
2999
3000    case INDEX_op_div_i32:
3001        tcg_out32(s, DIVW | TAB(args[0], args[1], args[2]));
3002        break;
3003
3004    case INDEX_op_divu_i32:
3005        tcg_out32(s, DIVWU | TAB(args[0], args[1], args[2]));
3006        break;
3007
3008    case INDEX_op_rem_i32:
3009        tcg_out32(s, MODSW | TAB(args[0], args[1], args[2]));
3010        break;
3011
3012    case INDEX_op_remu_i32:
3013        tcg_out32(s, MODUW | TAB(args[0], args[1], args[2]));
3014        break;
3015
3016    case INDEX_op_shl_i32:
3017        if (const_args[2]) {
3018            /* Limit immediate shift count lest we create an illegal insn.  */
3019            tcg_out_shli32(s, args[0], args[1], args[2] & 31);
3020        } else {
3021            tcg_out32(s, SLW | SAB(args[1], args[0], args[2]));
3022        }
3023        break;
3024    case INDEX_op_shr_i32:
3025        if (const_args[2]) {
3026            /* Limit immediate shift count lest we create an illegal insn.  */
3027            tcg_out_shri32(s, args[0], args[1], args[2] & 31);
3028        } else {
3029            tcg_out32(s, SRW | SAB(args[1], args[0], args[2]));
3030        }
3031        break;
3032    case INDEX_op_sar_i32:
3033        if (const_args[2]) {
3034            tcg_out_sari32(s, args[0], args[1], args[2]);
3035        } else {
3036            tcg_out32(s, SRAW | SAB(args[1], args[0], args[2]));
3037        }
3038        break;
3039    case INDEX_op_rotl_i32:
3040        if (const_args[2]) {
3041            tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
3042        } else {
3043            tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
3044                         | MB(0) | ME(31));
3045        }
3046        break;
3047    case INDEX_op_rotr_i32:
3048        if (const_args[2]) {
3049            tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
3050        } else {
3051            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 32));
3052            tcg_out32(s, RLWNM | SAB(args[1], args[0], TCG_REG_R0)
3053                         | MB(0) | ME(31));
3054        }
3055        break;
3056
3057    case INDEX_op_brcond_i32:
3058        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3059                       arg_label(args[3]), TCG_TYPE_I32);
3060        break;
3061    case INDEX_op_brcond_i64:
3062        tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
3063                       arg_label(args[3]), TCG_TYPE_I64);
3064        break;
3065    case INDEX_op_brcond2_i32:
3066        tcg_out_brcond2(s, args, const_args);
3067        break;
3068
3069    case INDEX_op_neg_i32:
3070    case INDEX_op_neg_i64:
3071        tcg_out32(s, NEG | RT(args[0]) | RA(args[1]));
3072        break;
3073
3074    case INDEX_op_not_i32:
3075    case INDEX_op_not_i64:
3076        tcg_out32(s, NOR | SAB(args[1], args[0], args[1]));
3077        break;
3078
3079    case INDEX_op_add_i64:
3080        a0 = args[0], a1 = args[1], a2 = args[2];
3081        if (const_args[2]) {
3082        do_addi_64:
3083            tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
3084        } else {
3085            tcg_out32(s, ADD | TAB(a0, a1, a2));
3086        }
3087        break;
3088    case INDEX_op_sub_i64:
3089        a0 = args[0], a1 = args[1], a2 = args[2];
3090        if (const_args[1]) {
3091            if (const_args[2]) {
3092                tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
3093            } else {
3094                tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
3095            }
3096        } else if (const_args[2]) {
3097            a2 = -a2;
3098            goto do_addi_64;
3099        } else {
3100            tcg_out32(s, SUBF | TAB(a0, a2, a1));
3101        }
3102        break;
3103
3104    case INDEX_op_shl_i64:
3105        if (const_args[2]) {
3106            /* Limit immediate shift count lest we create an illegal insn.  */
3107            tcg_out_shli64(s, args[0], args[1], args[2] & 63);
3108        } else {
3109            tcg_out32(s, SLD | SAB(args[1], args[0], args[2]));
3110        }
3111        break;
3112    case INDEX_op_shr_i64:
3113        if (const_args[2]) {
3114            /* Limit immediate shift count lest we create an illegal insn.  */
3115            tcg_out_shri64(s, args[0], args[1], args[2] & 63);
3116        } else {
3117            tcg_out32(s, SRD | SAB(args[1], args[0], args[2]));
3118        }
3119        break;
3120    case INDEX_op_sar_i64:
3121        if (const_args[2]) {
3122            tcg_out_sari64(s, args[0], args[1], args[2]);
3123        } else {
3124            tcg_out32(s, SRAD | SAB(args[1], args[0], args[2]));
3125        }
3126        break;
3127    case INDEX_op_rotl_i64:
3128        if (const_args[2]) {
3129            tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
3130        } else {
3131            tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
3132        }
3133        break;
3134    case INDEX_op_rotr_i64:
3135        if (const_args[2]) {
3136            tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
3137        } else {
3138            tcg_out32(s, SUBFIC | TAI(TCG_REG_R0, args[2], 64));
3139            tcg_out32(s, RLDCL | SAB(args[1], args[0], TCG_REG_R0) | MB64(0));
3140        }
3141        break;
3142
3143    case INDEX_op_mul_i64:
3144        a0 = args[0], a1 = args[1], a2 = args[2];
3145        if (const_args[2]) {
3146            tcg_out32(s, MULLI | TAI(a0, a1, a2));
3147        } else {
3148            tcg_out32(s, MULLD | TAB(a0, a1, a2));
3149        }
3150        break;
3151    case INDEX_op_div_i64:
3152        tcg_out32(s, DIVD | TAB(args[0], args[1], args[2]));
3153        break;
3154    case INDEX_op_divu_i64:
3155        tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
3156        break;
3157    case INDEX_op_rem_i64:
3158        tcg_out32(s, MODSD | TAB(args[0], args[1], args[2]));
3159        break;
3160    case INDEX_op_remu_i64:
3161        tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
3162        break;
3163
3164    case INDEX_op_qemu_ld_a64_i32:
3165        if (TCG_TARGET_REG_BITS == 32) {
3166            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
3167                            args[3], TCG_TYPE_I32);
3168            break;
3169        }
3170        /* fall through */
3171    case INDEX_op_qemu_ld_a32_i32:
3172        tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3173        break;
3174    case INDEX_op_qemu_ld_a32_i64:
3175        if (TCG_TARGET_REG_BITS == 64) {
3176            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3177                            args[2], TCG_TYPE_I64);
3178        } else {
3179            tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
3180                            args[3], TCG_TYPE_I64);
3181        }
3182        break;
3183    case INDEX_op_qemu_ld_a64_i64:
3184        if (TCG_TARGET_REG_BITS == 64) {
3185            tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
3186                            args[2], TCG_TYPE_I64);
3187        } else {
3188            tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
3189                            args[4], TCG_TYPE_I64);
3190        }
3191        break;
3192    case INDEX_op_qemu_ld_a32_i128:
3193    case INDEX_op_qemu_ld_a64_i128:
3194        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3195        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
3196        break;
3197
3198    case INDEX_op_qemu_st_a64_i32:
3199        if (TCG_TARGET_REG_BITS == 32) {
3200            tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
3201                            args[3], TCG_TYPE_I32);
3202            break;
3203        }
3204        /* fall through */
3205    case INDEX_op_qemu_st_a32_i32:
3206        tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
3207        break;
3208    case INDEX_op_qemu_st_a32_i64:
3209        if (TCG_TARGET_REG_BITS == 64) {
3210            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3211                            args[2], TCG_TYPE_I64);
3212        } else {
3213            tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
3214                            args[3], TCG_TYPE_I64);
3215        }
3216        break;
3217    case INDEX_op_qemu_st_a64_i64:
3218        if (TCG_TARGET_REG_BITS == 64) {
3219            tcg_out_qemu_st(s, args[0], -1, args[1], -1,
3220                            args[2], TCG_TYPE_I64);
3221        } else {
3222            tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
3223                            args[4], TCG_TYPE_I64);
3224        }
3225        break;
3226    case INDEX_op_qemu_st_a32_i128:
3227    case INDEX_op_qemu_st_a64_i128:
3228        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
3229        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
3230        break;
3231
3232    case INDEX_op_setcond_i32:
3233        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3234                        const_args[2], false);
3235        break;
3236    case INDEX_op_setcond_i64:
3237        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3238                        const_args[2], false);
3239        break;
3240    case INDEX_op_negsetcond_i32:
3241        tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
3242                        const_args[2], true);
3243        break;
3244    case INDEX_op_negsetcond_i64:
3245        tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
3246                        const_args[2], true);
3247        break;
3248    case INDEX_op_setcond2_i32:
3249        tcg_out_setcond2(s, args, const_args);
3250        break;
3251
3252    case INDEX_op_bswap16_i32:
3253    case INDEX_op_bswap16_i64:
3254        tcg_out_bswap16(s, args[0], args[1], args[2]);
3255        break;
3256    case INDEX_op_bswap32_i32:
3257        tcg_out_bswap32(s, args[0], args[1], 0);
3258        break;
3259    case INDEX_op_bswap32_i64:
3260        tcg_out_bswap32(s, args[0], args[1], args[2]);
3261        break;
3262    case INDEX_op_bswap64_i64:
3263        tcg_out_bswap64(s, args[0], args[1]);
3264        break;
3265
3266    case INDEX_op_deposit_i32:
3267        if (const_args[2]) {
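            /* The constant operand is assumed constrained to 0 elsewhere;
               depositing zeros reduces to clearing the field. */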
3268            uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
3269            tcg_out_andi32(s, args[0], args[0], ~mask);
3270        } else {
3271            tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
3272                        32 - args[3] - args[4], 31 - args[3]);
3273        }
3274        break;
3275    case INDEX_op_deposit_i64:
3276        if (const_args[2]) {
3277            uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
3278            tcg_out_andi64(s, args[0], args[0], ~mask);
3279        } else {
3280            tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
3281                        64 - args[3] - args[4]);
3282        }
3283        break;
3284
3285    case INDEX_op_extract_i32:
3286        tcg_out_rlw(s, RLWINM, args[0], args[1],
3287                    32 - args[2], 32 - args[3], 31);
3288        break;
3289    case INDEX_op_extract_i64:
3290        tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 64 - args[3]);
3291        break;
3292
3293    case INDEX_op_movcond_i32:
3294        tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
3295                        args[3], args[4], const_args[2]);
3296        break;
3297    case INDEX_op_movcond_i64:
3298        tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
3299                        args[3], args[4], const_args[2]);
3300        break;
3301
3302#if TCG_TARGET_REG_BITS == 64
3303    case INDEX_op_add2_i64:
3304#else
3305    case INDEX_op_add2_i32:
3306#endif
3307        /* The CA bit is defined based on the word size of the environment,
3308           so in 64-bit mode it is always the carry out of bit 63; hence only
3309           the native-word add2/sub2 are implemented here.  For add2_i32 on a
               64-bit host, the generic expansion via 64-bit deposit works just
               as well.  */
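            /* Lowering sketch, low part first so that CA feeds the high part:
                 addc/addic  lo, al, bl     (sets CA)
                 adde        hi, ah, bh     (or addme/addze when bh is -1/0)
               with R0 standing in for lo whenever it would overwrite ah or bh.  */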
3310        a0 = args[0], a1 = args[1];
3311        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
3312            a0 = TCG_REG_R0;
3313        }
3314        if (const_args[4]) {
3315            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
3316        } else {
3317            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
3318        }
3319        if (const_args[5]) {
3320            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
3321        } else {
3322            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
3323        }
3324        if (a0 != args[0]) {
3325            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3326        }
3327        break;
3328
3329#if TCG_TARGET_REG_BITS == 64
3330    case INDEX_op_sub2_i64:
3331#else
3332    case INDEX_op_sub2_i32:
3333#endif
3334        a0 = args[0], a1 = args[1];
3335        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
3336            a0 = TCG_REG_R0;
3337        }
3338        if (const_args[2]) {
3339            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
3340        } else {
3341            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
3342        }
3343        if (const_args[3]) {
3344            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
3345        } else {
3346            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
3347        }
3348        if (a0 != args[0]) {
3349            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
3350        }
3351        break;
3352
3353    case INDEX_op_muluh_i32:
3354        tcg_out32(s, MULHWU | TAB(args[0], args[1], args[2]));
3355        break;
3356    case INDEX_op_mulsh_i32:
3357        tcg_out32(s, MULHW | TAB(args[0], args[1], args[2]));
3358        break;
3359    case INDEX_op_muluh_i64:
3360        tcg_out32(s, MULHDU | TAB(args[0], args[1], args[2]));
3361        break;
3362    case INDEX_op_mulsh_i64:
3363        tcg_out32(s, MULHD | TAB(args[0], args[1], args[2]));
3364        break;
3365
3366    case INDEX_op_mb:
3367        tcg_out_mb(s, args[0]);
3368        break;
3369
3370    case INDEX_op_mov_i32:   /* Always emitted via tcg_out_mov.  */
3371    case INDEX_op_mov_i64:
3372    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
3373    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
3374    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
3375    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
3376    case INDEX_op_ext8s_i64:
3377    case INDEX_op_ext8u_i32:
3378    case INDEX_op_ext8u_i64:
3379    case INDEX_op_ext16s_i32:
3380    case INDEX_op_ext16s_i64:
3381    case INDEX_op_ext16u_i32:
3382    case INDEX_op_ext16u_i64:
3383    case INDEX_op_ext32s_i64:
3384    case INDEX_op_ext32u_i64:
3385    case INDEX_op_ext_i32_i64:
3386    case INDEX_op_extu_i32_i64:
3387    case INDEX_op_extrl_i64_i32:
3388    default:
3389        g_assert_not_reached();
3390    }
3391}
3392
3393int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3394{
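        /*
         * Return 1 if the opcode is supported directly, -1 if it can be
         * synthesized by tcg_expand_vec_op, and 0 if it is unsupported.
         */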
3395    switch (opc) {
3396    case INDEX_op_and_vec:
3397    case INDEX_op_or_vec:
3398    case INDEX_op_xor_vec:
3399    case INDEX_op_andc_vec:
3400    case INDEX_op_not_vec:
3401    case INDEX_op_nor_vec:
3402    case INDEX_op_eqv_vec:
3403    case INDEX_op_nand_vec:
3404        return 1;
3405    case INDEX_op_orc_vec:
3406        return have_isa_2_07;
3407    case INDEX_op_add_vec:
3408    case INDEX_op_sub_vec:
3409    case INDEX_op_smax_vec:
3410    case INDEX_op_smin_vec:
3411    case INDEX_op_umax_vec:
3412    case INDEX_op_umin_vec:
3413    case INDEX_op_shlv_vec:
3414    case INDEX_op_shrv_vec:
3415    case INDEX_op_sarv_vec:
3416    case INDEX_op_rotlv_vec:
3417        return vece <= MO_32 || have_isa_2_07;
3418    case INDEX_op_ssadd_vec:
3419    case INDEX_op_sssub_vec:
3420    case INDEX_op_usadd_vec:
3421    case INDEX_op_ussub_vec:
3422        return vece <= MO_32;
3423    case INDEX_op_cmp_vec:
3424    case INDEX_op_shli_vec:
3425    case INDEX_op_shri_vec:
3426    case INDEX_op_sari_vec:
3427    case INDEX_op_rotli_vec:
3428        return (vece <= MO_32 || have_isa_2_07) ? -1 : 0;
3429    case INDEX_op_neg_vec:
3430        return vece >= MO_32 && have_isa_3_00;
3431    case INDEX_op_mul_vec:
3432        switch (vece) {
3433        case MO_8:
3434        case MO_16:
3435            return -1;
3436        case MO_32:
3437            return have_isa_2_07 ? 1 : -1;
3438        case MO_64:
3439            return have_isa_3_10;
3440        }
3441        return 0;
3442    case INDEX_op_bitsel_vec:
3443        return have_vsx;
3444    case INDEX_op_rotrv_vec:
3445        return -1;
3446    default:
3447        return 0;
3448    }
3449}
3450
3451static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
3452                            TCGReg dst, TCGReg src)
3453{
3454    tcg_debug_assert(dst >= TCG_REG_V0);
3455
3456    /* Splat from integer reg allowed via constraints for v3.00.  */
3457    if (src < TCG_REG_V0) {
3458        tcg_debug_assert(have_isa_3_00);
3459        switch (vece) {
3460        case MO_64:
3461            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
3462            return true;
3463        case MO_32:
3464            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
3465            return true;
3466        default:
3467            /* Fail, so that we fall back on either dupm or mov+dup.  */
3468            return false;
3469        }
3470    }
3471
3472    /*
3473     * Recall we use (or emulate) VSX integer loads, so the integer is
3474     * right justified within the left (zero-index) double-word.
3475     */
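        /* Hence the splat indices below: byte 7, halfword 3 and word 1
           all name that right-justified element, numbered big-endian.  */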
3476    switch (vece) {
3477    case MO_8:
3478        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
3479        break;
3480    case MO_16:
3481        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
3482        break;
3483    case MO_32:
3484        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
3485        break;
3486    case MO_64:
3487        if (have_vsx) {
3488            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
3489            break;
3490        }
3491        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
3492        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
3493        break;
3494    default:
3495        g_assert_not_reached();
3496    }
3497    return true;
3498}
3499
3500static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
3501                             TCGReg out, TCGReg base, intptr_t offset)
3502{
3503    int elt;
3504
3505    tcg_debug_assert(out >= TCG_REG_V0);
3506    switch (vece) {
3507    case MO_8:
3508        if (have_isa_3_00) {
3509            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
3510        } else {
3511            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
3512        }
3513        elt = extract32(offset, 0, 4);
3514#if !HOST_BIG_ENDIAN
3515        elt ^= 15;
3516#endif
3517        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
3518        break;
3519    case MO_16:
3520        tcg_debug_assert((offset & 1) == 0);
3521        if (have_isa_3_00) {
3522            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
3523        } else {
3524            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
3525        }
3526        elt = extract32(offset, 1, 3);
3527#if !HOST_BIG_ENDIAN
3528        elt ^= 7;
3529#endif
3530        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
3531        break;
3532    case MO_32:
3533        if (have_isa_3_00) {
3534            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
3535            break;
3536        }
3537        tcg_debug_assert((offset & 3) == 0);
3538        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
3539        elt = extract32(offset, 2, 2);
3540#if !HOST_BIG_ENDIAN
3541        elt ^= 3;
3542#endif
3543        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
3544        break;
3545    case MO_64:
3546        if (have_vsx) {
3547            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
3548            break;
3549        }
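            /*
             * Without LXVDSX: load the aligned quadword that contains the
             * value, then replicate the selected doubleword into both
             * halves with a pair of VSLDOI rotations.
             */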
3550        tcg_debug_assert((offset & 7) == 0);
3551        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
3552        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
3553        elt = extract32(offset, 3, 1);
3554#if !HOST_BIG_ENDIAN
3555        elt = !elt;
3556#endif
3557        if (elt) {
3558            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
3559        } else {
3560            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
3561        }
3562        break;
3563    default:
3564        g_assert_not_reached();
3565    }
3566    return true;
3567}
3568
3569static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
3570                           unsigned vecl, unsigned vece,
3571                           const TCGArg args[TCG_MAX_OP_ARGS],
3572                           const int const_args[TCG_MAX_OP_ARGS])
3573{
3574    static const uint32_t
3575        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
3576        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
3577        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
3578        neg_op[4] = { 0, 0, VNEGW, VNEGD },
3579        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
3580        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
3581        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
3582        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
3583        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
3584        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
3585        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
3586        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
3587        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
3588        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
3589        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
3590        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
3591        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
3592        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
3593        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
3594        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
3595        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
3596        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
3597        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
3598        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
3599        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
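        /* A zero entry marks an element size with no single instruction;
           reaching one would trip the tcg_debug_assert(insn != 0) below. */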
3600
3601    TCGType type = vecl + TCG_TYPE_V64;
3602    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
3603    uint32_t insn;
3604
3605    switch (opc) {
3606    case INDEX_op_ld_vec:
3607        tcg_out_ld(s, type, a0, a1, a2);
3608        return;
3609    case INDEX_op_st_vec:
3610        tcg_out_st(s, type, a0, a1, a2);
3611        return;
3612    case INDEX_op_dupm_vec:
3613        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
3614        return;
3615
3616    case INDEX_op_add_vec:
3617        insn = add_op[vece];
3618        break;
3619    case INDEX_op_sub_vec:
3620        insn = sub_op[vece];
3621        break;
3622    case INDEX_op_neg_vec:
3623        insn = neg_op[vece];
3624        a2 = a1;
3625        a1 = 0;
3626        break;
3627    case INDEX_op_mul_vec:
3628        insn = mul_op[vece];
3629        break;
3630    case INDEX_op_ssadd_vec:
3631        insn = ssadd_op[vece];
3632        break;
3633    case INDEX_op_sssub_vec:
3634        insn = sssub_op[vece];
3635        break;
3636    case INDEX_op_usadd_vec:
3637        insn = usadd_op[vece];
3638        break;
3639    case INDEX_op_ussub_vec:
3640        insn = ussub_op[vece];
3641        break;
3642    case INDEX_op_smin_vec:
3643        insn = smin_op[vece];
3644        break;
3645    case INDEX_op_umin_vec:
3646        insn = umin_op[vece];
3647        break;
3648    case INDEX_op_smax_vec:
3649        insn = smax_op[vece];
3650        break;
3651    case INDEX_op_umax_vec:
3652        insn = umax_op[vece];
3653        break;
3654    case INDEX_op_shlv_vec:
3655        insn = shlv_op[vece];
3656        break;
3657    case INDEX_op_shrv_vec:
3658        insn = shrv_op[vece];
3659        break;
3660    case INDEX_op_sarv_vec:
3661        insn = sarv_op[vece];
3662        break;
3663    case INDEX_op_and_vec:
3664        insn = VAND;
3665        break;
3666    case INDEX_op_or_vec:
3667        insn = VOR;
3668        break;
3669    case INDEX_op_xor_vec:
3670        insn = VXOR;
3671        break;
3672    case INDEX_op_andc_vec:
3673        insn = VANDC;
3674        break;
3675    case INDEX_op_not_vec:
3676        insn = VNOR;
3677        a2 = a1;
3678        break;
3679    case INDEX_op_orc_vec:
3680        insn = VORC;
3681        break;
3682    case INDEX_op_nand_vec:
3683        insn = VNAND;
3684        break;
3685    case INDEX_op_nor_vec:
3686        insn = VNOR;
3687        break;
3688    case INDEX_op_eqv_vec:
3689        insn = VEQV;
3690        break;
3691
3692    case INDEX_op_cmp_vec:
3693        switch (args[3]) {
3694        case TCG_COND_EQ:
3695            insn = eq_op[vece];
3696            break;
3697        case TCG_COND_NE:
3698            insn = ne_op[vece];
3699            break;
3700        case TCG_COND_GT:
3701            insn = gts_op[vece];
3702            break;
3703        case TCG_COND_GTU:
3704            insn = gtu_op[vece];
3705            break;
3706        default:
3707            g_assert_not_reached();
3708        }
3709        break;
3710
3711    case INDEX_op_bitsel_vec:
3712        tcg_out32(s, XXSEL | VRT(a0) | VRC(a1) | VRB(a2) | VRA(args[3]));
3713        return;
3714
3715    case INDEX_op_dup2_vec:
3716        assert(TCG_TARGET_REG_BITS == 32);
3717        /* With inputs a1 = xLxx, a2 = xHxx  */
3718        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
3719        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
3720        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
3721        return;
3722
3723    case INDEX_op_ppc_mrgh_vec:
3724        insn = mrgh_op[vece];
3725        break;
3726    case INDEX_op_ppc_mrgl_vec:
3727        insn = mrgl_op[vece];
3728        break;
3729    case INDEX_op_ppc_muleu_vec:
3730        insn = muleu_op[vece];
3731        break;
3732    case INDEX_op_ppc_mulou_vec:
3733        insn = mulou_op[vece];
3734        break;
3735    case INDEX_op_ppc_pkum_vec:
3736        insn = pkum_op[vece];
3737        break;
3738    case INDEX_op_rotlv_vec:
3739        insn = rotl_op[vece];
3740        break;
3741    case INDEX_op_ppc_msum_vec:
3742        tcg_debug_assert(vece == MO_16);
3743        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
3744        return;
3745
3746    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3747    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3748    default:
3749        g_assert_not_reached();
3750    }
3751
3752    tcg_debug_assert(insn != 0);
3753    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
3754}
3755
3756static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
3757                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
3758{
3759    TCGv_vec t1;
3760
3761    if (vece == MO_32) {
3762        /*
3763         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3764         * So negative values supply bit 4, covering counts 16..31.
3765         */
3766        imm = sextract32(imm, 0, 5);
3767    } else {
3768        imm &= (8 << vece) - 1;
3769    }
3770
3771    /* Splat the count as bytes: MO_64 counts (reachable from ISA 2.07 on)
           exceed VSPLTISB's -16..15, but a byte splat can still be formed via
           XXSPLTIB or a constant-pool load. */
3772    t1 = tcg_constant_vec(type, MO_8, imm);
3773    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
3774              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3775}
3776
3777static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3778                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3779{
3780    bool need_swap = false, need_inv = false;
3781
3782    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);
3783
3784    switch (cond) {
3785    case TCG_COND_EQ:
3786    case TCG_COND_GT:
3787    case TCG_COND_GTU:
3788        break;
3789    case TCG_COND_NE:
3790        if (have_isa_3_00 && vece <= MO_32) {
3791            break;
3792        }
3793        /* fall through */
3794    case TCG_COND_LE:
3795    case TCG_COND_LEU:
3796        need_inv = true;
3797        break;
3798    case TCG_COND_LT:
3799    case TCG_COND_LTU:
3800        need_swap = true;
3801        break;
3802    case TCG_COND_GE:
3803    case TCG_COND_GEU:
3804        need_swap = need_inv = true;
3805        break;
3806    default:
3807        g_assert_not_reached();
3808    }
3809
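        /*
         * E.g. LT(a, b) becomes GT(b, a) via the swap, and GE(a, b)
         * becomes NOT GT(b, a) via swap plus invert: only EQ, GT, GTU
         * (and, for ISA 3.00, NE) exist as vector comparisons.
         */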
3810    if (need_inv) {
3811        cond = tcg_invert_cond(cond);
3812    }
3813    if (need_swap) {
3814        TCGv_vec t1;
3815        t1 = v1, v1 = v2, v2 = t1;
3816        cond = tcg_swap_cond(cond);
3817    }
3818
3819    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3820              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3821
3822    if (need_inv) {
3823        tcg_gen_not_vec(vece, v0, v0);
3824    }
3825}
3826
3827static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
3828                           TCGv_vec v1, TCGv_vec v2)
3829{
3830    TCGv_vec t1 = tcg_temp_new_vec(type);
3831    TCGv_vec t2 = tcg_temp_new_vec(type);
3832    TCGv_vec c0, c16;
3833
3834    switch (vece) {
3835    case MO_8:
3836    case MO_16:
3837        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
3838                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3839        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
3840                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3841        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
3842                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3843        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
3844                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
3845        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
3846                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
3847        break;
3848
3849    case MO_32:
3850        tcg_debug_assert(!have_isa_2_07);
3851        /*
3852         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
3853         * So using -16 is a quick way to represent 16.
3854         */
3855        c16 = tcg_constant_vec(type, MO_8, -16);
3856        c0 = tcg_constant_vec(type, MO_8, 0);
3857
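            /*
             * Per 32-bit element, with a = ah:al and b = bh:bl in 16-bit
             * halves:
             *   a * b mod 2^32 = al*bl + ((ah*bl + al*bh) << 16)
             * VMULOUH yields the full 32-bit al*bl; rotating b by 16
             * pairs ah with bl and al with bh, so a single VMSUMUHM
             * yields ah*bl + al*bh.
             */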
3858        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
3859                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
3860        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
3861                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3862        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
3863                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
3864        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
3865                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
3866        tcg_gen_add_vec(MO_32, v0, t1, t2);
3867        break;
3868
3869    default:
3870        g_assert_not_reached();
3871    }
3872    tcg_temp_free_vec(t1);
3873    tcg_temp_free_vec(t2);
3874}
3875
3876void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3877                       TCGArg a0, ...)
3878{
3879    va_list va;
3880    TCGv_vec v0, v1, v2, t0;
3881    TCGArg a2;
3882
3883    va_start(va, a0);
3884    v0 = temp_tcgv_vec(arg_temp(a0));
3885    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3886    a2 = va_arg(va, TCGArg);
3887
3888    switch (opc) {
3889    case INDEX_op_shli_vec:
3890        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
3891        break;
3892    case INDEX_op_shri_vec:
3893        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
3894        break;
3895    case INDEX_op_sari_vec:
3896        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
3897        break;
3898    case INDEX_op_rotli_vec:
3899        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
3900        break;
3901    case INDEX_op_cmp_vec:
3902        v2 = temp_tcgv_vec(arg_temp(a2));
3903        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3904        break;
3905    case INDEX_op_mul_vec:
3906        v2 = temp_tcgv_vec(arg_temp(a2));
3907        expand_vec_mul(type, vece, v0, v1, v2);
3908        break;
3909    case INDEX_op_rotrv_vec:
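            /*
             * Rotate right by n is rotate left by -n, since only the low
             * log2(element bits) bits of each count element are used.
             */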
3910        v2 = temp_tcgv_vec(arg_temp(a2));
3911        t0 = tcg_temp_new_vec(type);
3912        tcg_gen_neg_vec(vece, t0, v2);
3913        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3914        tcg_temp_free_vec(t0);
3915        break;
3916    default:
3917        g_assert_not_reached();
3918    }
3919    va_end(va);
3920}
3921
3922static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3923{
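        /*
         * Constraint letters are defined in tcg-target-con-str.h:
         * 'r' is any gpr, 'v' any vector register, lowercase 'i' any
         * immediate, and the uppercase letters accept the TCG_CT_CONST_*
         * ranges (e.g. 'I' for signed 16-bit); 'o' and 'm' are the
         * backend-specific constraints used by the i128 ops.
         */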
3924    switch (op) {
3925    case INDEX_op_goto_ptr:
3926        return C_O0_I1(r);
3927
3928    case INDEX_op_ld8u_i32:
3929    case INDEX_op_ld8s_i32:
3930    case INDEX_op_ld16u_i32:
3931    case INDEX_op_ld16s_i32:
3932    case INDEX_op_ld_i32:
3933    case INDEX_op_ctpop_i32:
3934    case INDEX_op_neg_i32:
3935    case INDEX_op_not_i32:
3936    case INDEX_op_ext8s_i32:
3937    case INDEX_op_ext16s_i32:
3938    case INDEX_op_bswap16_i32:
3939    case INDEX_op_bswap32_i32:
3940    case INDEX_op_extract_i32:
3941    case INDEX_op_ld8u_i64:
3942    case INDEX_op_ld8s_i64:
3943    case INDEX_op_ld16u_i64:
3944    case INDEX_op_ld16s_i64:
3945    case INDEX_op_ld32u_i64:
3946    case INDEX_op_ld32s_i64:
3947    case INDEX_op_ld_i64:
3948    case INDEX_op_ctpop_i64:
3949    case INDEX_op_neg_i64:
3950    case INDEX_op_not_i64:
3951    case INDEX_op_ext8s_i64:
3952    case INDEX_op_ext16s_i64:
3953    case INDEX_op_ext32s_i64:
3954    case INDEX_op_ext_i32_i64:
3955    case INDEX_op_extu_i32_i64:
3956    case INDEX_op_bswap16_i64:
3957    case INDEX_op_bswap32_i64:
3958    case INDEX_op_bswap64_i64:
3959    case INDEX_op_extract_i64:
3960        return C_O1_I1(r, r);
3961
3962    case INDEX_op_st8_i32:
3963    case INDEX_op_st16_i32:
3964    case INDEX_op_st_i32:
3965    case INDEX_op_st8_i64:
3966    case INDEX_op_st16_i64:
3967    case INDEX_op_st32_i64:
3968    case INDEX_op_st_i64:
3969        return C_O0_I2(r, r);
3970
3971    case INDEX_op_add_i32:
3972    case INDEX_op_and_i32:
3973    case INDEX_op_or_i32:
3974    case INDEX_op_xor_i32:
3975    case INDEX_op_andc_i32:
3976    case INDEX_op_orc_i32:
3977    case INDEX_op_eqv_i32:
3978    case INDEX_op_shl_i32:
3979    case INDEX_op_shr_i32:
3980    case INDEX_op_sar_i32:
3981    case INDEX_op_rotl_i32:
3982    case INDEX_op_rotr_i32:
3983    case INDEX_op_setcond_i32:
3984    case INDEX_op_negsetcond_i32:
3985    case INDEX_op_and_i64:
3986    case INDEX_op_andc_i64:
3987    case INDEX_op_shl_i64:
3988    case INDEX_op_shr_i64:
3989    case INDEX_op_sar_i64:
3990    case INDEX_op_rotl_i64:
3991    case INDEX_op_rotr_i64:
3992    case INDEX_op_setcond_i64:
3993    case INDEX_op_negsetcond_i64:
3994        return C_O1_I2(r, r, ri);
3995
3996    case INDEX_op_mul_i32:
3997    case INDEX_op_mul_i64:
3998        return C_O1_I2(r, r, rI);
3999
4000    case INDEX_op_div_i32:
4001    case INDEX_op_divu_i32:
4002    case INDEX_op_rem_i32:
4003    case INDEX_op_remu_i32:
4004    case INDEX_op_nand_i32:
4005    case INDEX_op_nor_i32:
4006    case INDEX_op_muluh_i32:
4007    case INDEX_op_mulsh_i32:
4008    case INDEX_op_orc_i64:
4009    case INDEX_op_eqv_i64:
4010    case INDEX_op_nand_i64:
4011    case INDEX_op_nor_i64:
4012    case INDEX_op_div_i64:
4013    case INDEX_op_divu_i64:
4014    case INDEX_op_rem_i64:
4015    case INDEX_op_remu_i64:
4016    case INDEX_op_mulsh_i64:
4017    case INDEX_op_muluh_i64:
4018        return C_O1_I2(r, r, r);
4019
4020    case INDEX_op_sub_i32:
4021        return C_O1_I2(r, rI, ri);
4022    case INDEX_op_add_i64:
4023        return C_O1_I2(r, r, rT);
4024    case INDEX_op_or_i64:
4025    case INDEX_op_xor_i64:
4026        return C_O1_I2(r, r, rU);
4027    case INDEX_op_sub_i64:
4028        return C_O1_I2(r, rI, rT);
4029    case INDEX_op_clz_i32:
4030    case INDEX_op_ctz_i32:
4031    case INDEX_op_clz_i64:
4032    case INDEX_op_ctz_i64:
4033        return C_O1_I2(r, r, rZW);
4034
4035    case INDEX_op_brcond_i32:
4036    case INDEX_op_brcond_i64:
4037        return C_O0_I2(r, ri);
4038
4039    case INDEX_op_movcond_i32:
4040    case INDEX_op_movcond_i64:
4041        return C_O1_I4(r, r, ri, rZ, rZ);
4042    case INDEX_op_deposit_i32:
4043    case INDEX_op_deposit_i64:
4044        return C_O1_I2(r, 0, rZ);
4045    case INDEX_op_brcond2_i32:
4046        return C_O0_I4(r, r, ri, ri);
4047    case INDEX_op_setcond2_i32:
4048        return C_O1_I4(r, r, r, ri, ri);
4049    case INDEX_op_add2_i64:
4050    case INDEX_op_add2_i32:
4051        return C_O2_I4(r, r, r, r, rI, rZM);
4052    case INDEX_op_sub2_i64:
4053    case INDEX_op_sub2_i32:
4054        return C_O2_I4(r, r, rI, rZM, r, r);
4055
4056    case INDEX_op_qemu_ld_a32_i32:
4057        return C_O1_I1(r, r);
4058    case INDEX_op_qemu_ld_a64_i32:
4059        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
4060    case INDEX_op_qemu_ld_a32_i64:
4061        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
4062    case INDEX_op_qemu_ld_a64_i64:
4063        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
4064
4065    case INDEX_op_qemu_st_a32_i32:
4066        return C_O0_I2(r, r);
4067    case INDEX_op_qemu_st_a64_i32:
4068        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4069    case INDEX_op_qemu_st_a32_i64:
4070        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
4071    case INDEX_op_qemu_st_a64_i64:
4072        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
4073
4074    case INDEX_op_qemu_ld_a32_i128:
4075    case INDEX_op_qemu_ld_a64_i128:
4076        return C_N1O1_I1(o, m, r);
4077    case INDEX_op_qemu_st_a32_i128:
4078    case INDEX_op_qemu_st_a64_i128:
4079        return C_O0_I3(o, m, r);
4080
4081    case INDEX_op_add_vec:
4082    case INDEX_op_sub_vec:
4083    case INDEX_op_mul_vec:
4084    case INDEX_op_and_vec:
4085    case INDEX_op_or_vec:
4086    case INDEX_op_xor_vec:
4087    case INDEX_op_andc_vec:
4088    case INDEX_op_orc_vec:
4089    case INDEX_op_nor_vec:
4090    case INDEX_op_eqv_vec:
4091    case INDEX_op_nand_vec:
4092    case INDEX_op_cmp_vec:
4093    case INDEX_op_ssadd_vec:
4094    case INDEX_op_sssub_vec:
4095    case INDEX_op_usadd_vec:
4096    case INDEX_op_ussub_vec:
4097    case INDEX_op_smax_vec:
4098    case INDEX_op_smin_vec:
4099    case INDEX_op_umax_vec:
4100    case INDEX_op_umin_vec:
4101    case INDEX_op_shlv_vec:
4102    case INDEX_op_shrv_vec:
4103    case INDEX_op_sarv_vec:
4104    case INDEX_op_rotlv_vec:
4105    case INDEX_op_rotrv_vec:
4106    case INDEX_op_ppc_mrgh_vec:
4107    case INDEX_op_ppc_mrgl_vec:
4108    case INDEX_op_ppc_muleu_vec:
4109    case INDEX_op_ppc_mulou_vec:
4110    case INDEX_op_ppc_pkum_vec:
4111    case INDEX_op_dup2_vec:
4112        return C_O1_I2(v, v, v);
4113
4114    case INDEX_op_not_vec:
4115    case INDEX_op_neg_vec:
4116        return C_O1_I1(v, v);
4117
4118    case INDEX_op_dup_vec:
4119        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);
4120
4121    case INDEX_op_ld_vec:
4122    case INDEX_op_dupm_vec:
4123        return C_O1_I1(v, r);
4124
4125    case INDEX_op_st_vec:
4126        return C_O0_I2(v, r);
4127
4128    case INDEX_op_bitsel_vec:
4129    case INDEX_op_ppc_msum_vec:
4130        return C_O1_I3(v, v, v, v);
4131
4132    default:
4133        g_assert_not_reached();
4134    }
4135}
4136
4137static void tcg_target_init(TCGContext *s)
4138{
4139    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
4140    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
4141    if (have_altivec) {
4142        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
4143        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
4144    }
4145
4146    tcg_target_call_clobber_regs = 0;
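        /* As set below, r0 and r2..r12 (r2 being the TOC in the 64-bit
           ABIs) plus v0..v19 are treated as call-clobbered; r14..r31 and
           v20..v31 remain call-saved.  */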
4147    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
4148    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
4149    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
4150    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
4151    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
4152    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
4153    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
4154    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
4155    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
4156    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
4157    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
4158    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);
4159
4160    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
4161    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
4162    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
4163    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
4164    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
4165    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
4166    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
4167    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
4168    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
4169    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
4170    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
4171    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
4172    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
4173    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
4174    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
4175    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
4176    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
4177    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
4178    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
4179    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
4180
4181    s->reserved_regs = 0;
4182    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
4183    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
4184#if defined(_CALL_SYSV)
4185    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
4186#endif
4187#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
4188    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
4189#endif
4190    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
4191    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
4192    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
4193    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
4194    if (USE_REG_TB) {
4195        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
4196    }
4197}
4198
4199#ifdef __ELF__
4200typedef struct {
4201    DebugFrameCIE cie;
4202    DebugFrameFDEHeader fde;
4203    uint8_t fde_def_cfa[4];
4204    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
4205} DebugFrame;
4206
4207/* We're expecting a 2-byte uleb128 encoded value.  */
4208QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
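    /* A value below 1 << 14 fits in two uleb128 bytes: the low 7 bits
       with 0x80 set, then the high 7 bits -- see fde_def_cfa below.  */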
4209
4210#if TCG_TARGET_REG_BITS == 64
4211# define ELF_HOST_MACHINE EM_PPC64
4212#else
4213# define ELF_HOST_MACHINE EM_PPC
4214#endif
4215
4216static DebugFrame debug_frame = {
4217    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
4218    .cie.id = -1,
4219    .cie.version = 1,
4220    .cie.code_align = 1,
4221    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
4222    .cie.return_column = 65,
4223
4224    /* Total FDE size does not include the "len" member.  */
4225    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
4226
4227    .fde_def_cfa = {
4228        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
4229        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
4230        (FRAME_SIZE >> 7)
4231    },
4232    .fde_reg_ofs = {
4233        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
4234        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
4235    }
4236};
4237
4238void tcg_register_jit(const void *buf, size_t buf_size)
4239{
4240    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4241    int i;
4242
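        /* Each entry is DW_CFA_offset (0x80 | regno) followed by the
           slot index: the CFA-relative offset divided by the data
           alignment factor, which the CIE sets to -SZR.  */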
4243    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4244        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4245        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4246    }
4247
4248    debug_frame.fde.func_start = (uintptr_t)buf;
4249    debug_frame.fde.func_len = buf_size;
4250
4251    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4252}
4253#endif /* __ELF__ */
4261