xref: /openbmc/qemu/tcg/ppc/tcg-target.c.inc (revision 4d137ff819bae33d045f13bb9186e3a2c71cb7e4)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "elf.h"
26
27/*
28 * Standardize on the _CALL_FOO symbols used by GCC:
29 * Apple XCode does not define _CALL_DARWIN.
30 * Clang defines _CALL_ELF (64-bit) but not _CALL_SYSV or _CALL_AIX.
31 */
32#if TCG_TARGET_REG_BITS == 64
33# ifdef _CALL_AIX
34    /* ok */
35# elif defined(_CALL_ELF) && _CALL_ELF == 1
36#  define _CALL_AIX
37# elif defined(_CALL_ELF) && _CALL_ELF == 2
38    /* ok */
39# else
40#  error "Unknown ABI"
41# endif
42#else
43# if defined(_CALL_SYSV) || defined(_CALL_DARWIN)
44    /* ok */
45# elif defined(__APPLE__)
46#  define _CALL_DARWIN
47# elif defined(__ELF__)
48#  define _CALL_SYSV
49# else
50#  error "Unknown ABI"
51# endif
52#endif
53
54#if TCG_TARGET_REG_BITS == 64
55# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_EXTEND
56# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_NORMAL
57#else
58# define TCG_TARGET_CALL_ARG_I32   TCG_CALL_ARG_NORMAL
59# define TCG_TARGET_CALL_RET_I128  TCG_CALL_RET_BY_REF
60#endif
61#ifdef _CALL_SYSV
62# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_EVEN
63# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_BY_REF
64#else
65# define TCG_TARGET_CALL_ARG_I64   TCG_CALL_ARG_NORMAL
66# define TCG_TARGET_CALL_ARG_I128  TCG_CALL_ARG_NORMAL
67#endif
68
69/* For some memory operations, we need a scratch that isn't R0.  For the AIX
70   calling convention, we can re-use the TOC register since we'll be reloading
71   it at every call.  Otherwise R12 will do nicely as neither a call-saved
72   register nor a parameter register.  */
73#ifdef _CALL_AIX
74# define TCG_REG_TMP1   TCG_REG_R2
75#else
76# define TCG_REG_TMP1   TCG_REG_R12
77#endif
78#define TCG_REG_TMP2    TCG_REG_R11
79
80#define TCG_VEC_TMP1    TCG_REG_V0
81#define TCG_VEC_TMP2    TCG_REG_V1
82
83#define TCG_REG_TB     TCG_REG_R31
84#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64 && !have_isa_3_00)
85
86/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
87#define SZP  ((int)sizeof(void *))
88
89/* Shorthand for size of a register.  */
90#define SZR  (TCG_TARGET_REG_BITS / 8)
91
92#define TCG_CT_CONST_S16  0x100
93#define TCG_CT_CONST_U16  0x200
94#define TCG_CT_CONST_S32  0x400
95#define TCG_CT_CONST_U32  0x800
96#define TCG_CT_CONST_ZERO 0x1000
97#define TCG_CT_CONST_MONE 0x2000
98#define TCG_CT_CONST_WSZ  0x4000
99#define TCG_CT_CONST_CMP  0x8000
100
101#define ALL_GENERAL_REGS  0xffffffffu
102#define ALL_VECTOR_REGS   0xffffffff00000000ull
103
104#ifndef R_PPC64_PCREL34
105#define R_PPC64_PCREL34  132
106#endif
107
108#define have_isel  (cpuinfo & CPUINFO_ISEL)
109
110#define TCG_GUEST_BASE_REG  TCG_REG_R30
111
#ifdef CONFIG_DEBUG_TCG
/* Human-readable register names indexed by TCGReg, for debug output only. */
static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
    "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
    "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
    "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif
124
/*
 * Register allocation preference order: call-saved GPRs first (cheapest
 * to keep live across calls), then call-clobbered non-argument GPRs,
 * then argument GPRs in reverse argument order; call-clobbered vector
 * registers last.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_R14,  /* call saved registers */
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27,
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31,
    TCG_REG_R12,  /* call clobbered, non-arguments */
    TCG_REG_R11,
    TCG_REG_R2,
    TCG_REG_R13,
    TCG_REG_R10,  /* call clobbered, arguments */
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,

    /* V0 and V1 reserved as temporaries; V20 - V31 are call-saved */
    TCG_REG_V2,   /* call clobbered, vectors */
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V8,
    TCG_REG_V9,
    TCG_REG_V10,
    TCG_REG_V11,
    TCG_REG_V12,
    TCG_REG_V13,
    TCG_REG_V14,
    TCG_REG_V15,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
};
177
/* Integer argument registers, r3..r10, in argument order. */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
    TCG_REG_R7,
    TCG_REG_R8,
    TCG_REG_R9,
    TCG_REG_R10
};
188
189static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
190{
191    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
192    tcg_debug_assert(slot >= 0 && slot <= 1);
193    return TCG_REG_R3 + slot;
194}
195
/* Registers the prologue must save and the epilogue restore. */
static const int tcg_target_callee_save_regs[] = {
#ifdef _CALL_DARWIN
    TCG_REG_R11,
#endif
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R16,
    TCG_REG_R17,
    TCG_REG_R18,
    TCG_REG_R19,
    TCG_REG_R20,
    TCG_REG_R21,
    TCG_REG_R22,
    TCG_REG_R23,
    TCG_REG_R24,
    TCG_REG_R25,
    TCG_REG_R26,
    TCG_REG_R27, /* currently used for the global env */
    TCG_REG_R28,
    TCG_REG_R29,
    TCG_REG_R30,
    TCG_REG_R31
};
219
220/* For PPC, we use TB+4 instead of TB as the base. */
221static inline ptrdiff_t ppc_tbrel_diff(TCGContext *s, const void *target)
222{
223    return tcg_tbrel_diff(s, target) - 4;
224}
225
226static inline bool in_range_b(tcg_target_long target)
227{
228    return target == sextract64(target, 0, 26);
229}
230
231static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
232                               const tcg_insn_unit *target)
233{
234    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
235    tcg_debug_assert(in_range_b(disp));
236    return disp & 0x3fffffc;
237}
238
239static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
240{
241    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
242    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
243
244    if (in_range_b(disp)) {
245        *src_rw = (*src_rw & ~0x3fffffc) | (disp & 0x3fffffc);
246        return true;
247    }
248    return false;
249}
250
251static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
252                               const tcg_insn_unit *target)
253{
254    ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
255    tcg_debug_assert(disp == (int16_t) disp);
256    return disp & 0xfffc;
257}
258
259static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
260{
261    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
262    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
263
264    if (disp == (int16_t) disp) {
265        *src_rw = (*src_rw & ~0xfffc) | (disp & 0xfffc);
266        return true;
267    }
268    return false;
269}
270
271static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
272{
273    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
274    ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
275
276    if (disp == sextract64(disp, 0, 34)) {
277        src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
278        src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
279        return true;
280    }
281    return false;
282}
283
284static bool mask_operand(uint32_t c, int *mb, int *me);
285static bool mask64_operand(uint64_t c, int *mb, int *me);
286
/*
 * Test if a constant matches the constraint.
 * SVAL is the constant; CT is the constraint-type bitmask; TYPE narrows
 * the value to 32 bits when TCG_TYPE_I32; COND is consulted only for
 * the TCG_CT_CONST_CMP constraint.  Returns nonzero on a match.
 */
static bool tcg_target_const_match(int64_t sval, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = sval;
    int mb, me;

    if (ct & TCG_CT_CONST) {
        return 1;
    }

    /* For 32-bit operations, normalize both views to the low 32 bits. */
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /*
     * TCG_CT_CONST_CMP: widen CT with the immediate forms usable for a
     * comparison under COND (signed vs unsigned 16-bit immediates).
     */
    if (ct & TCG_CT_CONST_CMP) {
        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            /* Equality can use either signed or unsigned immediates. */
            ct |= TCG_CT_CONST_S16 | TCG_CT_CONST_U16;
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            /* Signed comparisons take a sign-extended 16-bit immediate. */
            ct |= TCG_CT_CONST_S16;
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            /* Unsigned comparisons take a zero-extended 16-bit immediate. */
            ct |= TCG_CT_CONST_U16;
            break;
        case TCG_COND_TSTEQ:
        case TCG_COND_TSTNE:
            /* Mask fits entirely in the low or high 16-bit half. */
            if ((uval & ~0xffff) == 0 || (uval & ~0xffff0000ull) == 0) {
                return 1;
            }
            /* 32-bit value expressible as a contiguous rotate mask. */
            if (uval == (uint32_t)uval && mask_operand(uval, &mb, &me)) {
                return 1;
            }
            /*
             * 64-bit mask reachable after shifting out leading zeros;
             * uval != 0 here (the first test above catches 0).
             */
            if (TCG_TARGET_REG_BITS == 64 &&
                mask64_operand(uval << clz64(uval), &mb, &me)) {
                return 1;
            }
            return 0;
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S16) && sval == (int16_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U16) && uval == (uint16_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && sval == (int32_t)sval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && uval == (uint32_t)uval) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && sval == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && sval == -1) {
        return 1;
    }
    /* WSZ: the constant equals the operation's word size in bits. */
    if ((ct & TCG_CT_CONST_WSZ) && sval == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}
362
363#define OPCD(opc) ((opc)<<26)
364#define XO19(opc) (OPCD(19)|((opc)<<1))
365#define MD30(opc) (OPCD(30)|((opc)<<2))
366#define MDS30(opc) (OPCD(30)|((opc)<<1))
367#define XO31(opc) (OPCD(31)|((opc)<<1))
368#define XO58(opc) (OPCD(58)|(opc))
369#define XO62(opc) (OPCD(62)|(opc))
370#define VX4(opc)  (OPCD(4)|(opc))
371
372#define B      OPCD( 18)
373#define BC     OPCD( 16)
374
375#define LBZ    OPCD( 34)
376#define LHZ    OPCD( 40)
377#define LHA    OPCD( 42)
378#define LWZ    OPCD( 32)
379#define LWZUX  XO31( 55)
380#define LD     XO58(  0)
381#define LDX    XO31( 21)
382#define LDU    XO58(  1)
383#define LDUX   XO31( 53)
384#define LWA    XO58(  2)
385#define LWAX   XO31(341)
386#define LQ     OPCD( 56)
387
388#define STB    OPCD( 38)
389#define STH    OPCD( 44)
390#define STW    OPCD( 36)
391#define STD    XO62(  0)
392#define STDU   XO62(  1)
393#define STDX   XO31(149)
394#define STQ    XO62(  2)
395
396#define PLWA   OPCD( 41)
397#define PLD    OPCD( 57)
398#define PLXSD  OPCD( 42)
399#define PLXV   OPCD(25 * 2 + 1)  /* force tx=1 */
400
401#define PSTD   OPCD( 61)
402#define PSTXSD OPCD( 46)
403#define PSTXV  OPCD(27 * 2 + 1)  /* force sx=1 */
404
405#define ADDIC  OPCD( 12)
406#define ADDI   OPCD( 14)
407#define ADDIS  OPCD( 15)
408#define ORI    OPCD( 24)
409#define ORIS   OPCD( 25)
410#define XORI   OPCD( 26)
411#define XORIS  OPCD( 27)
412#define ANDI   OPCD( 28)
413#define ANDIS  OPCD( 29)
414#define MULLI  OPCD(  7)
415#define CMPLI  OPCD( 10)
416#define CMPI   OPCD( 11)
417#define SUBFIC OPCD( 8)
418
419#define LWZU   OPCD( 33)
420#define STWU   OPCD( 37)
421
422#define RLWIMI OPCD( 20)
423#define RLWINM OPCD( 21)
424#define RLWNM  OPCD( 23)
425
426#define RLDICL MD30(  0)
427#define RLDICR MD30(  1)
428#define RLDIMI MD30(  3)
429#define RLDCL  MDS30( 8)
430
431#define BCLR   XO19( 16)
432#define BCCTR  XO19(528)
433#define CRAND  XO19(257)
434#define CRANDC XO19(129)
435#define CRNAND XO19(225)
436#define CROR   XO19(449)
437#define CRNOR  XO19( 33)
438#define ADDPCIS XO19( 2)
439
440#define EXTSB  XO31(954)
441#define EXTSH  XO31(922)
442#define EXTSW  XO31(986)
443#define ADD    XO31(266)
444#define ADDE   XO31(138)
445#define ADDME  XO31(234)
446#define ADDZE  XO31(202)
447#define ADDC   XO31( 10)
448#define AND    XO31( 28)
449#define SUBF   XO31( 40)
450#define SUBFC  XO31(  8)
451#define SUBFE  XO31(136)
452#define SUBFME XO31(232)
453#define SUBFZE XO31(200)
454#define OR     XO31(444)
455#define XOR    XO31(316)
456#define MULLW  XO31(235)
457#define MULHW  XO31( 75)
458#define MULHWU XO31( 11)
459#define DIVW   XO31(491)
460#define DIVWU  XO31(459)
461#define MODSW  XO31(779)
462#define MODUW  XO31(267)
463#define CMP    XO31(  0)
464#define CMPL   XO31( 32)
465#define LHBRX  XO31(790)
466#define LWBRX  XO31(534)
467#define LDBRX  XO31(532)
468#define STHBRX XO31(918)
469#define STWBRX XO31(662)
470#define STDBRX XO31(660)
471#define MFSPR  XO31(339)
472#define MTSPR  XO31(467)
473#define SRAWI  XO31(824)
474#define NEG    XO31(104)
475#define MFCR   XO31( 19)
476#define MFOCRF (MFCR | (1u << 20))
477#define NOR    XO31(124)
478#define CNTLZW XO31( 26)
479#define CNTLZD XO31( 58)
480#define CNTTZW XO31(538)
481#define CNTTZD XO31(570)
482#define CNTPOPW XO31(378)
483#define CNTPOPD XO31(506)
484#define ANDC   XO31( 60)
485#define ORC    XO31(412)
486#define EQV    XO31(284)
487#define NAND   XO31(476)
488#define ISEL   XO31( 15)
489
490#define MULLD  XO31(233)
491#define MULHD  XO31( 73)
492#define MULHDU XO31(  9)
493#define DIVD   XO31(489)
494#define DIVDU  XO31(457)
495#define MODSD  XO31(777)
496#define MODUD  XO31(265)
497
498#define LBZX   XO31( 87)
499#define LHZX   XO31(279)
500#define LHAX   XO31(343)
501#define LWZX   XO31( 23)
502#define STBX   XO31(215)
503#define STHX   XO31(407)
504#define STWX   XO31(151)
505
506#define EIEIO  XO31(854)
507#define HWSYNC XO31(598)
508#define LWSYNC (HWSYNC | (1u << 21))
509
510#define SPR(a, b) ((((a)<<5)|(b))<<11)
511#define LR     SPR(8, 0)
512#define CTR    SPR(9, 0)
513
514#define SLW    XO31( 24)
515#define SRW    XO31(536)
516#define SRAW   XO31(792)
517
518#define SLD    XO31( 27)
519#define SRD    XO31(539)
520#define SRAD   XO31(794)
521#define SRADI  XO31(413<<1)
522
523#define BRH    XO31(219)
524#define BRW    XO31(155)
525#define BRD    XO31(187)
526
527#define TW     XO31( 4)
528#define TRAP   (TW | TO(31))
529
530#define SETBC    XO31(384)  /* v3.10 */
531#define SETBCR   XO31(416)  /* v3.10 */
532#define SETNBC   XO31(448)  /* v3.10 */
533#define SETNBCR  XO31(480)  /* v3.10 */
534
535#define NOP    ORI  /* ori 0,0,0 */
536
537#define LVX        XO31(103)
538#define LVEBX      XO31(7)
539#define LVEHX      XO31(39)
540#define LVEWX      XO31(71)
541#define LXSDX      (XO31(588) | 1)  /* v2.06, force tx=1 */
542#define LXVDSX     (XO31(332) | 1)  /* v2.06, force tx=1 */
543#define LXSIWZX    (XO31(12) | 1)   /* v2.07, force tx=1 */
544#define LXV        (OPCD(61) | 8 | 1)  /* v3.00, force tx=1 */
545#define LXSD       (OPCD(57) | 2)   /* v3.00 */
546#define LXVWSX     (XO31(364) | 1)  /* v3.00, force tx=1 */
547
548#define STVX       XO31(231)
549#define STVEWX     XO31(199)
550#define STXSDX     (XO31(716) | 1)  /* v2.06, force sx=1 */
551#define STXSIWX    (XO31(140) | 1)  /* v2.07, force sx=1 */
552#define STXV       (OPCD(61) | 8 | 5) /* v3.00, force sx=1 */
553#define STXSD      (OPCD(61) | 2)   /* v3.00 */
554
555#define VADDSBS    VX4(768)
556#define VADDUBS    VX4(512)
557#define VADDUBM    VX4(0)
558#define VADDSHS    VX4(832)
559#define VADDUHS    VX4(576)
560#define VADDUHM    VX4(64)
561#define VADDSWS    VX4(896)
562#define VADDUWS    VX4(640)
563#define VADDUWM    VX4(128)
564#define VADDUDM    VX4(192)       /* v2.07 */
565
566#define VSUBSBS    VX4(1792)
567#define VSUBUBS    VX4(1536)
568#define VSUBUBM    VX4(1024)
569#define VSUBSHS    VX4(1856)
570#define VSUBUHS    VX4(1600)
571#define VSUBUHM    VX4(1088)
572#define VSUBSWS    VX4(1920)
573#define VSUBUWS    VX4(1664)
574#define VSUBUWM    VX4(1152)
575#define VSUBUDM    VX4(1216)      /* v2.07 */
576
577#define VNEGW      (VX4(1538) | (6 << 16))  /* v3.00 */
578#define VNEGD      (VX4(1538) | (7 << 16))  /* v3.00 */
579
580#define VMAXSB     VX4(258)
581#define VMAXSH     VX4(322)
582#define VMAXSW     VX4(386)
583#define VMAXSD     VX4(450)       /* v2.07 */
584#define VMAXUB     VX4(2)
585#define VMAXUH     VX4(66)
586#define VMAXUW     VX4(130)
587#define VMAXUD     VX4(194)       /* v2.07 */
588#define VMINSB     VX4(770)
589#define VMINSH     VX4(834)
590#define VMINSW     VX4(898)
591#define VMINSD     VX4(962)       /* v2.07 */
592#define VMINUB     VX4(514)
593#define VMINUH     VX4(578)
594#define VMINUW     VX4(642)
595#define VMINUD     VX4(706)       /* v2.07 */
596
597#define VCMPEQUB   VX4(6)
598#define VCMPEQUH   VX4(70)
599#define VCMPEQUW   VX4(134)
600#define VCMPEQUD   VX4(199)       /* v2.07 */
601#define VCMPGTSB   VX4(774)
602#define VCMPGTSH   VX4(838)
603#define VCMPGTSW   VX4(902)
604#define VCMPGTSD   VX4(967)       /* v2.07 */
605#define VCMPGTUB   VX4(518)
606#define VCMPGTUH   VX4(582)
607#define VCMPGTUW   VX4(646)
608#define VCMPGTUD   VX4(711)       /* v2.07 */
609#define VCMPNEB    VX4(7)         /* v3.00 */
610#define VCMPNEH    VX4(71)        /* v3.00 */
611#define VCMPNEW    VX4(135)       /* v3.00 */
612
613#define VSLB       VX4(260)
614#define VSLH       VX4(324)
615#define VSLW       VX4(388)
616#define VSLD       VX4(1476)      /* v2.07 */
617#define VSRB       VX4(516)
618#define VSRH       VX4(580)
619#define VSRW       VX4(644)
620#define VSRD       VX4(1732)      /* v2.07 */
621#define VSRAB      VX4(772)
622#define VSRAH      VX4(836)
623#define VSRAW      VX4(900)
624#define VSRAD      VX4(964)       /* v2.07 */
625#define VRLB       VX4(4)
626#define VRLH       VX4(68)
627#define VRLW       VX4(132)
628#define VRLD       VX4(196)       /* v2.07 */
629
630#define VMULEUB    VX4(520)
631#define VMULEUH    VX4(584)
632#define VMULEUW    VX4(648)       /* v2.07 */
633#define VMULOUB    VX4(8)
634#define VMULOUH    VX4(72)
635#define VMULOUW    VX4(136)       /* v2.07 */
636#define VMULUWM    VX4(137)       /* v2.07 */
637#define VMULLD     VX4(457)       /* v3.10 */
638#define VMSUMUHM   VX4(38)
639
640#define VMRGHB     VX4(12)
641#define VMRGHH     VX4(76)
642#define VMRGHW     VX4(140)
643#define VMRGLB     VX4(268)
644#define VMRGLH     VX4(332)
645#define VMRGLW     VX4(396)
646
647#define VPKUHUM    VX4(14)
648#define VPKUWUM    VX4(78)
649
650#define VAND       VX4(1028)
651#define VANDC      VX4(1092)
652#define VNOR       VX4(1284)
653#define VOR        VX4(1156)
654#define VXOR       VX4(1220)
655#define VEQV       VX4(1668)      /* v2.07 */
656#define VNAND      VX4(1412)      /* v2.07 */
657#define VORC       VX4(1348)      /* v2.07 */
658
659#define VSPLTB     VX4(524)
660#define VSPLTH     VX4(588)
661#define VSPLTW     VX4(652)
662#define VSPLTISB   VX4(780)
663#define VSPLTISH   VX4(844)
664#define VSPLTISW   VX4(908)
665
666#define VSLDOI     VX4(44)
667
668#define XXPERMDI   (OPCD(60) | (10 << 3) | 7)  /* v2.06, force ax=bx=tx=1 */
669#define XXSEL      (OPCD(60) | (3 << 4) | 0xf) /* v2.06, force ax=bx=cx=tx=1 */
670#define XXSPLTIB   (OPCD(60) | (360 << 1) | 1) /* v3.00, force tx=1 */
671
672#define MFVSRD     (XO31(51) | 1)   /* v2.07, force sx=1 */
673#define MFVSRWZ    (XO31(115) | 1)  /* v2.07, force sx=1 */
674#define MTVSRD     (XO31(179) | 1)  /* v2.07, force tx=1 */
675#define MTVSRWZ    (XO31(243) | 1)  /* v2.07, force tx=1 */
676#define MTVSRDD    (XO31(435) | 1)  /* v3.00, force tx=1 */
677#define MTVSRWS    (XO31(403) | 1)  /* v3.00, force tx=1 */
678
679#define RT(r) ((r)<<21)
680#define RS(r) ((r)<<21)
681#define RA(r) ((r)<<16)
682#define RB(r) ((r)<<11)
683#define TO(t) ((t)<<21)
684#define SH(s) ((s)<<11)
685#define MB(b) ((b)<<6)
686#define ME(e) ((e)<<1)
687#define BO(o) ((o)<<21)
688#define MB64(b) ((b)<<5)
689#define FXM(b) (1 << (19 - (b)))
690
691#define VRT(r)  (((r) & 31) << 21)
692#define VRA(r)  (((r) & 31) << 16)
693#define VRB(r)  (((r) & 31) << 11)
694#define VRC(r)  (((r) & 31) <<  6)
695
696#define LK    1
697
698#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
699#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
700#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
701#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
702
703#define BF(n)    ((n)<<23)
704#define BI(n, c) (((c)+((n)*4))<<16)
705#define BT(n, c) (((c)+((n)*4))<<21)
706#define BA(n, c) (((c)+((n)*4))<<16)
707#define BB(n, c) (((c)+((n)*4))<<11)
708#define BC_(n, c) (((c)+((n)*4))<<6)
709
710#define BO_COND_TRUE  BO(12)
711#define BO_COND_FALSE BO( 4)
712#define BO_ALWAYS     BO(20)
713
/* Bit indexes within a CR field, used by the BI/BA/BB/BC_ encoders. */
enum {
    CR_LT,
    CR_GT,
    CR_EQ,
    CR_SO
};
720
/*
 * BC (branch conditional) encodings indexed by TCGCond, each testing
 * one bit of CR field 0.  The TST conditions reuse the EQ bit.
 */
static const uint32_t tcg_to_bc[16] = {
    [TCG_COND_EQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_NE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_TSTEQ]  = BC | BI(0, CR_EQ) | BO_COND_TRUE,
    [TCG_COND_TSTNE]  = BC | BI(0, CR_EQ) | BO_COND_FALSE,
    [TCG_COND_LT]  = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GE]  = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LE]  = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GT]  = BC | BI(0, CR_GT) | BO_COND_TRUE,
    [TCG_COND_LTU] = BC | BI(0, CR_LT) | BO_COND_TRUE,
    [TCG_COND_GEU] = BC | BI(0, CR_LT) | BO_COND_FALSE,
    [TCG_COND_LEU] = BC | BI(0, CR_GT) | BO_COND_FALSE,
    [TCG_COND_GTU] = BC | BI(0, CR_GT) | BO_COND_TRUE,
};
735
/*
 * ISEL encodings indexed by TCGCond, selecting on a bit of CR field 0.
 * The low bit here is set if the RA and RB fields must be inverted.
 */
static const uint32_t tcg_to_isel[16] = {
    [TCG_COND_EQ]  = ISEL | BC_(0, CR_EQ),
    [TCG_COND_NE]  = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_TSTEQ] = ISEL | BC_(0, CR_EQ),
    [TCG_COND_TSTNE] = ISEL | BC_(0, CR_EQ) | 1,
    [TCG_COND_LT]  = ISEL | BC_(0, CR_LT),
    [TCG_COND_GE]  = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LE]  = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GT]  = ISEL | BC_(0, CR_GT),
    [TCG_COND_LTU] = ISEL | BC_(0, CR_LT),
    [TCG_COND_GEU] = ISEL | BC_(0, CR_LT) | 1,
    [TCG_COND_LEU] = ISEL | BC_(0, CR_GT) | 1,
    [TCG_COND_GTU] = ISEL | BC_(0, CR_GT),
};
751
/*
 * Apply relocation TYPE at CODE_PTR so it refers to VALUE + ADDEND.
 * Returns false when the displacement does not fit the relocation's
 * field.
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *target;
    int16_t lo;
    int32_t hi;

    value += addend;
    target = (const tcg_insn_unit *)value;

    switch (type) {
    case R_PPC_REL14:
        /* Conditional branch displacement. */
        return reloc_pc14(code_ptr, target);
    case R_PPC_REL24:
        /* Unconditional branch displacement. */
        return reloc_pc24(code_ptr, target);
    case R_PPC64_PCREL34:
        /* Prefixed-instruction 34-bit pc-relative displacement. */
        return reloc_pc34(code_ptr, target);
    case R_PPC_ADDR16:
        /*
         * We are (slightly) abusing this relocation type.  In particular,
         * assert that the low 2 bits are zero, and do not modify them.
         * That way we can use this with LD et al that have opcode bits
         * in the low 2 bits of the insn.
         */
        if ((value & 3) || value != (int16_t)value) {
            return false;
        }
        *code_ptr = (*code_ptr & ~0xfffc) | (value & 0xfffc);
        break;
    case R_PPC_ADDR32:
        /*
         * We are abusing this relocation type.  Again, this points to
         * a pair of insns, lis + load.  This is an absolute address
         * relocation for PPC32 so the lis cannot be removed.
         */
        lo = value;
        hi = value - lo;
        /* Fail if the address does not decompose into hi + sign-extended lo. */
        if (hi + lo != value) {
            return false;
        }
        code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
        code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
800
801/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
802static bool tcg_out_need_prefix_align(TCGContext *s)
803{
804    return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
805}
806
807static void tcg_out_prefix_align(TCGContext *s)
808{
809    if (tcg_out_need_prefix_align(s)) {
810        tcg_out32(s, NOP);
811    }
812}
813
814static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
815{
816    return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
817}
818
819/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
820static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
821                          unsigned ra, tcg_target_long imm, bool r)
822{
823    tcg_insn_unit p, i;
824
825    p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
826    i = opc | TAI(rt, ra, imm);
827
828    tcg_out_prefix_align(s);
829    tcg_out32(s, p);
830    tcg_out32(s, i);
831}
832
833/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
834static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
835                          unsigned ra, tcg_target_long imm, bool r)
836{
837    tcg_insn_unit p, i;
838
839    p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
840    i = opc | TAI(rt, ra, imm);
841
842    tcg_out_prefix_align(s);
843    tcg_out32(s, p);
844    tcg_out32(s, i);
845}
846
847static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
848                             TCGReg base, tcg_target_long offset);
849
/*
 * Emit a register-to-register move of TYPE from ARG to RET.
 * Returns false when the move is not representable (GPR<->vector
 * without ISA 2.07); the caller must then go through memory.
 */
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I64:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        /* fallthru */
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /* gpr -> gpr: mr ret,arg (OR with both sources = arg). */
                tcg_out32(s, OR | SAB(arg, ret, arg));
                break;
            } else if (have_isa_2_07) {
                /* vsr -> gpr: mfvsrwz (i32) / mfvsrd (i64), ISA v2.07. */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
                          | VRT(arg) | RA(ret));
                break;
            } else {
                /* Altivec does not support vector->integer moves.  */
                return false;
            }
        } else if (arg < TCG_REG_V0) {
            if (have_isa_2_07) {
                /* gpr -> vsr: mtvsrwz (i32) / mtvsrd (i64), ISA v2.07. */
                tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
                          | VRT(ret) | RA(arg));
                break;
            } else {
                /* Altivec does not support integer->vector moves.  */
                return false;
            }
        }
        /* fallthru */
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        /* vector -> vector: vor ret,arg,arg. */
        tcg_debug_assert(ret >= TCG_REG_V0 && arg >= TCG_REG_V0);
        tcg_out32(s, VOR | VRT(ret) | VRA(arg) | VRB(arg));
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
893
/*
 * Emit an MD-form rotate-left-doubleword insn OP with 6-bit shift SH
 * and 6-bit mask bound MB; RC selects the record form.
 */
static void tcg_out_rld_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
                           int sh, int mb, bool rc)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
    /* The 6-bit SH is split: low 5 bits in SH, bit 5 at insn bit 1. */
    sh = SH(sh & 0x1f) | (((sh >> 5) & 1) << 1);
    /* The 6-bit MB is rotated into the MD-form mb field placement. */
    mb = MB64((mb >> 5) | ((mb << 1) & 0x3f));
    tcg_out32(s, op | RA(ra) | RS(rs) | sh | mb | rc);
}
902
903static void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
904                        int sh, int mb)
905{
906    tcg_out_rld_rc(s, op, ra, rs, sh, mb, false);
907}
908
909static void tcg_out_rlw_rc(TCGContext *s, int op, TCGReg ra, TCGReg rs,
910                           int sh, int mb, int me, bool rc)
911{
912    tcg_debug_assert((mb & 0x1f) == mb);
913    tcg_debug_assert((me & 0x1f) == me);
914    tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh & 0x1f) | MB(mb) | ME(me) | rc);
915}
916
917static void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
918                        int sh, int mb, int me)
919{
920    tcg_out_rlw_rc(s, op, ra, rs, sh, mb, me, false);
921}
922
923static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
924{
925    tcg_out32(s, EXTSB | RA(dst) | RS(src));
926}
927
928static void tcg_out_ext8u(TCGContext *s, TCGReg dst, TCGReg src)
929{
930    tcg_out32(s, ANDI | SAI(src, dst, 0xff));
931}
932
933static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
934{
935    tcg_out32(s, EXTSH | RA(dst) | RS(src));
936}
937
938static void tcg_out_ext16u(TCGContext *s, TCGReg dst, TCGReg src)
939{
940    tcg_out32(s, ANDI | SAI(src, dst, 0xffff));
941}
942
943static void tcg_out_ext32s(TCGContext *s, TCGReg dst, TCGReg src)
944{
945    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
946    tcg_out32(s, EXTSW | RA(dst) | RS(src));
947}
948
949static void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
950{
951    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
952    tcg_out_rld(s, RLDICL, dst, src, 0, 32);
953}
954
955static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
956{
957    tcg_out_ext32s(s, dst, src);
958}
959
960static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dst, TCGReg src)
961{
962    tcg_out_ext32u(s, dst, src);
963}
964
965static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
966{
967    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
968    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
969}
970
971static inline void tcg_out_shli32(TCGContext *s, TCGReg dst, TCGReg src, int c)
972{
973    tcg_out_rlw(s, RLWINM, dst, src, c, 0, 31 - c);
974}
975
976static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
977{
978    tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
979}
980
981static inline void tcg_out_sari32(TCGContext *s, TCGReg dst, TCGReg src, int c)
982{
983    /* Limit immediate shift count lest we create an illegal insn.  */
984    tcg_out32(s, SRAWI | RA(dst) | RS(src) | SH(c & 31));
985}
986
987static inline void tcg_out_shri32(TCGContext *s, TCGReg dst, TCGReg src, int c)
988{
989    tcg_out_rlw(s, RLWINM, dst, src, 32 - c, c, 31);
990}
991
992static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
993{
994    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
995}
996
/* 64-bit arithmetic shift right immediate (sradi). */
static inline void tcg_out_sari64(TCGContext *s, TCGReg dst, TCGReg src, int c)
{
    /* SRADI's 6-bit shift is split: low 5 bits in SH, bit 5 at insn bit 1. */
    tcg_out32(s, SRADI | RA(dst) | RS(src) | SH(c & 0x1f) | ((c >> 4) & 2));
}
1001
/*
 * Emit ADDPCIS dst,d: dst = next-insn-address + (d << 16).
 * IMM must therefore have its low 16 bits clear and fit in 32 bits;
 * the effective 16-bit d value is split across the d0/d1/d2 fields.
 */
static void tcg_out_addpcis(TCGContext *s, TCGReg dst, intptr_t imm)
{
    uint32_t d0, d1, d2;

    tcg_debug_assert((imm & 0xffff) == 0);
    tcg_debug_assert(imm == (int32_t)imm);

    d2 = extract32(imm, 16, 1);
    d1 = extract32(imm, 17, 5);
    d0 = extract32(imm, 22, 10);
    tcg_out32(s, ADDPCIS | RT(dst) | (d1 << 16) | (d0 << 6) | d2);
}
1014
1015/* Emit a move into ret of arg, if it can be done in one insn.  */
1016static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg)
1017{
1018    if (arg == (int16_t)arg) {
1019        tcg_out32(s, ADDI | TAI(ret, 0, arg));
1020        return true;
1021    }
1022    if (arg == (int32_t)arg && (arg & 0xffff) == 0) {
1023        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
1024        return true;
1025    }
1026    return false;
1027}
1028
/*
 * Materialize constant 'arg' into register 'ret', trying successively
 * longer instruction sequences.  'in_prologue' suppresses the
 * TCG_REG_TB-relative and constant-pool strategies (NOTE(review):
 * presumably because neither is usable while the prologue itself is
 * being emitted -- confirm against the prologue generation code).
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    tcg_target_long tmp;
    int shift;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* For 32-bit values only the low 32 bits matter; canonicalize. */
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (tcg_out_movi_one(s, ret, arg)) {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = ppc_tbrel_diff(s, (void *)arg);
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /*
     * Load values up to 34 bits, and pc-relative addresses,
     * with one prefixed insn.
     */
    if (have_isa_3_10) {
        if (arg == sextract64(arg, 0, 34)) {
            /* pli ret,value = paddi ret,0,value,0 */
            tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
            return;
        }

        tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
        if (tmp == sextract64(tmp, 0, 34)) {
            /* pla ret,value = paddi ret,0,value,1 */
            tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
            return;
        }
    }

    /* Load 32-bit immediates with two insns.  Note that we've already
       eliminated bare ADDIS, so we know both insns are required.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        tcg_out32(s, ORI | SAI(ret, ret, arg));
        return;
    }
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        /* Unsigned 32-bit: li of the low half, then oris the high. */
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
        return;
    }

    /* Load masked 16-bit value.  */
    if (arg > 0 && (arg & 0x8000)) {
        tmp = arg | 0x7fff;
        if ((tmp & (tmp + 1)) == 0) {
            /* li sign-extends; a single rldicl clears the copies. */
            int mb = clz64(tmp + 1) + 1;
            tcg_out32(s, ADDI | TAI(ret, 0, arg));
            tcg_out_rld(s, RLDICL, ret, ret, 0, mb);
            return;
        }
    }

    /* Load common masks with 2 insns.  */
    shift = ctz64(arg);
    tmp = arg >> shift;
    if (tmp == (int16_t)tmp) {
        tcg_out32(s, ADDI | TAI(ret, 0, tmp));
        tcg_out_shli64(s, ret, ret, shift);
        return;
    }
    shift = clz64(arg);
    if (tcg_out_movi_one(s, ret, arg << shift)) {
        tcg_out_shri64(s, ret, ret, shift);
        return;
    }

    /* Load addresses within 2GB with 2 insns. */
    if (have_isa_3_00) {
        intptr_t hi = tcg_pcrel_diff(s, (void *)arg) - 4;
        int16_t lo = hi;

        hi -= lo;
        if (hi == (int32_t)hi) {
            tcg_out_addpcis(s, TCG_REG_TMP2, hi);
            tcg_out32(s, ADDI | TAI(ret, TCG_REG_TMP2, lo));
            return;
        }
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    /* Use the constant pool, if possible.  */
    if (!in_prologue && USE_REG_TB) {
        new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
                       ppc_tbrel_diff(s, NULL));
        tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
        return;
    }
    if (have_isa_3_10) {
        /* pld with a pc-relative reloc pointing at the pool entry. */
        tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
        new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        return;
    }
    if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP2, 0);
        new_pool_label(s, arg, R_PPC_REL14, s->code_ptr, 0);
        tcg_out32(s, LD | TAI(ret, TCG_REG_TMP2, 0));
        return;
    }

    /* Worst case: build the high 32 bits, shift into place, then
       OR in the two low 16-bit halves.  */
    tmp = arg >> 31 >> 1;
    tcg_out_movi(s, TCG_TYPE_I32, ret, tmp);
    if (tmp) {
        tcg_out_shli64(s, ret, ret, 32);
    }
    if (arg & 0xffff0000) {
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    }
    if (arg & 0xffff) {
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}
1161
/*
 * Emit a splat of constant 'val' (already replicated to 64 bits by the
 * caller's convention -- see uses of dup_const elsewhere; TODO confirm)
 * into vector register 'ret' for element size 'vece'.  Small values use
 * the single-insn VSPLTIS*/XXSPLTIB forms; everything else is loaded
 * from the constant pool via a relocated address.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg ret, int64_t val)
{
    uint32_t load_insn;
    int rel, low;
    intptr_t add;

    switch (vece) {
    case MO_8:
        low = (int8_t)val;
        if (low >= -16 && low < 16) {
            /* vspltisb: 5-bit signed immediate splat. */
            tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
            return;
        }
        if (have_isa_3_00) {
            /* xxspltib: full 8-bit immediate splat. */
            tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
            return;
        }
        break;

    case MO_16:
        low = (int16_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;

    case MO_32:
        low = (int32_t)val;
        if (low >= -16 && low < 16) {
            tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
            return;
        }
        break;
    }

    /*
     * Otherwise we must load the value from the constant pool.
     */
    if (USE_REG_TB) {
        rel = R_PPC_ADDR16;
        add = ppc_tbrel_diff(s, NULL);
    } else if (have_isa_3_10) {
        /* Prefixed pc-relative vector loads need no address setup. */
        if (type == TCG_TYPE_V64) {
            tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
            new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
        } else {
            tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
            new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
        }
        return;
    } else if (have_isa_3_00) {
        tcg_out_addpcis(s, TCG_REG_TMP1, 0);
        rel = R_PPC_REL14;
        add = 0;
    } else {
        rel = R_PPC_ADDR32;
        add = 0;
    }

    /* Build the pool entry and the indexed load through TCG_REG_TMP1. */
    if (have_vsx) {
        load_insn = type == TCG_TYPE_V64 ? LXSDX : LXVDSX;
        load_insn |= VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_label(s, val, rel, s->code_ptr, add);
        } else {
            new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
        }
    } else {
        /* Plain Altivec: the pool holds the value replicated to 128 bits. */
        load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
        if (TCG_TARGET_REG_BITS == 64) {
            new_pool_l2(s, rel, s->code_ptr, add, val, val);
        } else {
            new_pool_l4(s, rel, s->code_ptr, add,
                        val >> 32, val, val >> 32, val);
        }
    }

    /* Materialize the (relocated) pool address in TCG_REG_TMP1. */
    if (USE_REG_TB) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, 0, 0));
        load_insn |= RA(TCG_REG_TB);
    } else if (have_isa_3_00) {
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    } else {
        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, 0, 0));
        tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, 0));
    }
    tcg_out32(s, load_insn);
}
1252
1253static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
1254                         tcg_target_long arg)
1255{
1256    switch (type) {
1257    case TCG_TYPE_I32:
1258    case TCG_TYPE_I64:
1259        tcg_debug_assert(ret < TCG_REG_V0);
1260        tcg_out_movi_int(s, type, ret, arg, false);
1261        break;
1262
1263    default:
1264        g_assert_not_reached();
1265    }
1266}
1267
/* This backend provides no register-exchange instruction. */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1272
/* Add constant to pointer; unreachable on this backend (see below). */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1279
/*
 * Test whether 'c' is a contiguous run of 1 bits usable as an
 * rlwinm-style mask; on success store the mask-begin/mask-end bit
 * positions (IBM numbering, bit 0 = MSB) into *mb / *me.
 */
static bool mask_operand(uint32_t c, int *mb, int *me)
{
    uint32_t lsb, test;

    /* Accept a bit pattern like:
           0....01....1
           1....10....0
           0..01..10..0
       Keep track of the transitions.  */
    if (c == 0 || c == -1) {
        return false;
    }
    test = c;
    lsb = test & -test;
    /* Adding the lowest set bit clears a contiguous run; a single
       remaining bit (or zero) proves the run was contiguous. */
    test += lsb;
    if (test & (test - 1)) {
        return false;
    }

    *me = clz32(lsb);
    *mb = test ? clz32(test & -test) + 1 : 0;
    return true;
}
1303
/*
 * Test whether 'c' is a 64-bit mask encodable by a single rldicl/rldicr
 * with zero shift; on success store the bit bounds into *mb / *me
 * (IBM numbering, bit 0 = MSB).  Unlike the 32-bit case, only masks
 * anchored at one end are accepted.
 */
static bool mask64_operand(uint64_t c, int *mb, int *me)
{
    uint64_t lsb;

    if (c == 0) {
        return false;
    }

    lsb = c & -c;
    /* Accept 1..10..0.  */
    if (c == -lsb) {
        *mb = 0;
        *me = clz64(lsb);
        return true;
    }
    /* Accept 0..01..1.  */
    if (lsb == 1 && (c & (c + 1)) == 0) {
        *mb = clz64(c + 1) + 1;
        *me = 63;
        return true;
    }
    return false;
}
1327
1328static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
1329{
1330    int mb, me;
1331
1332    if (mask_operand(c, &mb, &me)) {
1333        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
1334    } else if ((c & 0xffff) == c) {
1335        tcg_out32(s, ANDI | SAI(src, dst, c));
1336        return;
1337    } else if ((c & 0xffff0000) == c) {
1338        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1339        return;
1340    } else {
1341        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
1342        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1343    }
1344}
1345
1346static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
1347{
1348    int mb, me;
1349
1350    tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1351    if (mask64_operand(c, &mb, &me)) {
1352        if (mb == 0) {
1353            tcg_out_rld(s, RLDICR, dst, src, 0, me);
1354        } else {
1355            tcg_out_rld(s, RLDICL, dst, src, 0, mb);
1356        }
1357    } else if ((c & 0xffff) == c) {
1358        tcg_out32(s, ANDI | SAI(src, dst, c));
1359        return;
1360    } else if ((c & 0xffff0000) == c) {
1361        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
1362        return;
1363    } else {
1364        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
1365        tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
1366    }
1367}
1368
1369static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
1370                           int op_lo, int op_hi)
1371{
1372    if (c >> 16) {
1373        tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
1374        src = dst;
1375    }
1376    if (c & 0xffff) {
1377        tcg_out32(s, op_lo | SAI(src, dst, c));
1378        src = dst;
1379    }
1380}
1381
/* OR a 32-bit constant into dst from src (ori/oris halves). */
static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, ORI, ORIS);
}
1386
/* XOR a 32-bit constant into dst from src (xori/xoris halves). */
static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
    tcg_out_zori32(s, dst, src, c, XORI, XORIS);
}
1391
/*
 * Emit a branch to 'target' with the given link/absolute 'mask' bits.
 * Uses a direct B when the displacement fits the 26-bit field,
 * otherwise loads the address into CTR and branches through it.
 */
static void tcg_out_b(TCGContext *s, int mask, const tcg_insn_unit *target)
{
    ptrdiff_t disp = tcg_pcrel_diff(s, target);
    if (in_range_b(disp)) {
        tcg_out32(s, B | (disp & 0x3fffffc) | mask);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, (uintptr_t)target);
        tcg_out32(s, MTSPR | RS(TCG_REG_R0) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | mask);
    }
}
1403
/*
 * Emit a memory access (or ADDI) with an arbitrary constant offset.
 * 'opi' is the D-form (immediate) opcode, or 0 if none exists;
 * 'opx' is the X-form (indexed) opcode.  Depending on offset size and
 * alignment this uses: the plain D-form, a prefixed form (ISA 3.10),
 * the indexed form with the offset materialized in a scratch register,
 * or an ADDIS high-part adjustment followed by the D-form.
 */
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                             TCGReg base, tcg_target_long offset)
{
    tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
    bool is_int_store = false;
    TCGReg rs = TCG_REG_TMP1;

    /* 'align' records the low-bit constraint of each DS/DQ-form insn. */
    switch (opi) {
    case LD: case LWA:
        align = 3;
        /* FALLTHRU */
    default:
        /* For loads, the destination can double as the scratch. */
        if (rt > TCG_REG_R0 && rt < TCG_REG_V0) {
            rs = rt;
            break;
        }
        break;
    case LXSD:
    case STXSD:
        align = 3;
        break;
    case LXV:
    case STXV:
        align = 15;
        break;
    case STD:
        align = 3;
        /* FALLTHRU */
    case STB: case STH: case STW:
        is_int_store = true;
        break;
    }

    /* For unaligned or large offsets, use the prefixed form. */
    if (have_isa_3_10
        && (offset != (int16_t)offset || (offset & align))
        && offset == sextract64(offset, 0, 34)) {
        /*
         * Note that the MLS:D insns retain their un-prefixed opcode,
         * while the 8LS:D insns use a different opcode space.
         */
        switch (opi) {
        case LBZ:
        case LHZ:
        case LHA:
        case LWZ:
        case STB:
        case STH:
        case STW:
        case ADDI:
            tcg_out_mls_d(s, opi, rt, base, offset, 0);
            return;
        case LWA:
            tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
            return;
        case LD:
            tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
            return;
        case STD:
            tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
            return;
        case LXSD:
            tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
            return;
        case STXSD:
            tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
            return;
        case LXV:
            tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
            return;
        case STXV:
            tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
            return;
        }
    }

    /* For unaligned, or very large offsets, use the indexed form.  */
    if (offset & align || offset != (int32_t)offset || opi == 0) {
        if (rs == base) {
            rs = TCG_REG_R0;
        }
        /* A store must not clobber its own source register. */
        tcg_debug_assert(!is_int_store || rs != rt);
        tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
        tcg_out32(s, opx | TAB(rt & 31, base, rs));
        return;
    }

    /* Split the 32-bit offset into a low 16-bit signed part and a
       high adjustment applied via ADDIS. */
    l0 = (int16_t)offset;
    offset = (offset - l0) >> 16;
    l1 = (int16_t)offset;

    /* Avoid a negative high part for a non-negative offset by adding
       an extra 0x4000 ADDIS step. */
    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(offset - 0x4000);
    }
    if (l1) {
        tcg_out32(s, ADDIS | TAI(rs, base, l1));
        base = rs;
    }
    if (extra) {
        tcg_out32(s, ADDIS | TAI(rs, base, extra));
        base = rs;
    }
    /* Elide a no-op "addi rt, rt, 0". */
    if (opi != ADDI || base != rt || l0 != 0) {
        tcg_out32(s, opi | TAI(rt & 31, base, l0));
    }
}
1511
/* Emit VSLDOI: byte-wise shift-left-double of va:vb by shb bytes. */
static void tcg_out_vsldoi(TCGContext *s, TCGReg ret,
                           TCGReg va, TCGReg vb, int shb)
{
    tcg_out32(s, VSLDOI | VRT(ret) | VRA(va) | VRB(vb) | (shb << 6));
}
1517
/*
 * Load from base+offset into register 'ret' for the given type.
 * Integer registers use LWZ/LD; vector destinations use VSX forms
 * when available, else Altivec LVEWX/LVX with a vsldoi to rotate the
 * loaded element into the expected lane.
 */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t offset)
{
    int shift;

    switch (type) {
    case TCG_TYPE_I32:
        if (ret < TCG_REG_V0) {
            tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
            break;
        }
        if (have_isa_2_07 && have_vsx) {
            tcg_out_mem_long(s, 0, LXSIWZX, ret, base, offset);
            break;
        }
        /* Altivec path: LVEWX needs a word-aligned offset. */
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
        shift = (offset - 4) & 0xc;
        if (shift) {
            tcg_out_vsldoi(s, ret, ret, ret, shift);
        }
        break;
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
            tcg_out_mem_long(s, LD, LDX, ret, base, offset);
            break;
        }
        /* fallthru */
    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= TCG_REG_V0);
        if (have_vsx) {
            tcg_out_mem_long(s, have_isa_3_00 ? LXSD : 0, LXSDX,
                             ret, base, offset);
            break;
        }
        /* Altivec: load the enclosing quadword, rotate if needed. */
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
        if (offset & 8) {
            tcg_out_vsldoi(s, ret, ret, ret, 8);
        }
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= TCG_REG_V0);
        tcg_debug_assert((offset & 15) == 0);
        tcg_out_mem_long(s, have_isa_3_00 ? LXV : 0,
                         LVX, ret, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
}
1570
1571static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1572                              TCGReg base, intptr_t offset)
1573{
1574    int shift;
1575
1576    switch (type) {
1577    case TCG_TYPE_I32:
1578        if (arg < TCG_REG_V0) {
1579            tcg_out_mem_long(s, STW, STWX, arg, base, offset);
1580            break;
1581        }
1582        if (have_isa_2_07 && have_vsx) {
1583            tcg_out_mem_long(s, 0, STXSIWX, arg, base, offset);
1584            break;
1585        }
1586        assert((offset & 3) == 0);
1587        tcg_debug_assert((offset & 3) == 0);
1588        shift = (offset - 4) & 0xc;
1589        if (shift) {
1590            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
1591            arg = TCG_VEC_TMP1;
1592        }
1593        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1594        break;
1595    case TCG_TYPE_I64:
1596        if (arg < TCG_REG_V0) {
1597            tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1598            tcg_out_mem_long(s, STD, STDX, arg, base, offset);
1599            break;
1600        }
1601        /* fallthru */
1602    case TCG_TYPE_V64:
1603        tcg_debug_assert(arg >= TCG_REG_V0);
1604        if (have_vsx) {
1605            tcg_out_mem_long(s, have_isa_3_00 ? STXSD : 0,
1606                             STXSDX, arg, base, offset);
1607            break;
1608        }
1609        tcg_debug_assert((offset & 7) == 0);
1610        if (offset & 8) {
1611            tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
1612            arg = TCG_VEC_TMP1;
1613        }
1614        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
1615        tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
1616        break;
1617    case TCG_TYPE_V128:
1618        tcg_debug_assert(arg >= TCG_REG_V0);
1619        tcg_out_mem_long(s, have_isa_3_00 ? STXV : 0,
1620                         STVX, arg, base, offset);
1621        break;
1622    default:
1623        g_assert_not_reached();
1624    }
1625}
1626
/* This backend cannot store a constant directly to memory. */
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}
1632
1633/*
1634 * Set dest non-zero if and only if (arg1 & arg2) is non-zero.
1635 * If RC, then also set RC0.
1636 */
1637static void tcg_out_test(TCGContext *s, TCGReg dest, TCGReg arg1, TCGArg arg2,
1638                         bool const_arg2, TCGType type, bool rc)
1639{
1640    int mb, me;
1641
1642    if (!const_arg2) {
1643        tcg_out32(s, AND | SAB(arg1, dest, arg2) | rc);
1644        return;
1645    }
1646
1647    if (type == TCG_TYPE_I32) {
1648        arg2 = (uint32_t)arg2;
1649    }
1650
1651    if ((arg2 & ~0xffff) == 0) {
1652        tcg_out32(s, ANDI | SAI(arg1, dest, arg2));
1653        return;
1654    }
1655    if ((arg2 & ~0xffff0000ull) == 0) {
1656        tcg_out32(s, ANDIS | SAI(arg1, dest, arg2 >> 16));
1657        return;
1658    }
1659    if (arg2 == (uint32_t)arg2 && mask_operand(arg2, &mb, &me)) {
1660        tcg_out_rlw_rc(s, RLWINM, dest, arg1, 0, mb, me, rc);
1661        return;
1662    }
1663    if (TCG_TARGET_REG_BITS == 64) {
1664        int sh = clz64(arg2);
1665        if (mask64_operand(arg2 << sh, &mb, &me)) {
1666            tcg_out_rld_rc(s, RLDICR, dest, arg1, sh, me, rc);
1667            return;
1668        }
1669    }
1670    /* Constraints should satisfy this. */
1671    g_assert_not_reached();
1672}
1673
/*
 * Emit a comparison of arg1 against arg2 (register or constant) into
 * condition-register field 'cr'.  EQ/NE use unsigned compares so that
 * either a signed or unsigned 16-bit immediate can be used; TSTEQ/TSTNE
 * are lowered via tcg_out_test with RC0 (and require cr == 0).
 */
static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
                        bool const_arg2, int cr, TCGType type)
{
    uint32_t op;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /*
     * Simplify the comparisons below wrt CMPI.
     * All of the tests are 16-bit, so a 32-bit sign extend always works.
     */
    if (type == TCG_TYPE_I32) {
        arg2 = (int32_t)arg2;
    }

    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        if (const_arg2) {
            if ((int16_t)arg2 == arg2) {
                op = CMPI;
                break;
            }
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        tcg_debug_assert(cr == 0);
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, true);
        return;

    case TCG_COND_LT:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GT:
        if (const_arg2) {
            tcg_debug_assert((int16_t)arg2 == arg2);
            op = CMPI;
            break;
        }
        op = CMP;
        break;

    case TCG_COND_LTU:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
        if (const_arg2) {
            tcg_debug_assert((uint16_t)arg2 == arg2);
            op = CMPLI;
            break;
        }
        op = CMPL;
        break;

    default:
        g_assert_not_reached();
    }
    /* Insert the CR field, the L (64-bit) bit, and the operands. */
    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
    op |= RA(arg1);
    op |= const_arg2 ? arg2 & 0xffff : RB(arg2);
    tcg_out32(s, op);
}
1742
/*
 * Set dst = (src == 0), or dst = -(src == 0) when 'neg'.
 * The 64-bit negated form uses a carry trick; the others count
 * leading zeros and shift the "was zero" bit down.
 */
static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.
         * RT = (~X + X) + CA
         *    = -1 + CA
         *    = CA ? 0 : -1
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, src, src));
        return;
    }

    /* cntlzw/cntlzd yields the type width exactly when src == 0;
       shifting right by log2(width) leaves 1 or 0. */
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, CNTLZW | RS(src) | RA(dst));
        tcg_out_shri32(s, dst, dst, 5);
    } else {
        tcg_out32(s, CNTLZD | RS(src) | RA(dst));
        tcg_out_shri64(s, dst, dst, 6);
    }
    if (neg) {
        tcg_out32(s, NEG | RT(dst) | RA(dst));
    }
}
1769
/*
 * Set dst = (src != 0), or dst = -(src != 0) when 'neg'.
 * The 64-bit non-negated form uses a carry trick; otherwise compute
 * the eq0 result and invert it.
 */
static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
                                TCGReg dst, TCGReg src, bool neg)
{
    if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
        /*
         * X != 0 implies X + -1 generates a carry.  Extra addition
         * trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.
         */
        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
        return;
    }
    tcg_out_setcond_eq0(s, type, dst, src, false);
    if (neg) {
        /* 1 -> 0, 0 -> -1: subtract one. */
        tcg_out32(s, ADDI | TAI(dst, dst, -1));
    } else {
        tcg_out_xori32(s, dst, dst, 1);
    }
}
1789
/*
 * Reduce an EQ/NE comparison to a compare-against-zero by XORing the
 * operands into R0.  Returns the register holding the XOR result.
 */
static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
                                  bool const_arg2)
{
    if (const_arg2) {
        if ((uint32_t)arg2 == arg2) {
            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
        } else {
            /* Constant does not fit the immediate forms: load it first. */
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
        }
    } else {
        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
    }
    return TCG_REG_R0;
}
1805
/*
 * Set arg0 = (arg1 cond arg2), or arg0 = -(arg1 cond arg2) when 'neg'.
 * Strategy, in order of preference: SETBC/SETNBC (ISA 3.10), special
 * cases against zero, ISEL, then a compare plus MFOCRF bit extraction.
 */
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg arg0, TCGReg arg1, TCGArg arg2,
                            bool const_arg2, bool neg)
{
    int sh;
    bool inv;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    /* Ignore high bits of a potential constant arg2.  */
    if (type == TCG_TYPE_I32) {
        arg2 = (uint32_t)arg2;
    }

    /* With SETBC/SETBCR, we can always implement with 2 insns. */
    if (have_isa_3_10) {
        tcg_insn_unit bi, opc;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        /* Re-use tcg_to_bc for BI and BO_COND_{TRUE,FALSE}. */
        bi = tcg_to_bc[cond] & (0x1f << 16);
        if (tcg_to_bc[cond] & BO(8)) {
            opc = neg ? SETNBC : SETBC;
        } else {
            opc = neg ? SETNBCR : SETBCR;
        }
        tcg_out32(s, opc | RT(arg0) | bi);
        return;
    }

    /* Handle common and trivial cases before handling anything else.  */
    if (arg2 == 0) {
        switch (cond) {
        case TCG_COND_EQ:
            tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_NE:
            tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
            return;
        case TCG_COND_GE:
            /* GE 0 is the inverse sign bit: complement first. */
            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
            arg1 = arg0;
            /* FALLTHRU */
        case TCG_COND_LT:
            /* Extract the sign bit.  */
            if (type == TCG_TYPE_I32) {
                if (neg) {
                    tcg_out_sari32(s, arg0, arg1, 31);
                } else {
                    tcg_out_shri32(s, arg0, arg1, 31);
                }
            } else {
                if (neg) {
                    tcg_out_sari64(s, arg0, arg1, 63);
                } else {
                    tcg_out_shri64(s, arg0, arg1, 63);
                }
            }
            return;
        default:
            break;
        }
    }

    /* If we have ISEL, we can implement everything with 3 or 4 insns.
       All other cases below are also at least 3 insns, so speed up the
       code generator by not considering them and always using ISEL.  */
    if (have_isel) {
        int isel, tab;

        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 0, type);

        isel = tcg_to_isel[cond];

        tcg_out_movi(s, type, arg0, neg ? -1 : 1);
        if (isel & 1) {
            /* arg0 = (bc ? 0 : 1) */
            tab = TAB(arg0, 0, arg0);
            isel &= ~1;
        } else {
            /* arg0 = (bc ? 1 : 0) */
            tcg_out_movi(s, type, TCG_REG_R0, 0);
            tab = TAB(arg0, arg0, TCG_REG_R0);
        }
        tcg_out32(s, isel | tab);
        return;
    }

    inv = false;
    switch (cond) {
    case TCG_COND_EQ:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_NE:
        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
        tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
        break;

    case TCG_COND_TSTEQ:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_eq0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_TSTNE:
        tcg_out_test(s, TCG_REG_R0, arg1, arg2, const_arg2, type, false);
        tcg_out_setcond_ne0(s, type, arg0, TCG_REG_R0, neg);
        break;

    case TCG_COND_LE:
    case TCG_COND_LEU:
        inv = true;
        /* fall through */
    case TCG_COND_GT:
    case TCG_COND_GTU:
        sh = 30; /* CR7 CR_GT */
        goto crtest;

    case TCG_COND_GE:
    case TCG_COND_GEU:
        inv = true;
        /* fall through */
    case TCG_COND_LT:
    case TCG_COND_LTU:
        sh = 29; /* CR7 CR_LT */
        goto crtest;

    crtest:
        /* Compare into CR7, copy the CR bit into arg0, then fix up
           for inverted and/or negated conditions. */
        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
        if (neg && inv) {
            tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
        } else if (neg) {
            tcg_out32(s, NEG | RT(arg0) | RA(arg0));
        } else if (inv) {
            tcg_out_xori32(s, arg0, arg0, 1);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
1952
/* TCG backend hook: setcond with a register comparand. */
static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, false);
}
1958
/* TCG backend hook: setcond with a constant comparand. */
static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, false);
}
1964
/* Constraint/emitter table for the setcond opcode. */
static const TCGOutOpSetcond outop_setcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_setcond,
    .out_rri = tgen_setcondi,
};
1970
/* TCG backend hook: negsetcond (result 0/-1) with a register comparand. */
static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
                            TCGReg dest, TCGReg arg1, TCGReg arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, false, true);
}
1976
/* TCG backend hook: negsetcond (result 0/-1) with a constant comparand. */
static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
{
    tcg_out_setcond(s, type, cond, dest, arg1, arg2, true, true);
}
1982
/* Constraint/emitter table for the negsetcond opcode. */
static const TCGOutOpSetcond outop_negsetcond = {
    .base.static_constraint = C_O1_I2(r, r, rC),
    .out_rrr = tgen_negsetcond,
    .out_rri = tgen_negsetcondi,
};
1988
/* Emit a conditional branch for 'cond' with displacement field 'bd'. */
static void tcg_out_bc(TCGContext *s, TCGCond cond, int bd)
{
    tcg_out32(s, tcg_to_bc[cond] | bd);
}
1993
/*
 * Emit a conditional branch to label 'l': resolve the 14-bit
 * displacement immediately if the label is bound, otherwise record a
 * relocation to be patched later.
 */
static void tcg_out_bc_lab(TCGContext *s, TCGCond cond, TCGLabel *l)
{
    int bd = 0;
    if (l->has_value) {
        bd = reloc_pc14_val(tcg_splitwx_to_rx(s->code_ptr), l->u.value_ptr);
    } else {
        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, l, 0);
    }
    tcg_out_bc(s, cond, bd);
}
2004
/* TCG backend hook: brcond with a register comparand. */
static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
{
    tcg_out_cmp(s, cond, arg1, arg2, false, 0, type);
    tcg_out_bc_lab(s, cond, l);
}
2011
2012static void tgen_brcondi(TCGContext *s, TCGType type, TCGCond cond,
2013                         TCGReg arg1, tcg_target_long arg2, TCGLabel *l)
2014{
2015    tcg_out_cmp(s, cond, arg1, arg2, true, 0, type);
2016    tcg_out_bc_lab(s, cond, l);
2017}
2018
2019static const TCGOutOpBrcond outop_brcond = {
2020    .base.static_constraint = C_O0_I2(r, rC),
2021    .out_rr = tgen_brcond,
2022    .out_ri = tgen_brcondi,
2023};
2024
/*
 * Emit movcond: dest = (c1 cond c2 ? v1 : v2).
 * Per the rZ constraints, a constant v1/v2 can only be the value 0.
 * Uses the isel instruction when available; otherwise a compare plus a
 * short forward branch over a single move.
 */
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, bool const_c2,
                         TCGArg v1, bool const_v1, TCGArg v2, bool const_v2)
{
    /* If for some reason both inputs are zero, don't produce bad code.  */
    if (v1 == 0 && v2 == 0) {
        tcg_out_movi(s, type, dest, 0);
        return;
    }

    tcg_out_cmp(s, cond, c1, c2, const_c2, 0, type);

    if (have_isel) {
        int isel = tcg_to_isel[cond];

        /* Swap the V operands if the operation indicates inversion.  */
        if (isel & 1) {
            int t = v1;
            v1 = v2;
            v2 = t;
            isel &= ~1;
        }
        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
        if (v2 == 0) {
            tcg_out_movi(s, type, TCG_REG_R0, 0);
        }
        tcg_out32(s, isel | TAB(dest, v1, v2));
    } else {
        /* Without isel, arrange for v1 to already be in dest... */
        if (dest == v2) {
            cond = tcg_invert_cond(cond);
            v2 = v1;
        } else if (dest != v1) {
            if (v1 == 0) {
                tcg_out_movi(s, type, dest, 0);
            } else {
                tcg_out_mov(s, type, dest, v1);
            }
        }
        /* Branch forward over one insn */
        tcg_out_bc(s, cond, 8);
        /* ...and overwrite with v2 if the condition did not hold. */
        if (v2 == 0) {
            tcg_out_movi(s, type, dest, 0);
        } else {
            tcg_out_mov(s, type, dest, v2);
        }
    }
}

/* Emitter table for movcond. */
static const TCGOutOpMovcond outop_movcond = {
    .base.static_constraint = C_O1_I4(r, r, rC, rZ, rZ),
    .out = tgen_movcond,
};
2077
/*
 * Emit count-leading/trailing-zeros (@opc, e.g. CNTLZW/CNTLZD) with the
 * TCG semantic that a zero input yields @a2.  When a2 is the type width,
 * the hardware instruction already produces that value for zero input,
 * so a single instruction suffices.
 */
static void tcg_out_cntxz(TCGContext *s, TCGType type, uint32_t opc,
                          TCGArg a0, TCGArg a1, TCGArg a2, bool const_a2)
{
    if (const_a2 && a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
        tcg_out32(s, opc | RA(a0) | RS(a1));
    } else {
        /* Test the input for zero into cr0. */
        tcg_out_cmp(s, TCG_COND_EQ, a1, 0, 1, 0, type);
        /* Note that the only other valid constant for a2 is 0.  */
        if (have_isel) {
            tcg_out32(s, opc | RA(TCG_REG_R0) | RS(a1));
            tcg_out32(s, tcg_to_isel[TCG_COND_EQ] | TAB(a0, a2, TCG_REG_R0));
        } else if (!const_a2 && a0 == a2) {
            /* Output aliases the fallback: skip the count when input is 0. */
            tcg_out_bc(s, TCG_COND_EQ, 8);
            tcg_out32(s, opc | RA(a0) | RS(a1));
        } else {
            /* Count first, then overwrite with the fallback on zero input. */
            tcg_out32(s, opc | RA(a0) | RS(a1));
            tcg_out_bc(s, TCG_COND_NE, 8);
            if (const_a2) {
                tcg_out_movi(s, type, a0, 0);
            } else {
                tcg_out_mov(s, type, a0, a2);
            }
        }
    }
}
2103
/*
 * Emit a double-word comparison of (ah:al) against (bh:bl) for a 32-bit
 * host, leaving the result in bit CR_EQ of cr0 (set iff the condition
 * holds).  The partial compares go into cr6/cr7 and are combined with
 * CR logical operations.
 */
static void tcg_out_cmp2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool blconst, TCGArg bh, bool bhconst)
{
    /* For each ordered condition, the CR bits of the high-part compare
       (bit1) and low-part compare (bit2) that feed the combine below. */
    static const struct { uint8_t bit1, bit2; } bits[] = {
        [TCG_COND_LT ] = { CR_LT, CR_LT },
        [TCG_COND_LE ] = { CR_LT, CR_GT },
        [TCG_COND_GT ] = { CR_GT, CR_GT },
        [TCG_COND_GE ] = { CR_GT, CR_LT },
        [TCG_COND_LTU] = { CR_LT, CR_LT },
        [TCG_COND_LEU] = { CR_LT, CR_GT },
        [TCG_COND_GTU] = { CR_GT, CR_GT },
        [TCG_COND_GEU] = { CR_GT, CR_LT },
    };

    TCGCond cond2;
    int op, bit1, bit2;

    switch (cond) {
    case TCG_COND_EQ:
        op = CRAND;
        goto do_equality;
    case TCG_COND_NE:
        op = CRNAND;
    do_equality:
        /* Equal iff both halves compare equal. */
        tcg_out_cmp(s, cond, al, bl, blconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond, ah, bh, bhconst, 7, TCG_TYPE_I32);
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
        break;

    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* OR together the per-half AND results, recording cr0 (Rc=1). */
        if (blconst) {
            tcg_out_andi32(s, TCG_REG_R0, al, bl);
        } else {
            tcg_out32(s, AND | SAB(al, TCG_REG_R0, bl));
        }
        if (bhconst) {
            tcg_out_andi32(s, TCG_REG_TMP1, ah, bh);
        } else {
            tcg_out32(s, AND | SAB(ah, TCG_REG_TMP1, bh));
        }
        tcg_out32(s, OR | SAB(TCG_REG_R0, TCG_REG_R0, TCG_REG_TMP1) | 1);
        break;

    case TCG_COND_LT:
    case TCG_COND_LE:
    case TCG_COND_GT:
    case TCG_COND_GE:
    case TCG_COND_LTU:
    case TCG_COND_LEU:
    case TCG_COND_GTU:
    case TCG_COND_GEU:
        bit1 = bits[cond].bit1;
        bit2 = bits[cond].bit2;
        op = (bit1 != bit2 ? CRANDC : CRAND);
        cond2 = tcg_unsigned_cond(cond);

        /* High part uses the signedness of @cond; the low part is
           always compared unsigned. */
        tcg_out_cmp(s, cond, ah, bh, bhconst, 6, TCG_TYPE_I32);
        tcg_out_cmp(s, cond2, al, bl, blconst, 7, TCG_TYPE_I32);
        /* result = (high-eq AND low-test) OR high-test */
        tcg_out32(s, op | BT(0, CR_EQ) | BA(6, CR_EQ) | BB(7, bit2));
        tcg_out32(s, CROR | BT(0, CR_EQ) | BA(6, bit1) | BB(0, CR_EQ));
        break;

    default:
        g_assert_not_reached();
    }
}
2171
/* Emit setcond2: ret = ((ah:al) cond (bh:bl)) as 0/1, 32-bit hosts only. */
static void tgen_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
                          TCGReg al, TCGReg ah,
                          TCGArg bl, bool const_bl,
                          TCGArg bh, bool const_bh)
{
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    /* Copy cr0 into R0, then extract the CR_EQ bit as a 0/1 value. */
    tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(0));
    tcg_out_rlw(s, RLWINM, ret, TCG_REG_R0, CR_EQ + 0*4 + 1, 31, 31);
}

/* Only referenced on 32-bit hosts; suppress the unused warning elsewhere. */
#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpSetcond2 outop_setcond2 = {
    .base.static_constraint = C_O1_I4(r, r, r, rU, rC),
    .out = tgen_setcond2,
};
2189
/* Emit brcond2: branch to @l if (ah:al) cond (bh:bl), 32-bit hosts only. */
static void tgen_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                         TCGArg bl, bool const_bl,
                         TCGArg bh, bool const_bh, TCGLabel *l)
{
    assert(TCG_TARGET_REG_BITS == 32);
    /* tcg_out_cmp2 leaves the result in cr0.CR_EQ; branch on it. */
    tcg_out_cmp2(s, cond, al, ah, bl, const_bl, bh, const_bh);
    tcg_out_bc_lab(s, TCG_COND_EQ, l);
}

/* Only referenced on 32-bit hosts; suppress the unused warning elsewhere. */
#if TCG_TARGET_REG_BITS != 32
__attribute__((unused))
#endif
static const TCGOutOpBrcond2 outop_brcond2 = {
    .base.static_constraint = C_O0_I4(r, r, rU, rC),
    .out = tgen_brcond2,
};
2206
2207static void tcg_out_mb(TCGContext *s, TCGArg a0)
2208{
2209    uint32_t insn;
2210
2211    if (a0 & TCG_MO_ST_LD) {
2212        insn = HWSYNC;
2213    } else {
2214        insn = LWSYNC;
2215    }
2216
2217    tcg_out32(s, insn);
2218}
2219
/*
 * Emit a call (or tail jump, per @lk) to @target, handling the per-ABI
 * function-descriptor and TOC conventions.
 */
static void tcg_out_call_int(TCGContext *s, int lk,
                             const tcg_insn_unit *target)
{
#ifdef _CALL_AIX
    /* Look through the descriptor.  If the branch is in range, and we
       don't have to spend too much effort on building the toc.  */
    const void *tgt = ((const void * const *)target)[0];
    uintptr_t toc = ((const uintptr_t *)target)[1];
    intptr_t diff = tcg_pcrel_diff(s, tgt);

    if (in_range_b(diff) && toc == (uint32_t)toc) {
        /* Materialize the TOC value and branch directly. */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, toc);
        tcg_out_b(s, lk, tgt);
    } else {
        /* Fold the low bits of the constant into the addresses below.  */
        intptr_t arg = (intptr_t)target;
        int ofs = (int16_t)arg;

        if (ofs + 8 < 0x8000) {
            arg -= ofs;
        } else {
            ofs = 0;
        }
        /* Load entry point into CTR and the TOC into R2 from the
           descriptor, then branch through CTR. */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, arg);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_TMP1, ofs);
        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_REG_TMP1, ofs + SZP);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#elif defined(_CALL_ELF) && _CALL_ELF == 2
    intptr_t diff;

    /* In the ELFv2 ABI, we have to set up r12 to contain the destination
       address, which the callee uses to compute its TOC address.  */
    /* FIXME: when the branch is in range, we could avoid r12 load if we
       knew that the destination uses the same TOC, and what its local
       entry point offset is.  */
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R12, (intptr_t)target);

    diff = tcg_pcrel_diff(s, target);
    if (in_range_b(diff)) {
        tcg_out_b(s, lk, target);
    } else {
        tcg_out32(s, MTSPR | RS(TCG_REG_R12) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS | lk);
    }
#else
    /* Remaining ABIs: a plain direct branch suffices. */
    tcg_out_b(s, lk, target);
#endif
}
2270
/* Emit a helper call; @info is not needed on this target. */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, LK, target);
}
2276
/*
 * Indexed-form load opcodes, selected by MemOp size/sign/bswap.
 * Zero entries (e.g. sign-extending byte loads, byte-swapped
 * sign-extending loads) are synthesized in tcg_out_qemu_ld via a
 * zero-extending load plus a separate extension.
 */
static const uint32_t qemu_ldx_opc[(MO_SSIZE + MO_BSWAP) + 1] = {
    [MO_UB] = LBZX,
    [MO_UW] = LHZX,
    [MO_UL] = LWZX,
    [MO_UQ] = LDX,
    [MO_SW] = LHAX,
    [MO_SL] = LWAX,
    [MO_BSWAP | MO_UB] = LBZX,   /* byte swap of a byte is a no-op */
    [MO_BSWAP | MO_UW] = LHBRX,
    [MO_BSWAP | MO_UL] = LWBRX,
    [MO_BSWAP | MO_UQ] = LDBRX,
};

/* Indexed-form store opcodes, selected by MemOp size/bswap. */
static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
    [MO_UB] = STBX,
    [MO_UW] = STHX,
    [MO_UL] = STWX,
    [MO_UQ] = STDX,
    [MO_BSWAP | MO_UB] = STBX,   /* byte swap of a byte is a no-op */
    [MO_BSWAP | MO_UW] = STHBRX,
    [MO_BSWAP | MO_UL] = STWBRX,
    [MO_BSWAP | MO_UQ] = STDBRX,
};
2300
/*
 * Produce the return address for a qemu_ld/st slow path.  The fast path
 * enters the slow path via a conditional branch-and-link, so the raddr
 * is in LR; copy it into @arg, or into TMP1 if no register was assigned.
 */
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
{
    if (arg < 0) {
        arg = TCG_REG_TMP1;
    }
    tcg_out32(s, MFSPR | RT(arg) | LR);
    return arg;
}

/*
 * For the purposes of ppc32 sorting 4 input registers into 4 argument
 * registers, there is an outside chance we would require 3 temps.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ra_gen = ldst_ra_gen,
    .ntmp = 3,
    .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
};
2319
/*
 * Generate the out-of-line path for a qemu_ld: resolve the branch from
 * the fast path, call the size-specific load helper, move the result
 * into place, and branch back.  Returns false if the branch could not
 * be relocated (buffer overflow, retried by the caller).
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    /* Patch the fast-path conditional branch to point here. */
    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2335
/*
 * Generate the out-of-line path for a qemu_st: resolve the branch from
 * the fast path, call the size-specific store helper, and branch back.
 * Returns false if the branch could not be relocated.
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    /* Patch the fast-path conditional branch to point here. */
    if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);

    tcg_out_b(s, 0, lb->raddr);
    return true;
}
2350
/*
 * Host addressing for a guest access: a base+index register pair
 * (base 0 means no base register), plus the resolved atomicity
 * and alignment requirements.
 */
typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;
2356
2357bool tcg_target_has_memory_bswap(MemOp memop)
2358{
2359    TCGAtomAlign aa;
2360
2361    if ((memop & MO_SIZE) <= MO_64) {
2362        return true;
2363    }
2364
2365    /*
2366     * Reject 16-byte memop with 16-byte atomicity,
2367     * but do allow a pair of 64-bit operations.
2368     */
2369    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
2370    return aa.atom <= MO_64;
2371}
2372
2373/* We expect to use a 16-bit negative offset from ENV.  */
2374#define MIN_TLB_MASK_TABLE_OFS  -32768
2375
2376/*
2377 * For system-mode, perform the TLB load and compare.
2378 * For user-mode, perform any required alignment tests.
2379 * In both cases, return a TCGLabelQemuLdst structure if the slow path
2380 * is required and fill in @h with the host address for the fast path.
2381 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr, MemOpIdx oi, bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits, s_bits;

    /*
     * Book II, Section 1.4, Single-Copy Atomicity, specifies:
     *
     * Before 3.0, "An access that is not atomic is performed as a set of
     * smaller disjoint atomic accesses. In general, the number and alignment
     * of these accesses are implementation-dependent."  Thus MO_ATOM_IFALIGN.
     *
     * As of 3.0, "the non-atomic access is performed as described in
     * the corresponding list", which matches MO_ATOM_SUBALIGN.
     */
    s_bits = opc & MO_SIZE;
    h->aa = atom_and_align_for_opc(s, opc,
                                   have_isa_3_00 ? MO_ATOM_SUBALIGN
                                                 : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_bits = h->aa.align;

    if (tcg_use_softmmu) {
        int mem_index = get_mmuidx(oi);
        int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
                            : offsetof(CPUTLBEntry, addr_write);
        int fast_off = tlb_mask_table_ofs(s, mem_index);
        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
        int table_off = fast_off + offsetof(CPUTLBDescFast, table);

        /* The softmmu path always needs a slow-path helper call on miss. */
        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr;

        /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);

        /* Extract the page index, shifted into place for tlb index.  */
        if (TCG_TARGET_REG_BITS == 32) {
            tcg_out_shri32(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        } else {
            tcg_out_shri64(s, TCG_REG_R0, addr,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        }
        tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));

        /*
         * Load the TLB comparator into TMP2.
         * For 64-bit host, always load the entire 64-bit slot for simplicity.
         * We will ignore the high bits with tcg_out_cmp(..., addr_type).
         */
        if (cmp_off == 0) {
            /* Update-form load folds the entry-address add into the load. */
            tcg_out32(s, (TCG_TARGET_REG_BITS == 64 ? LDUX : LWZUX)
                      | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
        } else {
            tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
        }

        /*
         * Load the TLB addend for use on the fast path.
         * Do this asap to minimize any load use delay.
         */
        if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
                       offsetof(CPUTLBEntry, addend));
        }

        /* Clear the non-page, non-alignment bits from the address in R0. */
        if (TCG_TARGET_REG_BITS == 32) {
            /*
             * We don't support unaligned accesses on 32-bits.
             * Preserve the bottom bits and thus trigger a comparison
             * failure on unaligned accesses.
             */
            if (a_bits < s_bits) {
                a_bits = s_bits;
            }
            tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr, 0,
                        (32 - a_bits) & 31, 31 - s->page_bits);
        } else {
            TCGReg t = addr;

            /*
             * If the access is unaligned, we need to make sure we fail if we
             * cross a page boundary.  The trick is to add the access size-1
             * to the address before masking the low bits.  That will make the
             * address overflow to the next page if we cross a page boundary,
             * which will then force a mismatch of the TLB compare.
             */
            if (a_bits < s_bits) {
                unsigned a_mask = (1 << a_bits) - 1;
                unsigned s_mask = (1 << s_bits) - 1;
                tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
                t = TCG_REG_R0;
            }

            /* Mask the address for the requested alignment.  */
            if (addr_type == TCG_TYPE_I32) {
                tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                            (32 - a_bits) & 31, 31 - s->page_bits);
            } else if (a_bits == 0) {
                tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
            } else {
                /* Two rotates: clear low bits, then clear high bits. */
                tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
                            64 - s->page_bits, s->page_bits - a_bits);
                tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
            }
        }

        /* Full comparison into cr0. */
        tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2, 0, 0, addr_type);

        /* Load a pointer into the current opcode w/conditional branch-link. */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_bc(s, TCG_COND_NE, LK);

        h->base = TCG_REG_TMP1;
    } else {
        /* User-only: no TLB; just test alignment if any is required. */
        if (a_bits) {
            ldst = new_ldst_label(s);
            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addr_reg = addr;

            /* We are expecting a_bits to max out at 7, much lower than ANDI. */
            tcg_debug_assert(a_bits < 16);
            tcg_out32(s, ANDI | SAI(addr, TCG_REG_R0, (1 << a_bits) - 1));

            ldst->label_ptr[0] = s->code_ptr;
            tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
        }

        h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
    }

    if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
        /* Zero-extend the guest address for use in the host address. */
        tcg_out_ext32u(s, TCG_REG_TMP2, addr);
        h->index = TCG_REG_TMP2;
    } else {
        h->index = addr;
    }

    return ldst;
}
2534
/*
 * Emit the fast path of a guest load.  Handles 64-bit values as a
 * register pair on 32-bit hosts, and synthesizes byte-swapped and
 * sign-extending variants that lack a single instruction.
 */
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, true);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit load on a 32-bit host: two 32-bit loads. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWZX | TAB(datahi, h.base, h.index));
            tcg_out32(s, LWZX | TAB(datalo, h.base, TCG_REG_R0));
        } else if (h.index == datahi) {
            /* Load low word first so the address is not clobbered. */
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
        } else {
            tcg_out32(s, LWZ | TAI(datahi, h.index, 0));
            tcg_out32(s, LWZ | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_ldx_opc[opc & (MO_BSWAP | MO_SSIZE)];
        if (!have_isa_2_06 && insn == LDBRX) {
            /* No ldbrx before ISA 2.06: combine two lwbrx. */
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
            tcg_out32(s, LWBRX | TAB(TCG_REG_R0, h.base, TCG_REG_R0));
            tcg_out_rld(s, RLDIMI, datalo, TCG_REG_R0, 32, 0);
        } else if (insn) {
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
        } else {
            /* No direct insn: zero-extending load, then sign-extend. */
            insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
            tcg_out32(s, insn | TAB(datalo, h.base, h.index));
            tcg_out_movext(s, TCG_TYPE_REG, datalo,
                           TCG_TYPE_REG, opc & MO_SSIZE, datalo);
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2584
/*
 * Emit the fast path of a guest store.  Handles 64-bit values as a
 * register pair on 32-bit hosts, and synthesizes stdbrx when the host
 * lacks it.
 */
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                            TCGReg addr, MemOpIdx oi, TCGType data_type)
{
    MemOp opc = get_memop(oi);
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr, oi, false);

    if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
        /* 64-bit store on a 32-bit host: two 32-bit stores. */
        if (opc & MO_BSWAP) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, STWBRX | SAB(datahi, h.base, TCG_REG_R0));
        } else if (h.base != 0) {
            tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
            tcg_out32(s, STWX | SAB(datahi, h.base, h.index));
            tcg_out32(s, STWX | SAB(datalo, h.base, TCG_REG_R0));
        } else {
            tcg_out32(s, STW | TAI(datahi, h.index, 0));
            tcg_out32(s, STW | TAI(datalo, h.index, 4));
        }
    } else {
        uint32_t insn = qemu_stx_opc[opc & (MO_BSWAP | MO_SIZE)];
        if (!have_isa_2_06 && insn == STDBRX) {
            /* No stdbrx before ISA 2.06: combine two stwbrx. */
            tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
            tcg_out32(s, ADDI | TAI(TCG_REG_TMP2, h.index, 4));
            tcg_out_shri64(s, TCG_REG_R0, datalo, 32);
            tcg_out32(s, STWBRX | SAB(TCG_REG_R0, h.base, TCG_REG_TMP2));
        } else {
            tcg_out32(s, insn | SAB(datalo, h.base, h.index));
        }
    }

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2626
/*
 * Emit a 128-bit guest load or store.  Uses lq/stq when 16-byte
 * atomicity is required, otherwise a pair of 64-bit accesses
 * (optionally byte-reversed).
 */
static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;
    bool need_bswap;
    uint32_t insn;
    TCGReg index;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    /* Compose the final address, as LQ/STQ have no indexing. */
    index = h.index;
    if (h.base != 0) {
        index = TCG_REG_TMP1;
        tcg_out32(s, ADD | TAB(index, h.base, h.index));
    }
    need_bswap = get_memop(oi) & MO_BSWAP;

    if (h.aa.atom == MO_128) {
        /* lq/stq require an even/odd register pair: hi = even, lo = odd. */
        tcg_debug_assert(!need_bswap);
        tcg_debug_assert(datalo & 1);
        tcg_debug_assert(datahi == datalo - 1);
        tcg_debug_assert(!is_ld || datahi != index);
        insn = is_ld ? LQ : STQ;
        tcg_out32(s, insn | TAI(datahi, index, 0));
    } else {
        TCGReg d1, d2;

        /* Order the pair so d1 is accessed at offset 0, d2 at offset 8. */
        if (HOST_BIG_ENDIAN ^ need_bswap) {
            d1 = datahi, d2 = datalo;
        } else {
            d1 = datalo, d2 = datahi;
        }

        if (need_bswap) {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
            insn = is_ld ? LDBRX : STDBRX;
            tcg_out32(s, insn | TAB(d1, 0, index));
            tcg_out32(s, insn | TAB(d2, index, TCG_REG_R0));
        } else {
            insn = is_ld ? LD : STD;
            tcg_out32(s, insn | TAI(d1, index, 0));
            tcg_out32(s, insn | TAI(d2, index, 8));
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = datalo;
        ldst->datahi_reg = datahi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}
2681
2682static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2683{
2684    int i;
2685    for (i = 0; i < count; ++i) {
2686        p[i] = NOP;
2687    }
2688}
2689
/* Parameters for function call generation, used in tcg.c.  */
#define TCG_TARGET_STACK_ALIGN       16

/* Per-ABI stack link area size and the offset at which the caller's LR
   is saved within it.  */
#ifdef _CALL_AIX
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (1 * SZR)
# define TCG_TARGET_CALL_STACK_OFFSET  (LINK_AREA_SIZE + 8 * SZR)
#elif defined(_CALL_DARWIN)
# define LINK_AREA_SIZE                (6 * SZR)
# define LR_OFFSET                     (2 * SZR)
#elif TCG_TARGET_REG_BITS == 64
# if defined(_CALL_ELF) && _CALL_ELF == 2
#  define LINK_AREA_SIZE               (4 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#else /* TCG_TARGET_REG_BITS == 32 */
# if defined(_CALL_SYSV)
#  define LINK_AREA_SIZE               (2 * SZR)
#  define LR_OFFSET                    (1 * SZR)
# endif
#endif
#ifndef LR_OFFSET
# error "Unhandled abi"
#endif
#ifndef TCG_TARGET_CALL_STACK_OFFSET
# define TCG_TARGET_CALL_STACK_OFFSET  LINK_AREA_SIZE
#endif

#define CPU_TEMP_BUF_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE      ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)

/* Total frame: outgoing call area, static call args, temp buffer and
   callee-saved register save area, rounded up to the stack alignment. */
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET   \
                     + TCG_STATIC_CALL_ARGS_SIZE    \
                     + CPU_TEMP_BUF_SIZE            \
                     + REG_SAVE_SIZE                \
                     + TCG_TARGET_STACK_ALIGN - 1)  \
                    & -TCG_TARGET_STACK_ALIGN)

/* Offset of the register save area within the frame. */
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
2729
/* Generate the prologue/epilogue through which the host enters and
   leaves generated code. */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

#ifdef _CALL_AIX
    /* AIX-style ABIs call through a function descriptor; emit one. */
    const void **desc = (const void **)s->code_ptr;
    desc[0] = tcg_splitwx_to_rx(desc + 2);  /* entry point */
    desc[1] = 0;                            /* environment pointer */
    s->code_ptr = (void *)(desc + 2);       /* skip over descriptor */
#endif

    tcg_set_frame(s, TCG_REG_CALL_STACK, REG_SAVE_BOT - CPU_TEMP_BUF_SIZE,
                  CPU_TEMP_BUF_SIZE);

    /* Prologue */
    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
    tcg_out32(s, (SZR == 8 ? STDU : STWU)
              | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));

    /* Save callee-saved registers and LR into the new frame. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);

    if (!tcg_use_softmmu && guest_base) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }

    /* env arrives in arg0, the TB entry in arg1; jump via CTR. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);

    /* Epilogue */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);

    /* Restore LR and callee-saved registers, pop the frame, return. */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_R1, REG_SAVE_BOT + i * SZR);
    }
    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
    tcg_out32(s, BCLR | BO_ALWAYS);
}
2776
/* Emit per-TB entry code. */
static void tcg_out_tb_start(TCGContext *s)
{
    /* Load TCG_REG_TB. */
    if (USE_REG_TB) {
        if (have_isa_3_00) {
            /* lnia REG_TB */
            tcg_out_addpcis(s, TCG_REG_TB, 0);
        } else {
            /* bcl 20,31,$+4 (preferred form for getting nia) */
            tcg_out32(s, BC | BO_ALWAYS | BI(7, CR_SO) | 0x4 | LK);
            tcg_out32(s, MFSPR | RT(TCG_REG_TB) | LR);
        }
    }
}
2791
/* Return to the main loop: place @arg (the exit value) in R3 and
   branch to the common epilogue. */
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
{
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg);
    tcg_out_b(s, 0, tcg_code_gen_epilogue);
}
2797
/*
 * Emit a goto_tb: a patchable slot for a direct branch (filled in by
 * tb_target_set_jmp_target), followed by an indirect branch through
 * the stored jump-target address for when the direct branch cannot
 * reach or is not yet linked.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    uintptr_t ptr = get_jmp_target_addr(s, which);
    int16_t lo;

    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    tcg_out32(s, NOP);

    /* When branch is out of range, fall through to indirect. */
    if (USE_REG_TB) {
        /* TB-relative load of the target address. */
        ptrdiff_t offset = ppc_tbrel_diff(s, (void *)ptr);
        tcg_out_mem_long(s, LD, LDX, TCG_REG_TMP1, TCG_REG_TB, offset);
    } else if (have_isa_3_10) {
        /* PC-relative prefixed load. */
        ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
        tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
    } else if (have_isa_3_00) {
        /* addpcis + load with the remaining low offset. */
        ptrdiff_t offset = tcg_pcrel_diff(s, (void *)ptr) - 4;
        lo = offset;
        tcg_out_addpcis(s, TCG_REG_TMP1, offset - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    } else {
        /* Absolute address, split into high constant + low offset. */
        lo = ptr;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - lo);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, lo);
    }

    tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
    tcg_out32(s, BCCTR | BO_ALWAYS);
    set_jmp_reset_offset(s, which);
}
2829
2830void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2831                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2832{
2833    uintptr_t addr = tb->jmp_target_addr[n];
2834    intptr_t diff = addr - jmp_rx;
2835    tcg_insn_unit insn;
2836
2837    if (in_range_b(diff)) {
2838        insn = B | (diff & 0x3fffffc);
2839    } else {
2840        insn = NOP;
2841    }
2842
2843    qatomic_set((uint32_t *)jmp_rw, insn);
2844    flush_idcache_range(jmp_rx, jmp_rw, 4);
2845}
2846
2847
/* Emit add with a register operand. */
static void tgen_add(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ADD | TAB(a0, a1, a2));
}

/* Emit add with an immediate; tcg_out_mem_long splits large constants. */
static void tgen_addi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_mem_long(s, ADDI, ADD, a0, a1, a2);
}

/* Emitter table for add. */
static const TCGOutOpBinary outop_add = {
    .base.static_constraint = C_O1_I2(r, r, rT),
    .out_rrr = tgen_add,
    .out_rri = tgen_addi,
};
2865
/* Register-register and: a0 = a1 & a2.  Note the SAB operand order
   (source first), shared by all the logical emitters in this file. */
static void tgen_and(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, AND | SAB(a1, a0, a2));
}
2871
2872static void tgen_andi(TCGContext *s, TCGType type,
2873                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2874{
2875    if (type == TCG_TYPE_I32) {
2876        tcg_out_andi32(s, a0, a1, a2);
2877    } else {
2878        tcg_out_andi64(s, a0, a1, a2);
2879    }
2880}
2881
static const TCGOutOpBinary outop_and = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_and,
    .out_rri = tgen_andi,
};

/* And with complement: a0 = a1 & ~a2. */
static void tgen_andc(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ANDC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_andc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_andc,
};
2898
/* Count leading zeros; a2 is the register holding the value to
   substitute when a1 == 0 (final 'false' = a2 is not a constant). */
static void tgen_clz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* As tgen_clz, but a2 is a constant (final 'true' flags this). */
static void tgen_clzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTLZW : CNTLZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

static const TCGOutOpBinary outop_clz = {
    .base.static_constraint = C_O1_I2(r, r, rZW),
    .out_rrr = tgen_clz,
    .out_rri = tgen_clzi,
};

/* Population count via CNTPOPW/CNTPOPD. */
static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
    tcg_out32(s, insn | SAB(a1, a0, 0));
}

/* ctpop is only available with ISA 2.06. */
static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
{
    return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
}

static const TCGOutOpUnary outop_ctpop = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctpop,
    .out_rr = tgen_ctpop,
};
2935
/* Count trailing zeros; a2 supplies the result when a1 == 0. */
static void tgen_ctz(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, false);
}

/* As tgen_ctz, but a2 is a constant (final 'true' flags this). */
static void tgen_ctzi(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? CNTTZW : CNTTZD;
    tcg_out_cntxz(s, type, insn, a0, a1, a2, true);
}

/* CNTTZW/CNTTZD require ISA 3.00. */
static TCGConstraintSetIndex cset_ctz(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, rZW) : C_NotImplemented;
}

static const TCGOutOpBinary outop_ctz = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_ctz,
    .out_rrr = tgen_ctz,
    .out_rri = tgen_ctzi,
};

/* Equivalence: a0 = ~(a1 ^ a2).  Descriptor outop_eqv follows below. */
static void tgen_eqv(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, EQV | SAB(a1, a0, a2));
}
2967
#if TCG_TARGET_REG_BITS == 64
/* Extract high word of a 64-bit value: a0 = a1 >> 32 (logical). */
static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_shri64(s, a0, a1, 32);
}

static const TCGOutOpUnary outop_extrh_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extrh_i64_i32,
};
#endif
2979
/* Signed division: a0 = a1 / a2. */
static void tgen_divs(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? DIVW : DIVD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_divs = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divs,
};

/* Combined quotient+remainder forms are not provided by this backend. */
static const TCGOutOpDivRem outop_divs2 = {
    .base.static_constraint = C_NotImplemented,
};

/* Unsigned division: a0 = a1 / a2. */
static void tgen_divu(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? DIVWU : DIVDU;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_divu = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_divu,
};

static const TCGOutOpDivRem outop_divu2 = {
    .base.static_constraint = C_NotImplemented,
};

/* Descriptor for tgen_eqv, defined earlier. */
static const TCGOutOpBinary outop_eqv = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_eqv,
};
3016
/* Low-part multiply: a0 = a1 * a2. */
static void tgen_mul(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULLW : MULLD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

/* Multiply by immediate; constraint rI restricts a2 to what the
   MULLI immediate field accepts. */
static void tgen_muli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out32(s, MULLI | TAI(a0, a1, a2));
}

static const TCGOutOpBinary outop_mul = {
    .base.static_constraint = C_O1_I2(r, r, rI),
    .out_rrr = tgen_mul,
    .out_rri = tgen_muli,
};

static const TCGOutOpMul2 outop_muls2 = {
    .base.static_constraint = C_NotImplemented,
};

/* High part of signed multiply. */
static void tgen_mulsh(TCGContext *s, TCGType type,
                       TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULHW : MULHD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_mulsh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_mulsh,
};

static const TCGOutOpMul2 outop_mulu2 = {
    .base.static_constraint = C_NotImplemented,
};

/* High part of unsigned multiply. */
static void tgen_muluh(TCGContext *s, TCGType type,
                       TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MULHWU : MULHDU;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_muluh = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_muluh,
};
3067
/* Not-and: a0 = ~(a1 & a2). */
static void tgen_nand(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NAND | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nand = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nand,
};

/* Not-or: a0 = ~(a1 | a2). */
static void tgen_nor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, NOR | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_nor = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_nor,
};

/* Register-register or: a0 = a1 | a2. */
static void tgen_or(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, OR | SAB(a1, a0, a2));
}

/* Or with constant; tcg_out_ori32 serves both types, with constraint
   rU limiting which constants reach here. */
static void tgen_ori(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_ori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_or = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_or,
    .out_rri = tgen_ori,
};

/* Or with complement: a0 = a1 | ~a2. */
static void tgen_orc(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, ORC | SAB(a1, a0, a2));
}

static const TCGOutOpBinary outop_orc = {
    .base.static_constraint = C_O1_I2(r, r, r),
    .out_rrr = tgen_orc,
};
3118
/* The MOD* instructions require ISA 3.00. */
static TCGConstraintSetIndex cset_mod(TCGType type, unsigned flags)
{
    return have_isa_3_00 ? C_O1_I2(r, r, r) : C_NotImplemented;
}

/* Signed remainder: a0 = a1 % a2, via MODSW/MODSD. */
static void tgen_rems(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODSW : MODSD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_rems = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_rems,
};

/* Unsigned remainder: a0 = a1 % a2, via MODUW/MODUD. */
static void tgen_remu(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? MODUW : MODUD;
    tcg_out32(s, insn | TAB(a0, a1, a2));
}

static const TCGOutOpBinary outop_remu = {
    .base.static_constraint = C_Dynamic,
    .base.dynamic_constraint = cset_mod,
    .out_rrr = tgen_remu,
};
3149
/* Rotate left by register count, using a full-width rotate mask. */
static void tgen_rotl(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out32(s, RLWNM | SAB(a1, a0, a2) | MB(0) | ME(31));
    } else {
        tcg_out32(s, RLDCL | SAB(a1, a0, a2) | MB64(0));
    }
}

/* Rotate left by constant count. */
static void tgen_rotli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, a2, 0, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, a2, 0);
    }
}

static const TCGOutOpBinary outop_rotl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_rotl,
    .out_rri = tgen_rotli,
};

/* No native rotate-right; left unimplemented. */
static const TCGOutOpBinary outop_rotr = {
    .base.static_constraint = C_NotImplemented,
};
3179
/* Arithmetic shift right by register count. */
static void tgen_sar(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SRAW : SRAD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}

/* Arithmetic shift right by constant count. */
static void tgen_sari(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    if (type == TCG_TYPE_I32) {
        tcg_out_sari32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_sari64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_sar = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_sar,
    .out_rri = tgen_sari,
};
3203
/* Shift left by register count. */
static void tgen_shl(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SLW : SLD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}

/* Shift left by constant count. */
static void tgen_shli(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    /* Limit immediate shift count lest we create an illegal insn.  */
    if (type == TCG_TYPE_I32) {
        tcg_out_shli32(s, a0, a1, a2 & 31);
    } else {
        tcg_out_shli64(s, a0, a1, a2 & 63);
    }
}

static const TCGOutOpBinary outop_shl = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shl,
    .out_rri = tgen_shli,
};
3227
/* Logical shift right by register count. */
static void tgen_shr(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    uint32_t insn = type == TCG_TYPE_I32 ? SRW : SRD;
    tcg_out32(s, insn | SAB(a1, a0, a2));
}
3234
3235static void tgen_shri(TCGContext *s, TCGType type,
3236                      TCGReg a0, TCGReg a1, tcg_target_long a2)
3237{
3238    /* Limit immediate shift count lest we create an illegal insn.  */
3239    if (type == TCG_TYPE_I32) {
3240        tcg_out_shri32(s, a0, a1, a2 & 31);
3241    } else {
3242        tcg_out_shri64(s, a0, a1, a2 & 63);
3243    }
3244}
3245
static const TCGOutOpBinary outop_shr = {
    .base.static_constraint = C_O1_I2(r, r, ri),
    .out_rrr = tgen_shr,
    .out_rri = tgen_shri,
};
3251
/* Subtract: a0 = a1 - a2.  SUBF computes RB - RA, hence the swapped
   operand order in TAB. */
static void tgen_sub(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, SUBF | TAB(a0, a2, a1));
}

/* Reverse subtract with constant minuend: a0 = a1 - a2 where a1 is
   the constant, via SUBFIC (imm - reg). */
static void tgen_subfi(TCGContext *s, TCGType type,
                       TCGReg a0, tcg_target_long a1, TCGReg a2)
{
    tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
}

static const TCGOutOpSubtract outop_sub = {
    .base.static_constraint = C_O1_I2(r, rI, r),
    .out_rrr = tgen_sub,
    .out_rir = tgen_subfi,
};
3269
/* Register-register xor: a0 = a1 ^ a2. */
static void tgen_xor(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2)
{
    tcg_out32(s, XOR | SAB(a1, a0, a2));
}

/* Xor with constant; tcg_out_xori32 serves both types, with constraint
   rU limiting which constants reach here. */
static void tgen_xori(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, tcg_target_long a2)
{
    tcg_out_xori32(s, a0, a1, a2);
}

static const TCGOutOpBinary outop_xor = {
    .base.static_constraint = C_O1_I2(r, r, rU),
    .out_rrr = tgen_xor,
    .out_rri = tgen_xori,
};
3287
/*
 * Byte-swap the low 16 bits of src into dst.  flags carries the
 * TCG_BSWAP_* input/output extension requirements.  R0 serves as
 * scratch when dst aliases src.
 */
static void tgen_bswap16(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    /* ISA 3.10 has a direct byte-reverse-halfword instruction. */
    if (have_isa_3_10) {
        tcg_out32(s, BRH | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, dst, dst);
        }
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 24) & 0x000000ff            = 0000000c */
    tcg_out_rlw(s, RLWINM, tmp, src, 24, 24, 31);
    /* tmp = dep(tmp, rol32(src, 8), 0x0000ff00)    = 000000dc */
    tcg_out_rlw(s, RLWIMI, tmp, src, 8, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext16s(s, TCG_TYPE_REG, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap16 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap16,
};
3325
/*
 * Byte-swap the low 32 bits of src into dst.  flags carries the
 * TCG_BSWAP_* input/output extension requirements.  R0 serves as
 * scratch when dst aliases src.
 */
static void tgen_bswap32(TCGContext *s, TCGType type,
                         TCGReg dst, TCGReg src, unsigned flags)
{
    TCGReg tmp = dst == src ? TCG_REG_R0 : dst;

    /* ISA 3.10 has a direct byte-reverse-word instruction. */
    if (have_isa_3_10) {
        tcg_out32(s, BRW | RA(dst) | RS(src));
        if (flags & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, dst, dst);
        } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, dst, dst);
        }
        return;
    }

    /*
     * Stolen from gcc's builtin_bswap32.
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = xxxxabcd
     */
    /* tmp = rol32(src, 8) & 0xffffffff             = 0000bcda */
    tcg_out_rlw(s, RLWINM, tmp, src, 8, 0, 31);
    /* tmp = dep(tmp, rol32(src, 24), 0xff000000)   = 0000dcda */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 0, 7);
    /* tmp = dep(tmp, rol32(src, 24), 0x0000ff00)   = 0000dcba */
    tcg_out_rlw(s, RLWIMI, tmp, src, 24, 16, 23);

    if (flags & TCG_BSWAP_OS) {
        tcg_out_ext32s(s, dst, tmp);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, dst, tmp);
    }
}

static const TCGOutOpBswap outop_bswap32 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap32,
};
3366
#if TCG_TARGET_REG_BITS == 64
/*
 * Byte-swap all 64 bits of src into dst.  When dst aliases src,
 * the roles of dst and R0 as the two temporaries are swapped so
 * that src is not clobbered before its last use.
 */
static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    TCGReg t0 = dst == src ? TCG_REG_R0 : dst;
    TCGReg t1 = dst == src ? dst : TCG_REG_R0;

    /* ISA 3.10 has a direct byte-reverse-doubleword instruction. */
    if (have_isa_3_10) {
        tcg_out32(s, BRD | RA(dst) | RS(src));
        return;
    }

    /*
     * In the following,
     *   dep(a, b, m) -> (a & ~m) | (b & m)
     *
     * Begin with:                              src = abcdefgh
     */
    /* t0 = rol32(src, 8) & 0xffffffff              = 0000fghe */
    tcg_out_rlw(s, RLWINM, t0, src, 8, 0, 31);
    /* t0 = dep(t0, rol32(src, 24), 0xff000000)     = 0000hghe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 0, 7);
    /* t0 = dep(t0, rol32(src, 24), 0x0000ff00)     = 0000hgfe */
    tcg_out_rlw(s, RLWIMI, t0, src, 24, 16, 23);

    /* t0 = rol64(t0, 32)                           = hgfe0000 */
    tcg_out_rld(s, RLDICL, t0, t0, 32, 0);
    /* t1 = rol64(src, 32)                          = efghabcd */
    tcg_out_rld(s, RLDICL, t1, src, 32, 0);

    /* t0 = dep(t0, rol32(t1, 8), 0xffffffff)       = hgfebcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 8, 0, 31);
    /* t0 = dep(t0, rol32(t1, 24), 0xff000000)      = hgfedcda */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 0, 7);
    /* t0 = dep(t0, rol32(t1, 24), 0x0000ff00)      = hgfedcba */
    tcg_out_rlw(s, RLWIMI, t0, t1, 24, 16, 23);

    tcg_out_mov(s, TCG_TYPE_REG, dst, t0);
}

static const TCGOutOpUnary outop_bswap64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_bswap64,
};
#endif /* TCG_TARGET_REG_BITS == 64 */
3411
/* Arithmetic negation: a0 = -a1. */
static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
    tcg_out32(s, NEG | RT(a0) | RA(a1));
}

static const TCGOutOpUnary outop_neg = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_neg,
};
3421
3422static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
3423{
3424    tgen_nor(s, type, a0, a1, a1);
3425}
3426
3427static const TCGOutOpUnary outop_not = {
3428    .base.static_constraint = C_O1_I1(r, r),
3429    .out_rr = tgen_not,
3430};
3431
/* Insert a2 into a0 at bit range [ofs, ofs+len) via rotate-and-insert.
   The "0" constraint ties a1 to a0, so a1 itself is unused here. */
static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         TCGReg a2, unsigned ofs, unsigned len)
{
    if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWIMI, a0, a2, ofs, 32 - ofs - len, 31 - ofs);
    } else {
        tcg_out_rld(s, RLDIMI, a0, a2, ofs, 64 - ofs - len);
    }
}

/* Deposit of a constant: constraint rZ only admits 0, so clearing
   the field with an and-mask implements the operation. */
static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          tcg_target_long a2, unsigned ofs, unsigned len)
{
    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
}

static const TCGOutOpDeposit outop_deposit = {
    .base.static_constraint = C_O1_I2(r, 0, rZ),
    .out_rrr = tgen_deposit,
    .out_rri = tgen_depositi,
};
3453
/* Extract the unsigned bit field [ofs, ofs+len) of a1 into a0. */
static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                         unsigned ofs, unsigned len)
{
    if (ofs == 0 && len <= 16) {
        /* A short low-order field is a simple mask. */
        tgen_andi(s, TCG_TYPE_I32, a0, a1, (1 << len) - 1);
    } else if (type == TCG_TYPE_I32) {
        tcg_out_rlw(s, RLWINM, a0, a1, 32 - ofs, 32 - len, 31);
    } else {
        tcg_out_rld(s, RLDICL, a0, a1, 64 - ofs, 64 - len);
    }
}

static const TCGOutOpExtract outop_extract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extract,
};
3470
/* Extract the signed bit field [ofs, ofs+len) of a1 into a0.  Only
   the combinations handled below are expected to reach this backend;
   anything else aborts. */
static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                          unsigned ofs, unsigned len)
{
    if (ofs == 0) {
        /* Low-order fields of extension width map to sign-extend insns. */
        switch (len) {
        case 8:
            tcg_out_ext8s(s, type, a0, a1);
            return;
        case 16:
            tcg_out_ext16s(s, type, a0, a1);
            return;
        case 32:
            tcg_out_ext32s(s, a0, a1);
            return;
        }
    } else if (ofs + len == 32) {
        /* A field ending at bit 31 is an arithmetic shift right. */
        tcg_out_sari32(s, a0, a1, ofs);
        return;
    }
    g_assert_not_reached();
}

static const TCGOutOpExtract outop_sextract = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_sextract,
};
3497
3498
/*
 * Emit one TCG opcode not covered by the TCGOutOp* descriptor tables:
 * branches, guest/host loads and stores, double-word add/sub carry
 * chains and memory barriers.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0, a1;

    switch (opc) {
    case INDEX_op_goto_ptr:
        /* Jump via CTR; zero R3, the value returned should we land
           on the epilogue. */
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
        /* Direct branch; emit a relocation when the label is unbound. */
        {
            TCGLabel *l = arg_label(args[0]);
            uint32_t insn = B;

            if (l->has_value) {
                insn |= reloc_pc24_val(tcg_splitwx_to_rx(s->code_ptr),
                                       l->u.value_ptr);
            } else {
                tcg_out_reloc(s, s->code_ptr, R_PPC_REL24, l, 0);
            }
            tcg_out32(s, insn);
        }
        break;

    /* Host loads: tcg_out_mem_long picks immediate or indexed form. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        /* No sign-extending byte load; extend after the zero-extending one. */
        tcg_out_mem_long(s, LBZ, LBZX, args[0], args[1], args[2]);
        tcg_out_ext8s(s, TCG_TYPE_REG, args[0], args[0]);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_mem_long(s, LHZ, LHZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_mem_long(s, LHA, LHAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
        tcg_out_mem_long(s, LWZ, LWZX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_mem_long(s, LWA, LWAX, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_mem_long(s, LD, LDX, args[0], args[1], args[2]);
        break;

    /* Host stores. */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_mem_long(s, STB, STBX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_mem_long(s, STH, STHX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_mem_long(s, STW, STWX, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
        break;

    /* Guest memory accesses; 32-bit hosts pass a register pair for i64. */
    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_ld(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_ld(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_ld_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
        break;

    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_i64:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_qemu_st(s, args[0], -1, args[1], args[2], TCG_TYPE_I64);
        } else {
            tcg_out_qemu_st(s, args[0], args[1], args[2],
                            args[3], TCG_TYPE_I64);
        }
        break;
    case INDEX_op_qemu_st_i128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_add2_i64:
#else
    case INDEX_op_add2_i32:
#endif
        /* Note that the CA bit is defined based on the word size of the
           environment.  So in 64-bit mode it's always carry-out of bit 63.
           The fallback code using deposit works just as well for 32-bit.  */
        a0 = args[0], a1 = args[1];
        /* Redirect the low result to R0 if it would clobber a yet-unread
           high-part input. */
        if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[4]) {
            tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
        } else {
            tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
        }
        if (const_args[5]) {
            tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
        } else {
            tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_sub2_i64:
#else
    case INDEX_op_sub2_i32:
#endif
        a0 = args[0], a1 = args[1];
        /* As for add2: avoid clobbering a yet-unread high-part input. */
        if (a0 == args[5] || (!const_args[3] && a0 == args[3])) {
            a0 = TCG_REG_R0;
        }
        if (const_args[2]) {
            tcg_out32(s, SUBFIC | TAI(a0, args[4], args[2]));
        } else {
            tcg_out32(s, SUBFC | TAB(a0, args[4], args[2]));
        }
        if (const_args[3]) {
            tcg_out32(s, (args[3] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
        } else {
            tcg_out32(s, SUBFE | TAB(a1, args[5], args[3]));
        }
        if (a0 != args[0]) {
            tcg_out_mov(s, TCG_TYPE_REG, args[0], a0);
        }
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, args[0]);
        break;

    case INDEX_op_call:      /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:   /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:   /* Always emitted via tcg_out_goto_tb.  */
    default:
        g_assert_not_reached();
    }
}
3662
/*
 * Report how a vector opcode can be emitted for the given element size:
 * 1 = directly, -1 = via expansion in tcg_expand_vec_op, 0 = unsupported.
 * Availability is gated on the host ISA level probed at startup.
 */
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
    switch (opc) {
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_not_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
        return 1;
    case INDEX_op_orc_vec:
        return have_isa_2_07;
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
        /* MO_64 element forms need ISA 2.07. */
        return vece <= MO_32 || have_isa_2_07;
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
        return vece <= MO_32;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        /* Immediate-count shifts expand to the variable-count forms. */
        return vece <= MO_32 || have_isa_2_07 ? -1 : 0;
    case INDEX_op_cmp_vec:
    case INDEX_op_cmpsel_vec:
        return vece <= MO_32 || have_isa_2_07 ? 1 : 0;
    case INDEX_op_neg_vec:
        return vece >= MO_32 && have_isa_3_00;
    case INDEX_op_mul_vec:
        switch (vece) {
        case MO_8:
        case MO_16:
            return -1;
        case MO_32:
            return have_isa_2_07 ? 1 : -1;
        case MO_64:
            return have_isa_3_10;
        }
        return 0;
    case INDEX_op_bitsel_vec:
        return have_vsx;
    case INDEX_op_rotrv_vec:
        /* Expanded as rotlv with negated count. */
        return -1;
    default:
        return 0;
    }
}
3722
/*
 * Broadcast (splat) register src into every element of vector dst.
 * Returns false if no direct encoding exists, letting the caller fall
 * back on dupm or mov+dup.
 */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src)
{
    tcg_debug_assert(dst >= TCG_REG_V0);

    /* Splat from integer reg allowed via constraints for v3.00.  */
    if (src < TCG_REG_V0) {
        tcg_debug_assert(have_isa_3_00);
        switch (vece) {
        case MO_64:
            tcg_out32(s, MTVSRDD | VRT(dst) | RA(src) | RB(src));
            return true;
        case MO_32:
            tcg_out32(s, MTVSRWS | VRT(dst) | RA(src));
            return true;
        default:
            /* Fail, so that we fall back on either dupm or mov+dup.  */
            return false;
        }
    }

    /*
     * Recall we use (or emulate) VSX integer loads, so the integer is
     * right justified within the left (zero-index) double-word.
     */
    switch (vece) {
    case MO_8:
        tcg_out32(s, VSPLTB | VRT(dst) | VRB(src) | (7 << 16));
        break;
    case MO_16:
        tcg_out32(s, VSPLTH | VRT(dst) | VRB(src) | (3 << 16));
        break;
    case MO_32:
        tcg_out32(s, VSPLTW | VRT(dst) | VRB(src) | (1 << 16));
        break;
    case MO_64:
        if (have_vsx) {
            tcg_out32(s, XXPERMDI | VRT(dst) | VRA(src) | VRB(src));
            break;
        }
        /* Without VSX, rotate the two double-words into place by hand. */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, src, src, 8);
        tcg_out_vsldoi(s, dst, TCG_VEC_TMP1, src, 8);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3771
/*
 * Broadcast (splat) the element at base+offset in memory into every
 * element of vector out.  Pre-3.00 paths load the containing element
 * with an LVE* insn and then splat the lane selected by the low
 * offset bits (lane index mirrored on little-endian hosts).
 */
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg out, TCGReg base, intptr_t offset)
{
    int elt;

    tcg_debug_assert(out >= TCG_REG_V0);
    switch (vece) {
    case MO_8:
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEBX, out, base, offset);
        }
        elt = extract32(offset, 0, 4);
#if !HOST_BIG_ENDIAN
        elt ^= 15;
#endif
        tcg_out32(s, VSPLTB | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_16:
        tcg_debug_assert((offset & 1) == 0);
        if (have_isa_3_00) {
            tcg_out_mem_long(s, LXV | 8, LVX, out, base, offset & -16);
        } else {
            tcg_out_mem_long(s, 0, LVEHX, out, base, offset);
        }
        elt = extract32(offset, 1, 3);
#if !HOST_BIG_ENDIAN
        elt ^= 7;
#endif
        tcg_out32(s, VSPLTH | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_32:
        if (have_isa_3_00) {
            /* Load-and-splat word in one instruction. */
            tcg_out_mem_long(s, 0, LXVWSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 3) == 0);
        tcg_out_mem_long(s, 0, LVEWX, out, base, offset);
        elt = extract32(offset, 2, 2);
#if !HOST_BIG_ENDIAN
        elt ^= 3;
#endif
        tcg_out32(s, VSPLTW | VRT(out) | VRB(out) | (elt << 16));
        break;
    case MO_64:
        if (have_vsx) {
            /* Load-and-splat double-word in one instruction. */
            tcg_out_mem_long(s, 0, LXVDSX, out, base, offset);
            break;
        }
        tcg_debug_assert((offset & 7) == 0);
        tcg_out_mem_long(s, 0, LVX, out, base, offset & -16);
        /* Shift the selected double-word into both halves. */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, out, out, 8);
        elt = extract32(offset, 3, 1);
#if !HOST_BIG_ENDIAN
        elt = !elt;
#endif
        if (elt) {
            tcg_out_vsldoi(s, out, out, TCG_VEC_TMP1, 8);
        } else {
            tcg_out_vsldoi(s, out, TCG_VEC_TMP1, out, 8);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}
3840
3841static void tcg_out_not_vec(TCGContext *s, TCGReg a0, TCGReg a1)
3842{
3843    tcg_out32(s, VNOR | VRT(a0) | VRA(a1) | VRB(a1));
3844}
3845
3846static void tcg_out_or_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3847{
3848    tcg_out32(s, VOR | VRT(a0) | VRA(a1) | VRB(a2));
3849}
3850
3851static void tcg_out_orc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3852{
3853    tcg_out32(s, VORC | VRT(a0) | VRA(a1) | VRB(a2));
3854}
3855
3856static void tcg_out_and_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3857{
3858    tcg_out32(s, VAND | VRT(a0) | VRA(a1) | VRB(a2));
3859}
3860
3861static void tcg_out_andc_vec(TCGContext *s, TCGReg a0, TCGReg a1, TCGReg a2)
3862{
3863    tcg_out32(s, VANDC | VRT(a0) | VRA(a1) | VRB(a2));
3864}
3865
3866static void tcg_out_bitsel_vec(TCGContext *s, TCGReg d,
3867                               TCGReg c, TCGReg t, TCGReg f)
3868{
3869    if (TCG_TARGET_HAS_bitsel_vec) {
3870        tcg_out32(s, XXSEL | VRT(d) | VRC(c) | VRB(t) | VRA(f));
3871    } else {
3872        tcg_out_and_vec(s, TCG_VEC_TMP2, t, c);
3873        tcg_out_andc_vec(s, d, f, c);
3874        tcg_out_or_vec(s, d, d, TCG_VEC_TMP2);
3875    }
3876}
3877
/*
 * Emit a vector comparison, without materializing a final inversion.
 * Hardware provides only EQ, GT, GTU (plus NE on ISA 3.00 for
 * vece <= MO_32); other conditions are reached by swapping operands
 * and/or complementing the result.  Returns true when the caller must
 * complement a0 to realize the requested condition.
 */
static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
                                  TCGReg a1, TCGReg a2, TCGCond cond)
{
    /* Comparison opcodes indexed by vece; 0 marks unavailable. */
    static const uint32_t
        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
        ne_op[4]  = { VCMPNEB, VCMPNEH, VCMPNEW, 0 },
        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD };
    uint32_t insn;

    bool need_swap = false, need_inv = false;

    /* Doubleword comparisons require ISA 2.07. */
    tcg_debug_assert(vece <= MO_32 || have_isa_2_07);

    /* Decide how to canonicalize COND onto a supported comparison. */
    switch (cond) {
    case TCG_COND_EQ:
    case TCG_COND_GT:
    case TCG_COND_GTU:
        break;
    case TCG_COND_NE:
        if (have_isa_3_00 && vece <= MO_32) {
            break;
        }
        /* fall through */
    case TCG_COND_LE:
    case TCG_COND_LEU:
        need_inv = true;
        break;
    case TCG_COND_LT:
    case TCG_COND_LTU:
        need_swap = true;
        break;
    case TCG_COND_GE:
    case TCG_COND_GEU:
        need_swap = need_inv = true;
        break;
    default:
        g_assert_not_reached();
    }

    if (need_inv) {
        cond = tcg_invert_cond(cond);
    }
    if (need_swap) {
        TCGReg swap = a1;
        a1 = a2;
        a2 = swap;
        cond = tcg_swap_cond(cond);
    }

    /* After canonicalization only directly-supported conditions remain. */
    switch (cond) {
    case TCG_COND_EQ:
        insn = eq_op[vece];
        break;
    case TCG_COND_NE:
        insn = ne_op[vece];
        break;
    case TCG_COND_GT:
        insn = gts_op[vece];
        break;
    case TCG_COND_GTU:
        insn = gtu_op[vece];
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));

    return need_inv;
}
3948
3949static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
3950                            TCGReg a1, TCGReg a2, TCGCond cond)
3951{
3952    if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
3953        tcg_out_not_vec(s, a0, a0);
3954    }
3955}
3956
/*
 * Emit cmpsel: a0 = (c1 cond c2) ? v3 : v4, per element.
 * When V3 is constant it must be 0 or -1 (all-ones), which lets the
 * select collapse to a single logical op against the comparison mask.
 */
static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
                               TCGReg c1, TCGReg c2, TCGArg v3, int const_v3,
                               TCGReg v4, TCGCond cond)
{
    /* TMP1 = comparison mask; inv means the mask is complemented. */
    bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP1, c1, c2, cond);

    if (!const_v3) {
        if (inv) {
            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v4, v3);
        } else {
            tcg_out_bitsel_vec(s, a0, TCG_VEC_TMP1, v3, v4);
        }
    } else if (v3) {
        /* v3 == -1: lanes where the condition holds become all-ones. */
        if (inv) {
            tcg_out_orc_vec(s, a0, v4, TCG_VEC_TMP1);
        } else {
            tcg_out_or_vec(s, a0, v4, TCG_VEC_TMP1);
        }
    } else {
        /* v3 == 0: lanes where the condition holds become zero. */
        if (inv) {
            tcg_out_and_vec(s, a0, v4, TCG_VEC_TMP1);
        } else {
            tcg_out_andc_vec(s, a0, v4, TCG_VEC_TMP1);
        }
    }
}
3983
/*
 * Emit code for one vector opcode.  Most ops map to a single
 * VRT,VRA,VRB-form instruction selected from the tables below;
 * the remainder are forwarded to dedicated helpers.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    /* Opcode tables indexed by vece; 0 marks an unsupported element size. */
    static const uint32_t
        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
        mul_op[4] = { 0, 0, VMULUWM, VMULLD },
        neg_op[4] = { 0, 0, VNEGW, VNEGD },
        ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
        usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
        sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
        ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
        mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
        mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
        pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
    uint32_t insn;

    switch (opc) {
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        return;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        return;
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        return;

    case INDEX_op_add_vec:
        insn = add_op[vece];
        break;
    case INDEX_op_sub_vec:
        insn = sub_op[vece];
        break;
    case INDEX_op_neg_vec:
        /* VNEG* takes its operand in the VRB slot; VRA must be 0. */
        insn = neg_op[vece];
        a2 = a1;
        a1 = 0;
        break;
    case INDEX_op_mul_vec:
        insn = mul_op[vece];
        break;
    case INDEX_op_ssadd_vec:
        insn = ssadd_op[vece];
        break;
    case INDEX_op_sssub_vec:
        insn = sssub_op[vece];
        break;
    case INDEX_op_usadd_vec:
        insn = usadd_op[vece];
        break;
    case INDEX_op_ussub_vec:
        insn = ussub_op[vece];
        break;
    case INDEX_op_smin_vec:
        insn = smin_op[vece];
        break;
    case INDEX_op_umin_vec:
        insn = umin_op[vece];
        break;
    case INDEX_op_smax_vec:
        insn = smax_op[vece];
        break;
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        tcg_out_and_vec(s, a0, a1, a2);
        return;
    case INDEX_op_or_vec:
        tcg_out_or_vec(s, a0, a1, a2);
        return;
    case INDEX_op_xor_vec:
        insn = VXOR;
        break;
    case INDEX_op_andc_vec:
        tcg_out_andc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_not_vec:
        tcg_out_not_vec(s, a0, a1);
        return;
    case INDEX_op_orc_vec:
        tcg_out_orc_vec(s, a0, a1, a2);
        return;
    case INDEX_op_nand_vec:
        insn = VNAND;
        break;
    case INDEX_op_nor_vec:
        insn = VNOR;
        break;
    case INDEX_op_eqv_vec:
        insn = VEQV;
        break;

    case INDEX_op_cmp_vec:
        tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
        return;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel_vec(s, vece, a0, a1, a2,
                           args[3], const_args[3], args[4], args[5]);
        return;
    case INDEX_op_bitsel_vec:
        tcg_out_bitsel_vec(s, a0, a1, a2, args[3]);
        return;

    case INDEX_op_dup2_vec:
        /* Build a V64 pair from two 32-bit host registers. */
        assert(TCG_TARGET_REG_BITS == 32);
        /* With inputs a1 = xLxx, a2 = xHxx  */
        tcg_out32(s, VMRGHW | VRT(a0) | VRA(a2) | VRB(a1));  /* a0  = xxHL */
        tcg_out_vsldoi(s, TCG_VEC_TMP1, a0, a0, 8);          /* tmp = HLxx */
        tcg_out_vsldoi(s, a0, a0, TCG_VEC_TMP1, 8);          /* a0  = HLHL */
        return;

    case INDEX_op_ppc_mrgh_vec:
        insn = mrgh_op[vece];
        break;
    case INDEX_op_ppc_mrgl_vec:
        insn = mrgl_op[vece];
        break;
    case INDEX_op_ppc_muleu_vec:
        insn = muleu_op[vece];
        break;
    case INDEX_op_ppc_mulou_vec:
        insn = mulou_op[vece];
        break;
    case INDEX_op_ppc_pkum_vec:
        insn = pkum_op[vece];
        break;
    case INDEX_op_rotlv_vec:
        insn = rotl_op[vece];
        break;
    case INDEX_op_ppc_msum_vec:
        /* VMSUMUHM is a four-operand (VRT,VRA,VRB,VRC) form. */
        tcg_debug_assert(vece == MO_16);
        tcg_out32(s, VMSUMUHM | VRT(a0) | VRA(a1) | VRB(a2) | VRC(args[3]));
        return;

    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }

    /* A zero table entry means an unsupported vece reached us. */
    tcg_debug_assert(insn != 0);
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}
4153
/*
 * Expand a shift/rotate by immediate into the variable-count vector op
 * OPCI, splatting the count into a vector constant.  The splat is done
 * with MO_8; the shift insns consume only the low bits of each lane's
 * count, so byte replication suffices for all element sizes.
 */
static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1;

    if (vece == MO_32) {
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using negative numbers gets us the 4th bit easily.
         */
        imm = sextract32(imm, 0, 5);
    } else {
        imm &= (8 << vece) - 1;
    }

    /* Splat w/bytes for xxspltib when 2.07 allows MO_64. */
    t1 = tcg_constant_vec(type, MO_8, imm);
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
}
4174
/*
 * Expand mul_vec (v0 = v1 * v2) for element sizes with no direct
 * multiply instruction: MO_8/MO_16 via even/odd widening multiplies
 * merged and packed back to the original width; MO_32 (pre-2.07)
 * synthesized from 16-bit multiply-odd and multiply-sum.
 */
static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);
    TCGv_vec t2 = tcg_temp_new_vec(type);
    TCGv_vec c0, c16;

    switch (vece) {
    case MO_8:
    case MO_16:
        /* t1/t2 = double-width products of even/odd element pairs. */
        vec_gen_3(INDEX_op_ppc_muleu_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        /* Merge and pack the wide products back to vece-sized lanes. */
        vec_gen_3(INDEX_op_ppc_mrgh_vec, type, vece + 1, tcgv_vec_arg(v0),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_mrgl_vec, type, vece + 1, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(t2));
        vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v0), tcgv_vec_arg(t1));
        break;

    case MO_32:
        /* ISA 2.07 has VMULUWM; this path is only for older cpus. */
        tcg_debug_assert(!have_isa_2_07);
        /*
         * Only 5 bits are significant, and VSPLTISB can represent -16..15.
         * So using -16 is a quick way to represent 16.
         */
        c16 = tcg_constant_vec(type, MO_8, -16);
        c0 = tcg_constant_vec(type, MO_8, 0);

        vec_gen_3(INDEX_op_rotlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v2), tcgv_vec_arg(c16));
        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_16, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        vec_gen_4(INDEX_op_ppc_msum_vec, type, MO_16, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1), tcgv_vec_arg(c0));
        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t1),
                  tcgv_vec_arg(t1), tcgv_vec_arg(c16));
        tcg_gen_add_vec(MO_32, v0, t1, t2);
        break;

    default:
        g_assert_not_reached();
    }
    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);
}
4223
4224void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
4225                       TCGArg a0, ...)
4226{
4227    va_list va;
4228    TCGv_vec v0, v1, v2, t0;
4229    TCGArg a2;
4230
4231    va_start(va, a0);
4232    v0 = temp_tcgv_vec(arg_temp(a0));
4233    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
4234    a2 = va_arg(va, TCGArg);
4235
4236    switch (opc) {
4237    case INDEX_op_shli_vec:
4238        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
4239        break;
4240    case INDEX_op_shri_vec:
4241        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
4242        break;
4243    case INDEX_op_sari_vec:
4244        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
4245        break;
4246    case INDEX_op_rotli_vec:
4247        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_rotlv_vec);
4248        break;
4249    case INDEX_op_mul_vec:
4250        v2 = temp_tcgv_vec(arg_temp(a2));
4251        expand_vec_mul(type, vece, v0, v1, v2);
4252        break;
4253    case INDEX_op_rotlv_vec:
4254        v2 = temp_tcgv_vec(arg_temp(a2));
4255        t0 = tcg_temp_new_vec(type);
4256        tcg_gen_neg_vec(vece, t0, v2);
4257        tcg_gen_rotlv_vec(vece, v0, v1, t0);
4258        tcg_temp_free_vec(t0);
4259        break;
4260    default:
4261        g_assert_not_reached();
4262    }
4263    va_end(va);
4264}
4265
/*
 * Return the register-constraint set for OP, or C_NotImplemented for
 * ops this backend does not handle.
 */
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    /* Integer loads: one gpr output, one gpr address. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return C_O1_I1(r, r);

    /* Integer stores: value and address, both gprs. */
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(r, r);

    case INDEX_op_add2_i64:
    case INDEX_op_add2_i32:
        return C_O2_I4(r, r, r, r, rI, rZM);
    case INDEX_op_sub2_i64:
    case INDEX_op_sub2_i32:
        return C_O2_I4(r, r, rI, rZM, r, r);

    case INDEX_op_qemu_ld_i32:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i64:
        /* 32-bit hosts need a register pair for the 64-bit value. */
        return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);

    case INDEX_op_qemu_st_i32:
        return C_O0_I2(r, r);
    case INDEX_op_qemu_st_i64:
        return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);

    case INDEX_op_qemu_ld_i128:
        return C_N1O1_I1(o, m, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(o, m, r);

    /* Three-operand vector ops: all operands in vector registers. */
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_ppc_mrgh_vec:
    case INDEX_op_ppc_mrgl_vec:
    case INDEX_op_ppc_muleu_vec:
    case INDEX_op_ppc_mulou_vec:
    case INDEX_op_ppc_pkum_vec:
    case INDEX_op_dup2_vec:
        return C_O1_I2(v, v, v);

    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
        return C_O1_I1(v, v);

    case INDEX_op_dup_vec:
        /* ISA 3.00 can splat directly from a gpr. */
        return have_isa_3_00 ? C_O1_I1(v, vr) : C_O1_I1(v, v);

    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);

    case INDEX_op_bitsel_vec:
    case INDEX_op_ppc_msum_vec:
        return C_O1_I3(v, v, v, v);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, v, vZM, v);

    default:
        return C_NotImplemented;
    }
}
4375
/*
 * Initialize per-target register sets: which registers exist for each
 * TCG type, which are clobbered across calls, and which are reserved.
 */
static void tcg_target_init(TCGContext *s)
{
    /* All 32 gprs serve both 32- and 64-bit integer values. */
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
    if (have_altivec) {
        /* Vector registers occupy the upper half of the regset. */
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    /* Call-clobbered (volatile) registers: r0, r2-r12 ... */
    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R12);

    /* ... and v0-v19. */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);

    /* Registers the allocator must never hand out. */
    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* tcg temp */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R1); /* stack pointer */
#if defined(_CALL_SYSV)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R2); /* toc pointer */
#endif
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}
4437
4438#ifdef __ELF__
/*
 * Unwind info emitted for the JIT: a CIE, an FDE header, and
 * hand-encoded DWARF CFA bytes describing the TCG frame.
 */
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    /* 3 bytes for the LR record, then 2 bytes per callee-saved reg
       (filled in at runtime by tcg_register_jit). */
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2 + 3];
} DebugFrame;
4445
/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* ELF machine type advertised to the JIT debug interface. */
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_PPC64
#else
# define ELF_HOST_MACHINE EM_PPC
#endif
4454
/*
 * Static portion of the unwind info; tcg_register_jit() appends the
 * per-register save slots and the code extent before registering it.
 */
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = (-SZR & 0x7f),         /* sleb128 -SZR */
    .cie.return_column = 65,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_R1,                 /* DW_CFA_def_cfa r1, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* DW_CFA_offset_extended_sf, lr, LR_OFFSET */
        0x11, 65, (LR_OFFSET / -SZR) & 0x7f,
    }
};
4476
4477void tcg_register_jit(const void *buf, size_t buf_size)
4478{
4479    uint8_t *p = &debug_frame.fde_reg_ofs[3];
4480    int i;
4481
4482    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i, p += 2) {
4483        p[0] = 0x80 + tcg_target_callee_save_regs[i];
4484        p[1] = (FRAME_SIZE - (REG_SAVE_BOT + i * SZR)) / SZR;
4485    }
4486
4487    debug_frame.fde.func_start = (uintptr_t)buf;
4488    debug_frame.fde.func_len = buf_size;
4489
4490    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
4491}
4492#endif /* __ELF__ */
4493#undef VMULEUB
4494#undef VMULEUH
4495#undef VMULEUW
4496#undef VMULOUB
4497#undef VMULOUH
4498#undef VMULOUW
4499#undef VMSUMUHM
4500