xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision c4601322)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
74#define TCG_REG_TMP TCG_REG_X30
75#define TCG_VEC_TMP TCG_REG_V31
76
77#ifndef CONFIG_SOFTMMU
78/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
80   address, via the address index register slot, we need to load even
81   a zero guest base into a register.  */
82#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
83#define TCG_REG_GUEST_BASE TCG_REG_X28
84#endif
85
86static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
87{
88    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
89    ptrdiff_t offset = target - src_rx;
90
91    if (offset == sextract64(offset, 0, 26)) {
92        /* read instruction, mask away previous PC_REL26 parameter contents,
93           set the proper offset, then write back the instruction. */
94        *src_rw = deposit32(*src_rw, 0, 26, offset);
95        return true;
96    }
97    return false;
98}
99
100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
101{
102    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
103    ptrdiff_t offset = target - src_rx;
104
105    if (offset == sextract64(offset, 0, 19)) {
106        *src_rw = deposit32(*src_rw, 5, 19, offset);
107        return true;
108    }
109    return false;
110}
111
112static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
113                        intptr_t value, intptr_t addend)
114{
115    tcg_debug_assert(addend == 0);
116    switch (type) {
117    case R_AARCH64_JUMP26:
118    case R_AARCH64_CALL26:
119        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
120    case R_AARCH64_CONDBR19:
121        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
122    default:
123        g_assert_not_reached();
124    }
125}
126
/* Backend-specific constant constraint flags, one bit each. */
#define TCG_CT_CONST_AIMM (1 << 8)   /* add/sub immediate, see is_aimm */
#define TCG_CT_CONST_LIMM (1 << 9)   /* logical immediate, see is_limm */
#define TCG_CT_CONST_ZERO (1 << 10)  /* the constant 0 */
#define TCG_CT_CONST_MONE (1 << 11)  /* the constant -1 */
#define TCG_CT_CONST_ORRI (1 << 12)  /* vector ORR immediate */
#define TCG_CT_CONST_ANDI (1 << 13)  /* vector AND immediate (inverted) */

/* Register-set masks: general registers in bits 0-31, vector in 32-63. */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* General registers usable for qemu_ld/st; softmmu excludes X0-X3. */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
144
/*
 * Return true if VAL fits an add/subtract immediate: a 12-bit value
 * either unshifted (bits [11:0]) or shifted left by 12 (bits [23:12]).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t field = 0xfff;

    if ((val & ~field) == 0) {
        return true;
    }
    return (val & ~(field << 12)) == 0;
}
150
/*
 * Return true if VAL is accepted as a logical immediate.
 *
 * Only a simplified subset is recognised (replication across the field
 * is ignored): a single contiguous run of ones, i.e. bit patterns of
 * the forms
 *     0....01....1
 *     0..01..10..0
 * and their bitwise inverses.  All-zeros and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    /* Work on whichever of VAL / ~VAL has its top bit clear. */
    uint64_t run = ((int64_t)val < 0) ? ~val : val;

    if (run == 0) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of
     * ones, leaving at most one bit set when there was a single run.
     */
    run += run & -run;
    return !(run & (run - 1));
}
171
/*
 * Return true if V16 is a single byte placed in either half of a 16-bit
 * element.  On success store the byte in *IMM8 and the matching cmode
 * (0x8 for the low byte, 0xa for the high byte) in *CMODE; on failure
 * both outputs are left untouched.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
186
/*
 * Return true if V32 is a single byte shifted left by 0, 8, 16 or 24.
 * On success store the byte in *IMM8 and the matching cmode (0x0, 0x2,
 * 0x4 or 0x6) in *CMODE; on failure both outputs are left untouched.
 * The lowest byte position that matches wins, so zero yields cmode 0.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = shift / 4;     /* 0, 8, 16, 24 -> 0x0, 0x2, 0x4, 0x6 */
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
209
/*
 * Return true if V32 is a "shifting ones" immediate: one byte followed
 * below by 8 set bits (cmode 0xc) or by 16 set bits (cmode 0xd), with
 * every higher bit clear.  On success store the byte in *IMM8 and the
 * cmode in *CMODE; on failure both outputs are left untouched.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    const uint32_t outside_byte1 = 0xffff00ff;
    const uint32_t outside_byte2 = 0xff00ffff;

    if ((v32 & outside_byte1) == 0xff) {
        *imm8 = (v32 >> 8) & 0xff;
        *cmode = 0xc;
        return true;
    }
    if ((v32 & outside_byte2) == 0xffff) {
        *imm8 = (v32 >> 16) & 0xff;
        *cmode = 0xd;
        return true;
    }
    return false;
}
224
/*
 * Return true if V32 is a float32 bit pattern expressible as an 8-bit
 * immediate: the low 19 bits are zero and bits [30:25] are either
 * 0b100000 or 0b011111.  On success *CMODE is 0xf and *IMM8 packs bit 31,
 * bit 25 and bits [24:19]; on failure both outputs are left untouched.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    if (extract32(v32, 25, 6) != 0x20 && extract32(v32, 25, 6) != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
239
/*
 * Return true if V64 is a float64 bit pattern expressible as an 8-bit
 * immediate: the low 48 bits are zero and bits [62:54] are either
 * 0x100 or 0x0ff.  On success *CMODE is 0xf and *IMM8 packs bit 63,
 * bit 54 and bits [53:48]; on failure both outputs are left untouched.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    if (extract64(v64, 54, 9) != 0x100 && extract64(v64, 54, 9) != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
254
/*
 * Check whether V32 can be built as MOVI followed by a one-byte ORR.
 *
 * Bytes 3, 2 and 1 of V32 are tried in that order: the byte is masked
 * out and the remainder tested as a shifted or shifting-ones immediate.
 * On a match the MOVI parameters are left in (*CMODE, *IMM8) and the
 * return value is the ORR cmode (6, 4 or 2); the ORR byte itself is
 * bits [cmode*4 +: 8] of V32.  Returns 0 when no split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode = 6;

    while (orr_cmode > 0) {
        /* Drop the byte that the ORR would supply. */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8)) {
            break;
        }
        if (is_soimm32(rest, cmode, imm8)) {
            break;
        }
        orr_cmode -= 2;
    }
    return orr_cmode;
}
274
/*
 * Return true if V32 is a valid shifted immediate, treating it as a
 * 16-bit element when both halves are equal and as a 32-bit element
 * otherwise.  Outputs are as for is_shimm16 / is_shimm32.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_match = (v32 == deposit32(v32, 16, 16, v32));

    return halves_match ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
284
285static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
286{
287    if (ct & TCG_CT_CONST) {
288        return 1;
289    }
290    if (type == TCG_TYPE_I32) {
291        val = (int32_t)val;
292    }
293    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
294        return 1;
295    }
296    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
297        return 1;
298    }
299    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
300        return 1;
301    }
302    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
303        return 1;
304    }
305
306    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
307    case 0:
308        break;
309    case TCG_CT_CONST_ANDI:
310        val = ~val;
311        /* fallthru */
312    case TCG_CT_CONST_ORRI:
313        if (val == deposit64(val, 32, 32, val)) {
314            int cmode, imm8;
315            return is_shimm1632(val, &cmode, &imm8);
316        }
317        break;
318    default:
319        /* Both bits should not be set for the same insn.  */
320        g_assert_not_reached();
321    }
322
323    return 0;
324}
325
/* AArch64 condition-code field values. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* unsigned >= */
    COND_HS = COND_CS,  /* alias of CS */
    COND_CC = 0x3,      /* unsigned < */
    COND_LO = COND_CC,  /* alias of CC */
    COND_MI = 0x4,      /* negative */
    COND_PL = 0x5,      /* zero or greater */
    COND_VS = 0x6,      /* overflow */
    COND_VC = 0x7,      /* no overflow */
    COND_HI = 0x8,      /* unsigned > */
    COND_LS = 0x9,      /* unsigned <= */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,      /* behaves like COND_AL here */
};
346
347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
348    [TCG_COND_EQ] = COND_EQ,
349    [TCG_COND_NE] = COND_NE,
350    [TCG_COND_LT] = COND_LT,
351    [TCG_COND_GE] = COND_GE,
352    [TCG_COND_LE] = COND_LE,
353    [TCG_COND_GT] = COND_GT,
354    /* unsigned */
355    [TCG_COND_LTU] = COND_LO,
356    [TCG_COND_GTU] = COND_HI,
357    [TCG_COND_GEU] = COND_HS,
358    [TCG_COND_LEU] = COND_LS,
359};
360
/*
 * Kind of load/store; the value is shifted into bits [23:22] when the
 * I3312_* opcodes are built.
 */
typedef enum {
    LDST_ST     = 0,    /* store */
    LDST_LD     = 1,    /* load */
    LDST_LD_S_X = 2,    /* load, sign-extending into Xt */
    LDST_LD_S_W = 3,    /* load, sign-extending into Wt */
} AArch64LdstType;
367
368/* We encode the format of the insn into the beginning of the name, so that
369   we can have the preprocessor help "typecheck" the insn vs the output
370   function.  Arm didn't provide us with nice names for the formats, so we
371   use the section number of the architecture reference manual in which the
372   instruction group is described.  */
373typedef enum {
374    /* Compare and branch (immediate).  */
375    I3201_CBZ       = 0x34000000,
376    I3201_CBNZ      = 0x35000000,
377
378    /* Conditional branch (immediate).  */
379    I3202_B_C       = 0x54000000,
380
381    /* Unconditional branch (immediate).  */
382    I3206_B         = 0x14000000,
383    I3206_BL        = 0x94000000,
384
385    /* Unconditional branch (register).  */
386    I3207_BR        = 0xd61f0000,
387    I3207_BLR       = 0xd63f0000,
388    I3207_RET       = 0xd65f0000,
389
390    /* AdvSIMD load/store single structure.  */
391    I3303_LD1R      = 0x0d40c000,
392
393    /* Load literal for loading the address at pc-relative offset */
394    I3305_LDR       = 0x58000000,
395    I3305_LDR_v64   = 0x5c000000,
396    I3305_LDR_v128  = 0x9c000000,
397
398    /* Load/store register.  Described here as 3.3.12, but the helper
399       that emits them can transform to 3.3.10 or 3.3.13.  */
400    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
401    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
402    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
403    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
404
405    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
406    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
407    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
408    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
409
410    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
411    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
412
413    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
414    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
415    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
416
417    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
418    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
419
420    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
421    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
422
423    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
424    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
425
426    I3312_TO_I3310  = 0x00200800,
427    I3312_TO_I3313  = 0x01000000,
428
429    /* Load/store register pair instructions.  */
430    I3314_LDP       = 0x28400000,
431    I3314_STP       = 0x28000000,
432
433    /* Add/subtract immediate instructions.  */
434    I3401_ADDI      = 0x11000000,
435    I3401_ADDSI     = 0x31000000,
436    I3401_SUBI      = 0x51000000,
437    I3401_SUBSI     = 0x71000000,
438
439    /* Bitfield instructions.  */
440    I3402_BFM       = 0x33000000,
441    I3402_SBFM      = 0x13000000,
442    I3402_UBFM      = 0x53000000,
443
444    /* Extract instruction.  */
445    I3403_EXTR      = 0x13800000,
446
447    /* Logical immediate instructions.  */
448    I3404_ANDI      = 0x12000000,
449    I3404_ORRI      = 0x32000000,
450    I3404_EORI      = 0x52000000,
451    I3404_ANDSI     = 0x72000000,
452
453    /* Move wide immediate instructions.  */
454    I3405_MOVN      = 0x12800000,
455    I3405_MOVZ      = 0x52800000,
456    I3405_MOVK      = 0x72800000,
457
458    /* PC relative addressing instructions.  */
459    I3406_ADR       = 0x10000000,
460    I3406_ADRP      = 0x90000000,
461
462    /* Add/subtract shifted register instructions (without a shift).  */
463    I3502_ADD       = 0x0b000000,
464    I3502_ADDS      = 0x2b000000,
465    I3502_SUB       = 0x4b000000,
466    I3502_SUBS      = 0x6b000000,
467
468    /* Add/subtract shifted register instructions (with a shift).  */
469    I3502S_ADD_LSL  = I3502_ADD,
470
471    /* Add/subtract with carry instructions.  */
472    I3503_ADC       = 0x1a000000,
473    I3503_SBC       = 0x5a000000,
474
475    /* Conditional select instructions.  */
476    I3506_CSEL      = 0x1a800000,
477    I3506_CSINC     = 0x1a800400,
478    I3506_CSINV     = 0x5a800000,
479    I3506_CSNEG     = 0x5a800400,
480
481    /* Data-processing (1 source) instructions.  */
482    I3507_CLZ       = 0x5ac01000,
483    I3507_RBIT      = 0x5ac00000,
484    I3507_REV       = 0x5ac00000, /* + size << 10 */
485
486    /* Data-processing (2 source) instructions.  */
487    I3508_LSLV      = 0x1ac02000,
488    I3508_LSRV      = 0x1ac02400,
489    I3508_ASRV      = 0x1ac02800,
490    I3508_RORV      = 0x1ac02c00,
491    I3508_SMULH     = 0x9b407c00,
492    I3508_UMULH     = 0x9bc07c00,
493    I3508_UDIV      = 0x1ac00800,
494    I3508_SDIV      = 0x1ac00c00,
495
496    /* Data-processing (3 source) instructions.  */
497    I3509_MADD      = 0x1b000000,
498    I3509_MSUB      = 0x1b008000,
499
500    /* Logical shifted register instructions (without a shift).  */
501    I3510_AND       = 0x0a000000,
502    I3510_BIC       = 0x0a200000,
503    I3510_ORR       = 0x2a000000,
504    I3510_ORN       = 0x2a200000,
505    I3510_EOR       = 0x4a000000,
506    I3510_EON       = 0x4a200000,
507    I3510_ANDS      = 0x6a000000,
508
509    /* Logical shifted register instructions (with a shift).  */
510    I3502S_AND_LSR  = I3510_AND | (1 << 22),
511
512    /* AdvSIMD copy */
513    I3605_DUP      = 0x0e000400,
514    I3605_INS      = 0x4e001c00,
515    I3605_UMOV     = 0x0e003c00,
516
517    /* AdvSIMD modified immediate */
518    I3606_MOVI      = 0x0f000400,
519    I3606_MVNI      = 0x2f000400,
520    I3606_BIC       = 0x2f001400,
521    I3606_ORR       = 0x0f001400,
522
523    /* AdvSIMD scalar shift by immediate */
524    I3609_SSHR      = 0x5f000400,
525    I3609_SSRA      = 0x5f001400,
526    I3609_SHL       = 0x5f005400,
527    I3609_USHR      = 0x7f000400,
528    I3609_USRA      = 0x7f001400,
529    I3609_SLI       = 0x7f005400,
530
531    /* AdvSIMD scalar three same */
532    I3611_SQADD     = 0x5e200c00,
533    I3611_SQSUB     = 0x5e202c00,
534    I3611_CMGT      = 0x5e203400,
535    I3611_CMGE      = 0x5e203c00,
536    I3611_SSHL      = 0x5e204400,
537    I3611_ADD       = 0x5e208400,
538    I3611_CMTST     = 0x5e208c00,
539    I3611_UQADD     = 0x7e200c00,
540    I3611_UQSUB     = 0x7e202c00,
541    I3611_CMHI      = 0x7e203400,
542    I3611_CMHS      = 0x7e203c00,
543    I3611_USHL      = 0x7e204400,
544    I3611_SUB       = 0x7e208400,
545    I3611_CMEQ      = 0x7e208c00,
546
547    /* AdvSIMD scalar two-reg misc */
548    I3612_CMGT0     = 0x5e208800,
549    I3612_CMEQ0     = 0x5e209800,
550    I3612_CMLT0     = 0x5e20a800,
551    I3612_ABS       = 0x5e20b800,
552    I3612_CMGE0     = 0x7e208800,
553    I3612_CMLE0     = 0x7e209800,
554    I3612_NEG       = 0x7e20b800,
555
556    /* AdvSIMD shift by immediate */
557    I3614_SSHR      = 0x0f000400,
558    I3614_SSRA      = 0x0f001400,
559    I3614_SHL       = 0x0f005400,
560    I3614_SLI       = 0x2f005400,
561    I3614_USHR      = 0x2f000400,
562    I3614_USRA      = 0x2f001400,
563
564    /* AdvSIMD three same.  */
565    I3616_ADD       = 0x0e208400,
566    I3616_AND       = 0x0e201c00,
567    I3616_BIC       = 0x0e601c00,
568    I3616_BIF       = 0x2ee01c00,
569    I3616_BIT       = 0x2ea01c00,
570    I3616_BSL       = 0x2e601c00,
571    I3616_EOR       = 0x2e201c00,
572    I3616_MUL       = 0x0e209c00,
573    I3616_ORR       = 0x0ea01c00,
574    I3616_ORN       = 0x0ee01c00,
575    I3616_SUB       = 0x2e208400,
576    I3616_CMGT      = 0x0e203400,
577    I3616_CMGE      = 0x0e203c00,
578    I3616_CMTST     = 0x0e208c00,
579    I3616_CMHI      = 0x2e203400,
580    I3616_CMHS      = 0x2e203c00,
581    I3616_CMEQ      = 0x2e208c00,
582    I3616_SMAX      = 0x0e206400,
583    I3616_SMIN      = 0x0e206c00,
584    I3616_SSHL      = 0x0e204400,
585    I3616_SQADD     = 0x0e200c00,
586    I3616_SQSUB     = 0x0e202c00,
587    I3616_UMAX      = 0x2e206400,
588    I3616_UMIN      = 0x2e206c00,
589    I3616_UQADD     = 0x2e200c00,
590    I3616_UQSUB     = 0x2e202c00,
591    I3616_USHL      = 0x2e204400,
592
593    /* AdvSIMD two-reg misc.  */
594    I3617_CMGT0     = 0x0e208800,
595    I3617_CMEQ0     = 0x0e209800,
596    I3617_CMLT0     = 0x0e20a800,
597    I3617_CMGE0     = 0x2e208800,
598    I3617_CMLE0     = 0x2e209800,
599    I3617_NOT       = 0x2e205800,
600    I3617_ABS       = 0x0e20b800,
601    I3617_NEG       = 0x2e20b800,
602
603    /* System instructions.  */
604    NOP             = 0xd503201f,
605    DMB_ISH         = 0xd50338bf,
606    DMB_LD          = 0x00000100,
607    DMB_ST          = 0x00000200,
608} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
/*
 * Emit opcode OPC through the emitter for instruction format FORMAT.
 * The enumerator is pasted together as I<FORMAT>_<OPC> and the emitter
 * as tcg_out_insn_<FORMAT>, so an opcode that does not exist in that
 * format does not compile.
 */
#define tcg_out_insn(CTX, FORMAT, OPC, ...) \
    glue(tcg_out_insn_,FORMAT)(CTX, glue(glue(glue(I,FORMAT),_),OPC), \
                               ## __VA_ARGS__)
619
620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
621                              TCGReg rt, TCGReg rn, unsigned size)
622{
623    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
624}
625
626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
627                              int imm19, TCGReg rt)
628{
629    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
633                              TCGReg rt, int imm19)
634{
635    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
636}
637
638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
639                              TCGCond c, int imm19)
640{
641    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
642}
643
644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
645{
646    tcg_out32(s, insn | (imm26 & 0x03ffffff));
647}
648
649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
650{
651    tcg_out32(s, insn | rn << 5);
652}
653
654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
655                              TCGReg r1, TCGReg r2, TCGReg rn,
656                              tcg_target_long ofs, bool pre, bool w)
657{
658    insn |= 1u << 31; /* ext */
659    insn |= pre << 24;
660    insn |= w << 23;
661
662    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
663    insn |= (ofs & (0x7f << 3)) << (15 - 3);
664
665    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
666}
667
668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
669                              TCGReg rd, TCGReg rn, uint64_t aimm)
670{
671    if (aimm > 0xfff) {
672        tcg_debug_assert((aimm & 0xfff) == 0);
673        aimm >>= 12;
674        tcg_debug_assert(aimm <= 0xfff);
675        aimm |= 1 << 12;  /* apply LSL 12 */
676    }
677    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
678}
679
680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
681   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
682   that feed the DecodeBitMasks pseudo function.  */
683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
684                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
685{
686    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
687              | rn << 5 | rd);
688}
689
690#define tcg_out_insn_3404  tcg_out_insn_3402
691
692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
693                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
694{
695    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
696              | rn << 5 | rd);
697}
698
699/* This function is used for the Move (wide immediate) instruction group.
700   Note that SHIFT is a full shift count, not the 2 bit HW field. */
701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
702                              TCGReg rd, uint16_t half, unsigned shift)
703{
704    tcg_debug_assert((shift & ~0x30) == 0);
705    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
706}
707
708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
709                              TCGReg rd, int64_t disp)
710{
711    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
712}
713
714/* This function is for both 3.5.2 (Add/Subtract shifted register), for
715   the rare occasion when we actually want to supply a shift amount.  */
716static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
717                                      TCGType ext, TCGReg rd, TCGReg rn,
718                                      TCGReg rm, int imm6)
719{
720    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
721}
722
723/* This function is for 3.5.2 (Add/subtract shifted register),
724   and 3.5.10 (Logical shifted register), for the vast majorty of cases
725   when we don't want to apply a shift.  Thus it can also be used for
726   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
727static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
728                              TCGReg rd, TCGReg rn, TCGReg rm)
729{
730    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
731}
732
733#define tcg_out_insn_3503  tcg_out_insn_3502
734#define tcg_out_insn_3508  tcg_out_insn_3502
735#define tcg_out_insn_3510  tcg_out_insn_3502
736
737static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
738                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
739{
740    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
741              | tcg_cond_to_aarch64[c] << 12);
742}
743
744static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn)
746{
747    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
748}
749
750static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
751                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
752{
753    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
754}
755
756static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
757                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
758{
759    /* Note that bit 11 set means general register input.  Therefore
760       we can handle both register sets with one function.  */
761    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
762              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
763}
764
765static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
766                              TCGReg rd, bool op, int cmode, uint8_t imm8)
767{
768    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
769              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
770}
771
772static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
773                              TCGReg rd, TCGReg rn, unsigned immhb)
774{
775    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
776}
777
778static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
779                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780{
781    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
782              | (rn & 0x1f) << 5 | (rd & 0x1f));
783}
784
785static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
786                              unsigned size, TCGReg rd, TCGReg rn)
787{
788    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
791static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
792                              TCGReg rd, TCGReg rn, unsigned immhb)
793{
794    tcg_out32(s, insn | q << 30 | immhb << 16
795              | (rn & 0x1f) << 5 | (rd & 0x1f));
796}
797
798static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
799                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
800{
801    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
802              | (rn & 0x1f) << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
806                              unsigned size, TCGReg rd, TCGReg rn)
807{
808    tcg_out32(s, insn | q << 30 | (size << 22)
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
812static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
813                              TCGReg rd, TCGReg base, TCGType ext,
814                              TCGReg regoff)
815{
816    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
817    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
818              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg rn, intptr_t offset)
823{
824    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
825}
826
827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
828                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
829{
830    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
831    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
832              | rn << 5 | (rd & 0x1f));
833}
834
835/* Register to register move using ORR (shifted register with no shift). */
836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
837{
838    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
839}
840
841/* Register to register move using ADDI (move to/from SP).  */
842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
843{
844    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
845}
846
847/* This function is used for the Logical (immediate) instruction group.
848   The value of LIMM must satisfy IS_LIMM.  See the comment above about
849   only supporting simplified logical immediates.  */
850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
851                             TCGReg rd, TCGReg rn, uint64_t limm)
852{
853    unsigned h, l, r, c;
854
855    tcg_debug_assert(is_limm(limm));
856
857    h = clz64(limm);
858    l = ctz64(limm);
859    if (l == 0) {
860        r = 0;                  /* form 0....01....1 */
861        c = ctz64(~limm) - 1;
862        if (h == 0) {
863            r = clz64(~limm);   /* form 1..10..01..1 */
864            c += r;
865        }
866    } else {
867        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
868        c = r - h - 1;
869    }
870    if (ext == TCG_TYPE_I32) {
871        r &= 31;
872        c &= 31;
873    }
874
875    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
876}
877
/*
 * Load the constant V64, interpreted at element size VECE, into vector
 * register RD.  TYPE selects the Q bit (set for TCG_TYPE_V128) and the
 * width of the constant-pool fallback.
 *
 * Strategies are tried in this order:
 *   1. MO_8: a single MOVI of the low byte.
 *   2. Every byte of V64 is 0x00 or 0xff: a single MOVI with a byte mask.
 *   3. MO_16 / MO_32: shifted-immediate MOVI/MVNI, optionally followed by
 *      ORR/BIC for a second byte.
 *   4. Other element sizes: a MOVI when is_fimm64() accepts V64.
 *   5. Otherwise a literal load from the constant pool.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            /* Bit i of imm8 records that byte i is all-ones. */
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Try the complement, loaded with MVNI. */
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Same tests on the complement (minus the fimm32 form), using MVNI. */
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        /*
         * A non-zero result from is_shimm32_pair() is reused both as the
         * cmode of the second insn and, times 4, as the bit position of
         * the second byte.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        /* Two copies of v64 fill the 128-bit pool entry. */
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
980
981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
982                            TCGReg rd, TCGReg rs)
983{
984    int is_q = type - TCG_TYPE_V64;
985    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
986    return true;
987}
988
989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
990                             TCGReg r, TCGReg base, intptr_t offset)
991{
992    TCGReg temp = TCG_REG_TMP;
993
994    if (offset < -0xffffff || offset > 0xffffff) {
995        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
996        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
997        base = temp;
998    } else {
999        AArch64Insn add_insn = I3401_ADDI;
1000
1001        if (offset < 0) {
1002            add_insn = I3401_SUBI;
1003            offset = -offset;
1004        }
1005        if (offset & 0xfff000) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1007            base = temp;
1008        }
1009        if (offset & 0xfff) {
1010            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1011            base = temp;
1012        }
1013    }
1014    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1015    return true;
1016}
1017
/*
 * Load the integer constant VALUE into general register RD.
 *
 * TYPE must be TCG_TYPE_I32 or TCG_TYPE_I64.  The cheapest encoding found
 * is used, trying in order: a single MOVZ or MOVN, a logical immediate
 * (ORR with XZR), a PC-relative ADR or ADRP(+ADD) for 64-bit values, a
 * two-insn MOVZ/MOVN + MOVK sequence, and finally a constant-pool load.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        /* Byte displacement fits in a signed 21-bit field: plain ADR. */
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        /* Otherwise try a 4KiB-page displacement for ADRP. */
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            /* Add back the offset within the page, if any. */
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /*
     * s0 and s1 are the trailing-zero counts of t0 and t1 masked with
     * 63 & -16 (i.e. 48), giving a 16-bit-aligned shift.  t1 and t2 are
     * what remains after clearing the 16-bit chunk at s0 and then at s1.
     */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        /* At most two chunks are set: MOVZ/MOVN, then an optional MOVK. */
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            /* MOVK is passed the bits of value itself, not of t0. */
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1108
/*
 * Register exchange hook.  This backend emits nothing and always
 * returns false.
 * NOTE(review): presumably false tells the caller that no exchange insn
 * is available and it must fall back to moves -- confirm against the
 * common TCG code.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1113
/*
 * Pointer-plus-immediate hook.  It is not expected to be called for
 * this backend: the body asserts that it is unreachable.
 */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1120
/*
 * Define something more legible for general use: tcg_out_ldst_r is an
 * alias for the format-3310 emitter, used in this file for loads and
 * stores that take their offset from a register.
 */
#define tcg_out_ldst_r  tcg_out_insn_3310
1123
1124static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1125                         TCGReg rn, intptr_t offset, int lgsize)
1126{
1127    /* If the offset is naturally aligned and in range, then we can
1128       use the scaled uimm12 encoding */
1129    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1130        uintptr_t scaled_uimm = offset >> lgsize;
1131        if (scaled_uimm <= 0xfff) {
1132            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1133            return;
1134        }
1135    }
1136
1137    /* Small signed offsets can use the unscaled encoding.  */
1138    if (offset >= -256 && offset < 256) {
1139        tcg_out_insn_3312(s, insn, rd, rn, offset);
1140        return;
1141    }
1142
1143    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1144    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1145    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1146}
1147
1148static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1149{
1150    if (ret == arg) {
1151        return true;
1152    }
1153    switch (type) {
1154    case TCG_TYPE_I32:
1155    case TCG_TYPE_I64:
1156        if (ret < 32 && arg < 32) {
1157            tcg_out_movr(s, type, ret, arg);
1158            break;
1159        } else if (ret < 32) {
1160            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1161            break;
1162        } else if (arg < 32) {
1163            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1164            break;
1165        }
1166        /* FALLTHRU */
1167
1168    case TCG_TYPE_V64:
1169        tcg_debug_assert(ret >= 32 && arg >= 32);
1170        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1171        break;
1172    case TCG_TYPE_V128:
1173        tcg_debug_assert(ret >= 32 && arg >= 32);
1174        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1175        break;
1176
1177    default:
1178        g_assert_not_reached();
1179    }
1180    return true;
1181}
1182
1183static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1184                       TCGReg base, intptr_t ofs)
1185{
1186    AArch64Insn insn;
1187    int lgsz;
1188
1189    switch (type) {
1190    case TCG_TYPE_I32:
1191        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1192        lgsz = 2;
1193        break;
1194    case TCG_TYPE_I64:
1195        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1196        lgsz = 3;
1197        break;
1198    case TCG_TYPE_V64:
1199        insn = I3312_LDRVD;
1200        lgsz = 3;
1201        break;
1202    case TCG_TYPE_V128:
1203        insn = I3312_LDRVQ;
1204        lgsz = 4;
1205        break;
1206    default:
1207        g_assert_not_reached();
1208    }
1209    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1210}
1211
1212static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1213                       TCGReg base, intptr_t ofs)
1214{
1215    AArch64Insn insn;
1216    int lgsz;
1217
1218    switch (type) {
1219    case TCG_TYPE_I32:
1220        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1221        lgsz = 2;
1222        break;
1223    case TCG_TYPE_I64:
1224        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1225        lgsz = 3;
1226        break;
1227    case TCG_TYPE_V64:
1228        insn = I3312_STRVD;
1229        lgsz = 3;
1230        break;
1231    case TCG_TYPE_V128:
1232        insn = I3312_STRVQ;
1233        lgsz = 4;
1234        break;
1235    default:
1236        g_assert_not_reached();
1237    }
1238    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1239}
1240
1241static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1242                               TCGReg base, intptr_t ofs)
1243{
1244    if (type <= TCG_TYPE_I64 && val == 0) {
1245        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1246        return true;
1247    }
1248    return false;
1249}
1250
1251static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1252                               TCGReg rn, unsigned int a, unsigned int b)
1253{
1254    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1255}
1256
1257static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1258                                TCGReg rn, unsigned int a, unsigned int b)
1259{
1260    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1261}
1262
1263static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1264                                TCGReg rn, unsigned int a, unsigned int b)
1265{
1266    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1267}
1268
1269static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1270                                TCGReg rn, TCGReg rm, unsigned int a)
1271{
1272    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1273}
1274
1275static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1276                               TCGReg rd, TCGReg rn, unsigned int m)
1277{
1278    int bits = ext ? 64 : 32;
1279    int max = bits - 1;
1280    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1281}
1282
1283static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1284                               TCGReg rd, TCGReg rn, unsigned int m)
1285{
1286    int max = ext ? 63 : 31;
1287    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1288}
1289
1290static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1291                               TCGReg rd, TCGReg rn, unsigned int m)
1292{
1293    int max = ext ? 63 : 31;
1294    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1295}
1296
1297static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1298                                TCGReg rd, TCGReg rn, unsigned int m)
1299{
1300    int max = ext ? 63 : 31;
1301    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1302}
1303
1304static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1305                                TCGReg rd, TCGReg rn, unsigned int m)
1306{
1307    int max = ext ? 63 : 31;
1308    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1309}
1310
1311static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1312                               TCGReg rn, unsigned lsb, unsigned width)
1313{
1314    unsigned size = ext ? 64 : 32;
1315    unsigned a = (size - lsb) & (size - 1);
1316    unsigned b = width - 1;
1317    tcg_out_bfm(s, ext, rd, rn, a, b);
1318}
1319
1320static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1321                        tcg_target_long b, bool const_b)
1322{
1323    if (const_b) {
1324        /* Using CMP or CMN aliases.  */
1325        if (b >= 0) {
1326            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1327        } else {
1328            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1329        }
1330    } else {
1331        /* Using CMP alias SUBS wzr, Wn, Wm */
1332        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1333    }
1334}
1335
1336static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1337{
1338    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1339    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1340    tcg_out_insn(s, 3206, B, offset);
1341}
1342
1343static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1344{
1345    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1346    if (offset == sextract64(offset, 0, 26)) {
1347        tcg_out_insn(s, 3206, B, offset);
1348    } else {
1349        /* Choose X9 as a call-clobbered non-LR temporary. */
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1351        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1352    }
1353}
1354
1355static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1356{
1357    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1358    if (offset == sextract64(offset, 0, 26)) {
1359        tcg_out_insn(s, 3206, BL, offset);
1360    } else {
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1363    }
1364}
1365
/*
 * Emit a call to TARGET.  INFO is not used by this backend; the work
 * is done by tcg_out_call_int().
 */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}
1371
1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1373{
1374    if (!l->has_value) {
1375        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1376        tcg_out_insn(s, 3206, B, 0);
1377    } else {
1378        tcg_out_goto(s, l->u.value_ptr);
1379    }
1380}
1381
1382static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1383                           TCGArg b, bool b_const, TCGLabel *l)
1384{
1385    intptr_t offset;
1386    bool need_cmp;
1387
1388    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1389        need_cmp = false;
1390    } else {
1391        need_cmp = true;
1392        tcg_out_cmp(s, ext, a, b, b_const);
1393    }
1394
1395    if (!l->has_value) {
1396        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1397        offset = tcg_in32(s) >> 5;
1398    } else {
1399        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1400        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1401    }
1402
1403    if (need_cmp) {
1404        tcg_out_insn(s, 3202, B_C, c, offset);
1405    } else if (c == TCG_COND_EQ) {
1406        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1407    } else {
1408        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1409    }
1410}
1411
1412static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1413                               TCGReg rd, TCGReg rn)
1414{
1415    /* REV, REV16, REV32 */
1416    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1417}
1418
1419static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1420                               TCGReg rd, TCGReg rn)
1421{
1422    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1423    int bits = (8 << s_bits) - 1;
1424    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1425}
1426
1427static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1428{
1429    tcg_out_sxt(s, type, MO_8, rd, rn);
1430}
1431
1432static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1433{
1434    tcg_out_sxt(s, type, MO_16, rd, rn);
1435}
1436
1437static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1438{
1439    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1440}
1441
1442static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1443{
1444    tcg_out_ext32s(s, rd, rn);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_uxt(s, MO_8, rd, rn);
1458}
1459
1460static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1461{
1462    tcg_out_uxt(s, MO_16, rd, rn);
1463}
1464
/*
 * Zero-extend the low 32 bits of RN into RD, implemented as a 32-bit
 * register move.
 * NOTE(review): presumably relies on 32-bit register writes clearing the
 * upper half of the destination -- confirm against tcg_out_movr.
 */
static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}
1469
1470static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1471{
1472    tcg_out_ext32u(s, rd, rn);
1473}
1474
/*
 * Extract the low 32 bits of a 64-bit value in RN into RD, implemented
 * as a TCG_TYPE_I32 move.  The boolean result of tcg_out_mov() is
 * ignored.
 */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
1479
1480static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1481                            TCGReg rn, int64_t aimm)
1482{
1483    if (aimm >= 0) {
1484        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1485    } else {
1486        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1487    }
1488}
1489
1490static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1491                            TCGReg rh, TCGReg al, TCGReg ah,
1492                            tcg_target_long bl, tcg_target_long bh,
1493                            bool const_bl, bool const_bh, bool sub)
1494{
1495    TCGReg orig_rl = rl;
1496    AArch64Insn insn;
1497
1498    if (rl == ah || (!const_bh && rl == bh)) {
1499        rl = TCG_REG_TMP;
1500    }
1501
1502    if (const_bl) {
1503        if (bl < 0) {
1504            bl = -bl;
1505            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1506        } else {
1507            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1508        }
1509
1510        if (unlikely(al == TCG_REG_XZR)) {
1511            /* ??? We want to allow al to be zero for the benefit of
1512               negation via subtraction.  However, that leaves open the
1513               possibility of adding 0+const in the low part, and the
1514               immediate add instructions encode XSP not XZR.  Don't try
1515               anything more elaborate here than loading another zero.  */
1516            al = TCG_REG_TMP;
1517            tcg_out_movi(s, ext, al, 0);
1518        }
1519        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1520    } else {
1521        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1522    }
1523
1524    insn = I3503_ADC;
1525    if (const_bh) {
1526        /* Note that the only two constants we support are 0 and -1, and
1527           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1528        if ((bh != 0) ^ sub) {
1529            insn = I3503_SBC;
1530        }
1531        bh = TCG_REG_XZR;
1532    } else if (sub) {
1533        insn = I3503_SBC;
1534    }
1535    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1536
1537    tcg_out_mov(s, ext, orig_rl, rl);
1538}
1539
1540static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1541{
1542    static const uint32_t sync[] = {
1543        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1544        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1545        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1546        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1547        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1548    };
1549    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1550}
1551
1552static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1553                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1554{
1555    TCGReg a1 = a0;
1556    if (is_ctz) {
1557        a1 = TCG_REG_TMP;
1558        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1559    }
1560    if (const_b && b == (ext ? 64 : 32)) {
1561        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1562    } else {
1563        AArch64Insn sel = I3506_CSEL;
1564
1565        tcg_out_cmp(s, ext, a0, 0, 1);
1566        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1567
1568        if (const_b) {
1569            if (b == -1) {
1570                b = TCG_REG_XZR;
1571                sel = I3506_CSINV;
1572            } else if (b == 0) {
1573                b = TCG_REG_XZR;
1574            } else {
1575                tcg_out_movi(s, ext, d, b);
1576                b = d;
1577            }
1578        }
1579        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1580    }
1581}
1582
/*
 * Host address of a guest memory access, in the base + index form
 * consumed by the register-offset load/store emitters.
 */
typedef struct {
    TCGReg base;        /* base register */
    TCGReg index;       /* index register (TCG_REG_XZR when unused) */
    TCGType index_ext;  /* type used to interpret/extend the index */
} HostAddress;
1588
1589#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     MemOpIdx oi, uintptr_t ra)
 *
 * Load helpers indexed by access size (MemOp & MO_SIZE).  Multi-byte
 * entries use the helper matching the host byte order.
 */
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
#else
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
#endif
};
1605
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, MemOpIdx oi,
 *                                     uintptr_t ra)
 *
 * Store helpers indexed by access size (MemOp & MO_SIZE).  Multi-byte
 * entries use the helper matching the host byte order.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
#else
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
#endif
};
1622
/*
 * Parameters handed to the common load/store helper argument code:
 * one temporary register is offered, TCG_REG_TMP.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP }
};
1626
1627static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1628{
1629    MemOp opc = get_memop(lb->oi);
1630
1631    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1632        return false;
1633    }
1634
1635    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1636    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1637    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1638    tcg_out_goto(s, lb->raddr);
1639    return true;
1640}
1641
1642static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1643{
1644    MemOp opc = get_memop(lb->oi);
1645
1646    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1647        return false;
1648    }
1649
1650    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1651    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1652    tcg_out_goto(s, lb->raddr);
1653    return true;
1654}
1655#else
1656static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1657{
1658    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1659    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1660    tcg_out_insn(s, 3406, ADR, rd, offset);
1661}
1662
1663static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1664{
1665    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1666        return false;
1667    }
1668
1669    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1670    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1671
1672    /* "Tail call" to the helper, with the return address back inline. */
1673    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1674    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1675                                        : helper_unaligned_st));
1676    return true;
1677}
1678
/*
 * User-mode load slow path: the only slow path here is an alignment
 * failure, handled by tcg_out_fail_alignment().
 */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1683
/*
 * User-mode store slow path: the only slow path here is an alignment
 * failure, handled by tcg_out_fail_alignment().
 */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1688#endif /* CONFIG_SOFTMMU */
1689
/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 *
 * @addr_reg holds the guest address, @oi the MemOp/mmu-index pair and
 * @is_ld selects between the read and write TLB comparators.  The
 * softmmu path clobbers X0, X1 and X3.  NULL is returned only in the
 * user-only build when no alignment check is needed.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    unsigned a_bits = get_alignment_bits(opc);
    unsigned a_mask = (1u << a_bits) - 1;

#ifdef CONFIG_SOFTMMU
    unsigned s_bits = opc & MO_SIZE;
    unsigned s_mask = (1u << s_bits) - 1;
    unsigned mem_index = get_mmuidx(oi);
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* A slow path always exists for softmmu (TLB miss). */
    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addr_reg;

    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
               is_ld ? offsetof(CPUTLBEntry, addr_read)
                     : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /*
     * For aligned accesses, we check the first byte and include the alignment
     * bits within the address.  For unaligned access, we check that we don't
     * cross pages using the address of the last byte of the access.
     */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    /* Keep the page number plus the low bits that must be zero. */
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    /* Record the branch location; the slow-path emitter patches it. */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    /* Fast path address: TLB addend (X1) indexed by the guest address. */
    *h = (HostAddress){
        .base = TCG_REG_X1,
        .index = addr_reg,
        .index_ext = addr_type
    };
#else
    /* User-only: a slow path is needed only when alignment is enforced. */
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (USE_GUEST_BASE) {
        /* Guest base register indexed by the guest address. */
        *h = (HostAddress){
            .base = TCG_REG_GUEST_BASE,
            .index = addr_reg,
            .index_ext = addr_type
        };
    } else {
        /* No guest base: the guest address is used directly as the base. */
        *h = (HostAddress){
            .base = addr_reg,
            .index = TCG_REG_XZR,
            .index_ext = TCG_TYPE_I64
        };
    }
#endif

    return ldst;
}
1808
1809static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1810                                   TCGReg data_r, HostAddress h)
1811{
1812    switch (memop & MO_SSIZE) {
1813    case MO_UB:
1814        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1815        break;
1816    case MO_SB:
1817        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1818                       data_r, h.base, h.index_ext, h.index);
1819        break;
1820    case MO_UW:
1821        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1822        break;
1823    case MO_SW:
1824        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1825                       data_r, h.base, h.index_ext, h.index);
1826        break;
1827    case MO_UL:
1828        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1829        break;
1830    case MO_SL:
1831        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1832        break;
1833    case MO_UQ:
1834        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1835        break;
1836    default:
1837        g_assert_not_reached();
1838    }
1839}
1840
1841static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1842                                   TCGReg data_r, HostAddress h)
1843{
1844    switch (memop & MO_SIZE) {
1845    case MO_8:
1846        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1847        break;
1848    case MO_16:
1849        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1850        break;
1851    case MO_32:
1852        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1853        break;
1854    case MO_64:
1855        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1856        break;
1857    default:
1858        g_assert_not_reached();
1859    }
1860}
1861
1862static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1863                            MemOpIdx oi, TCGType data_type)
1864{
1865    TCGLabelQemuLdst *ldst;
1866    HostAddress h;
1867
1868    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1869    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1870
1871    if (ldst) {
1872        ldst->type = data_type;
1873        ldst->datalo_reg = data_reg;
1874        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1875    }
1876}
1877
1878static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1879                            MemOpIdx oi, TCGType data_type)
1880{
1881    TCGLabelQemuLdst *ldst;
1882    HostAddress h;
1883
1884    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1885    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1886
1887    if (ldst) {
1888        ldst->type = data_type;
1889        ldst->datalo_reg = data_reg;
1890        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1891    }
1892}
1893
1894static const tcg_insn_unit *tb_ret_addr;
1895
1896static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1897{
1898    /* Reuse the zeroing that exists for goto_ptr.  */
1899    if (a0 == 0) {
1900        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1901    } else {
1902        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1903        tcg_out_goto_long(s, tb_ret_addr);
1904    }
1905}
1906
1907static void tcg_out_goto_tb(TCGContext *s, int which)
1908{
1909    /*
1910     * Direct branch, or indirect address load, will be patched
1911     * by tb_target_set_jmp_target.  Assert indirect load offset
1912     * in range early, regardless of direct branch distance.
1913     */
1914    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1915    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1916
1917    set_jmp_insn_offset(s, which);
1918    tcg_out32(s, I3206_B);
1919    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1920    set_jmp_reset_offset(s, which);
1921}
1922
1923void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1924                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1925{
1926    uintptr_t d_addr = tb->jmp_target_addr[n];
1927    ptrdiff_t d_offset = d_addr - jmp_rx;
1928    tcg_insn_unit insn;
1929
1930    /* Either directly branch, or indirect branch load. */
1931    if (d_offset == sextract64(d_offset, 0, 28)) {
1932        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1933    } else {
1934        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1935        ptrdiff_t i_offset = i_addr - jmp_rx;
1936
1937        /* Note that we asserted this in range in tcg_out_goto_tb. */
1938        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1939    }
1940    qatomic_set((uint32_t *)jmp_rw, insn);
1941    flush_idcache_range(jmp_rx, jmp_rw, 4);
1942}
1943
/*
 * Emit host code for one integer, memory or control-flow TCG opcode.
 *
 * s          - code generation context handed to every emitter.
 * opc        - opcode to emit; selects the switch case below.
 * args       - operand array; a0, a1 and a2 cache args[0..2].
 * const_args - per-operand flags; where a case tests one, a non-zero
 *              value selects the immediate (or zero-register) form
 *              instead of the register form.
 *
 * Opcodes grouped with the default label are emitted elsewhere and
 * abort via g_assert_not_reached() if they ever arrive here.
 */
1944static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1945                       const TCGArg args[TCG_MAX_OP_ARGS],
1946                       const int const_args[TCG_MAX_OP_ARGS])
1947{
1948    /* 99% of the time, we can signal the use of extension registers
1949       by looking to see if the opcode handles 64-bit data.  */
1950    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1951
1952    /* Hoist the loads of the most common arguments.  */
1953    TCGArg a0 = args[0];
1954    TCGArg a1 = args[1];
1955    TCGArg a2 = args[2];
1956    int c2 = const_args[2];
1957
1958    /* Some operands are defined with "rZ" constraint, a register or
1959       the zero register.  These need not actually test args[I] == 0.  */
1960#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1961
1962    switch (opc) {
1963    case INDEX_op_goto_ptr:
1964        tcg_out_insn(s, 3207, BR, a0);
1965        break;
1966
1967    case INDEX_op_br:
1968        tcg_out_goto_label(s, arg_label(a0));
1969        break;
1970
    /*
     * Host loads and stores.  The final tcg_out_ldst argument is 0..3
     * for 1/2/4/8-byte accesses (presumably log2 of the access size;
     * confirm against tcg_out_ldst).
     */
1971    case INDEX_op_ld8u_i32:
1972    case INDEX_op_ld8u_i64:
1973        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1974        break;
1975    case INDEX_op_ld8s_i32:
1976        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1977        break;
1978    case INDEX_op_ld8s_i64:
1979        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1980        break;
1981    case INDEX_op_ld16u_i32:
1982    case INDEX_op_ld16u_i64:
1983        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1984        break;
1985    case INDEX_op_ld16s_i32:
1986        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1987        break;
1988    case INDEX_op_ld16s_i64:
1989        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1990        break;
1991    case INDEX_op_ld_i32:
1992    case INDEX_op_ld32u_i64:
1993        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1994        break;
1995    case INDEX_op_ld32s_i64:
1996        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1997        break;
1998    case INDEX_op_ld_i64:
1999        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2000        break;
2001
    /* Stores take the data operand through REG0, so it may be XZR. */
2002    case INDEX_op_st8_i32:
2003    case INDEX_op_st8_i64:
2004        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2005        break;
2006    case INDEX_op_st16_i32:
2007    case INDEX_op_st16_i64:
2008        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2009        break;
2010    case INDEX_op_st_i32:
2011    case INDEX_op_st32_i64:
2012        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2013        break;
2014    case INDEX_op_st_i64:
2015        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2016        break;
2017
    /*
     * For the 32-bit arithmetic and logical opcodes below, a2 is first
     * narrowed through int32_t and then shares the 64-bit case's code.
     */
2018    case INDEX_op_add_i32:
2019        a2 = (int32_t)a2;
2020        /* FALLTHRU */
2021    case INDEX_op_add_i64:
2022        if (c2) {
2023            tcg_out_addsubi(s, ext, a0, a1, a2);
2024        } else {
2025            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2026        }
2027        break;
2028
2029    case INDEX_op_sub_i32:
2030        a2 = (int32_t)a2;
2031        /* FALLTHRU */
2032    case INDEX_op_sub_i64:
2033        if (c2) {
            /* Subtracting a constant is emitted as adding its negation. */
2034            tcg_out_addsubi(s, ext, a0, a1, -a2);
2035        } else {
2036            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2037        }
2038        break;
2039
2040    case INDEX_op_neg_i64:
2041    case INDEX_op_neg_i32:
        /* Negation is a subtraction from the zero register. */
2042        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2043        break;
2044
2045    case INDEX_op_and_i32:
2046        a2 = (int32_t)a2;
2047        /* FALLTHRU */
2048    case INDEX_op_and_i64:
2049        if (c2) {
2050            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2051        } else {
2052            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2053        }
2054        break;
2055
    /*
     * andc, orc and eqv with a constant operand use the plain immediate
     * instruction on the complemented constant.
     */
2056    case INDEX_op_andc_i32:
2057        a2 = (int32_t)a2;
2058        /* FALLTHRU */
2059    case INDEX_op_andc_i64:
2060        if (c2) {
2061            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2062        } else {
2063            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2064        }
2065        break;
2066
2067    case INDEX_op_or_i32:
2068        a2 = (int32_t)a2;
2069        /* FALLTHRU */
2070    case INDEX_op_or_i64:
2071        if (c2) {
2072            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2073        } else {
2074            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2075        }
2076        break;
2077
2078    case INDEX_op_orc_i32:
2079        a2 = (int32_t)a2;
2080        /* FALLTHRU */
2081    case INDEX_op_orc_i64:
2082        if (c2) {
2083            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2084        } else {
2085            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2086        }
2087        break;
2088
2089    case INDEX_op_xor_i32:
2090        a2 = (int32_t)a2;
2091        /* FALLTHRU */
2092    case INDEX_op_xor_i64:
2093        if (c2) {
2094            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2095        } else {
2096            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2097        }
2098        break;
2099
2100    case INDEX_op_eqv_i32:
2101        a2 = (int32_t)a2;
2102        /* FALLTHRU */
2103    case INDEX_op_eqv_i64:
2104        if (c2) {
2105            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2106        } else {
2107            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2108        }
2109        break;
2110
2111    case INDEX_op_not_i64:
2112    case INDEX_op_not_i32:
        /* Bitwise NOT is ORN with the zero register as first source. */
2113        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2114        break;
2115
2116    case INDEX_op_mul_i64:
2117    case INDEX_op_mul_i32:
        /* Multiply is MADD with the zero register as the addend. */
2118        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2119        break;
2120
2121    case INDEX_op_div_i64:
2122    case INDEX_op_div_i32:
2123        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2124        break;
2125    case INDEX_op_divu_i64:
2126    case INDEX_op_divu_i32:
2127        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2128        break;
2129
    /*
     * Remainder takes two instructions: the quotient goes to
     * TCG_REG_TMP, then MSUB combines it with a2 and a1 into a0.
     */
2130    case INDEX_op_rem_i64:
2131    case INDEX_op_rem_i32:
2132        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2133        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2134        break;
2135    case INDEX_op_remu_i64:
2136    case INDEX_op_remu_i32:
2137        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2138        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2139        break;
2140
    /* Shifts and rotates: immediate helper when a2 is constant. */
2141    case INDEX_op_shl_i64:
2142    case INDEX_op_shl_i32:
2143        if (c2) {
2144            tcg_out_shl(s, ext, a0, a1, a2);
2145        } else {
2146            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2147        }
2148        break;
2149
2150    case INDEX_op_shr_i64:
2151    case INDEX_op_shr_i32:
2152        if (c2) {
2153            tcg_out_shr(s, ext, a0, a1, a2);
2154        } else {
2155            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2156        }
2157        break;
2158
2159    case INDEX_op_sar_i64:
2160    case INDEX_op_sar_i32:
2161        if (c2) {
2162            tcg_out_sar(s, ext, a0, a1, a2);
2163        } else {
2164            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2165        }
2166        break;
2167
2168    case INDEX_op_rotr_i64:
2169    case INDEX_op_rotr_i32:
2170        if (c2) {
2171            tcg_out_rotr(s, ext, a0, a1, a2);
2172        } else {
2173            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2174        }
2175        break;
2176
2177    case INDEX_op_rotl_i64:
2178    case INDEX_op_rotl_i32:
2179        if (c2) {
2180            tcg_out_rotl(s, ext, a0, a1, a2);
2181        } else {
            /*
             * Rotate left by a register is a rotate right by the
             * negated count; the negation itself is always emitted as
             * a 32-bit SUB from the zero register into TCG_REG_TMP.
             */
2182            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2183            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2184        }
2185        break;
2186
    /* clz and ctz share one helper; the last argument selects ctz. */
2187    case INDEX_op_clz_i64:
2188    case INDEX_op_clz_i32:
2189        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2190        break;
2191    case INDEX_op_ctz_i64:
2192    case INDEX_op_ctz_i32:
2193        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2194        break;
2195
    /* brcond: a0 and a1 are compared, a2 is the condition, args[3] the label. */
2196    case INDEX_op_brcond_i32:
2197        a1 = (int32_t)a1;
2198        /* FALLTHRU */
2199    case INDEX_op_brcond_i64:
2200        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2201        break;
2202
2203    case INDEX_op_setcond_i32:
2204        a2 = (int32_t)a2;
2205        /* FALLTHRU */
2206    case INDEX_op_setcond_i64:
2207        tcg_out_cmp(s, ext, a1, a2, c2);
2208        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2209        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2210                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2211        break;
2212
2213    case INDEX_op_movcond_i32:
2214        a2 = (int32_t)a2;
2215        /* FALLTHRU */
2216    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then CSEL between args[3] and args[4]. */
2217        tcg_out_cmp(s, ext, a1, a2, c2);
2218        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2219        break;
2220
    /* Guest memory accesses: a2 carries the MemOpIdx. */
2221    case INDEX_op_qemu_ld_i32:
2222    case INDEX_op_qemu_ld_i64:
2223        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2224        break;
2225    case INDEX_op_qemu_st_i32:
2226    case INDEX_op_qemu_st_i64:
2227        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2228        break;
2229
    /* Byte swaps: a2 holds the TCG_BSWAP_* flags where they are tested. */
2230    case INDEX_op_bswap64_i64:
2231        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2232        break;
2233    case INDEX_op_bswap32_i64:
2234        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2235        if (a2 & TCG_BSWAP_OS) {
2236            tcg_out_ext32s(s, a0, a0);
2237        }
2238        break;
2239    case INDEX_op_bswap32_i32:
2240        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2241        break;
2242    case INDEX_op_bswap16_i64:
2243    case INDEX_op_bswap16_i32:
2244        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2245        if (a2 & TCG_BSWAP_OS) {
2246            /* Output must be sign-extended. */
2247            tcg_out_ext16s(s, ext, a0, a0);
2248        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2249            /* Output must be zero-extended, but input isn't. */
2250            tcg_out_ext16u(s, a0, a0);
2251        }
2252        break;
2253
2254    case INDEX_op_deposit_i64:
2255    case INDEX_op_deposit_i32:
2256        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2257        break;
2258
    /*
     * extract/sextract: a2 is the first bit and args[3] the length, so
     * the last bit passed on is a2 + args[3] - 1.
     */
2259    case INDEX_op_extract_i64:
2260    case INDEX_op_extract_i32:
2261        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2262        break;
2263
2264    case INDEX_op_sextract_i64:
2265    case INDEX_op_sextract_i32:
2266        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2267        break;
2268
2269    case INDEX_op_extract2_i64:
2270    case INDEX_op_extract2_i32:
        /* Note the operand order: args[2] is passed before args[1]. */
2271        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2272        break;
2273
    /*
     * Double-word add/subtract; the final flag selects subtraction.
     * The 32-bit forms narrow args[4] through int32_t first.
     */
2274    case INDEX_op_add2_i32:
2275        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2276                        (int32_t)args[4], args[5], const_args[4],
2277                        const_args[5], false);
2278        break;
2279    case INDEX_op_add2_i64:
2280        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2281                        args[5], const_args[4], const_args[5], false);
2282        break;
2283    case INDEX_op_sub2_i32:
2284        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2285                        (int32_t)args[4], args[5], const_args[4],
2286                        const_args[5], true);
2287        break;
2288    case INDEX_op_sub2_i64:
2289        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2290                        args[5], const_args[4], const_args[5], true);
2291        break;
2292
2293    case INDEX_op_muluh_i64:
2294        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2295        break;
2296    case INDEX_op_mulsh_i64:
2297        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2298        break;
2299
2300    case INDEX_op_mb:
2301        tcg_out_mb(s, a0);
2302        break;
2303
    /*
     * The opcodes listed here are emitted through other entry points,
     * so reaching this function with one of them is a bug.
     */
2304    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2305    case INDEX_op_mov_i64:
2306    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2307    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2308    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2309    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2310    case INDEX_op_ext8s_i64:
2311    case INDEX_op_ext8u_i32:
2312    case INDEX_op_ext8u_i64:
2313    case INDEX_op_ext16s_i64:
2314    case INDEX_op_ext16s_i32:
2315    case INDEX_op_ext16u_i64:
2316    case INDEX_op_ext16u_i32:
2317    case INDEX_op_ext32s_i64:
2318    case INDEX_op_ext32u_i64:
2319    case INDEX_op_ext_i32_i64:
2320    case INDEX_op_extu_i32_i64:
2321    case INDEX_op_extrl_i64_i32:
2322    default:
2323        g_assert_not_reached();
2324    }
2325
2326#undef REG0
2327}
2328
/*
 * Emit host code for one vector TCG opcode.
 *
 * s          - code generation context handed to every emitter.
 * opc        - vector opcode to emit; selects the switch case below.
 * vecl       - vector length selector; added to TCG_TYPE_V64 to form the
 *              vector type and also used directly as the Q bit.
 * vece       - element size (MO_8 .. MO_64).
 * args       - operand array; a0, a1 and a2 cache args[0..2].
 * const_args - per-operand flags; a non-zero const_args[2] selects the
 *              constant-operand handling where a case provides one.
 *
 * mov_vec and dup_vec are emitted elsewhere and, like any unknown
 * opcode, abort via g_assert_not_reached().
 */
2329static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2330                           unsigned vecl, unsigned vece,
2331                           const TCGArg args[TCG_MAX_OP_ARGS],
2332                           const int const_args[TCG_MAX_OP_ARGS])
2333{
    /*
     * Comparison opcode tables indexed by TCGCond.  Entries left at zero
     * mean there is no direct instruction for that condition; the cmp_vec
     * case below tests for zero before using an entry.
     */
2334    static const AArch64Insn cmp_vec_insn[16] = {
2335        [TCG_COND_EQ] = I3616_CMEQ,
2336        [TCG_COND_GT] = I3616_CMGT,
2337        [TCG_COND_GE] = I3616_CMGE,
2338        [TCG_COND_GTU] = I3616_CMHI,
2339        [TCG_COND_GEU] = I3616_CMHS,
2340    };
2341    static const AArch64Insn cmp_scalar_insn[16] = {
2342        [TCG_COND_EQ] = I3611_CMEQ,
2343        [TCG_COND_GT] = I3611_CMGT,
2344        [TCG_COND_GE] = I3611_CMGE,
2345        [TCG_COND_GTU] = I3611_CMHI,
2346        [TCG_COND_GEU] = I3611_CMHS,
2347    };
    /* Single-source compare forms, used when operand 2 is a constant. */
2348    static const AArch64Insn cmp0_vec_insn[16] = {
2349        [TCG_COND_EQ] = I3617_CMEQ0,
2350        [TCG_COND_GT] = I3617_CMGT0,
2351        [TCG_COND_GE] = I3617_CMGE0,
2352        [TCG_COND_LT] = I3617_CMLT0,
2353        [TCG_COND_LE] = I3617_CMLE0,
2354    };
2355    static const AArch64Insn cmp0_scalar_insn[16] = {
2356        [TCG_COND_EQ] = I3612_CMEQ0,
2357        [TCG_COND_GT] = I3612_CMGT0,
2358        [TCG_COND_GE] = I3612_CMGE0,
2359        [TCG_COND_LT] = I3612_CMLT0,
2360        [TCG_COND_LE] = I3612_CMLE0,
2361    };
2362
    /*
     * A non-Q operation on 64-bit elements is emitted with the scalar
     * instruction forms wherever a case offers both.
     */
2363    TCGType type = vecl + TCG_TYPE_V64;
2364    unsigned is_q = vecl;
2365    bool is_scalar = !is_q && vece == MO_64;
2366    TCGArg a0, a1, a2, a3;
2367    int cmode, imm8;
2368
2369    a0 = args[0];
2370    a1 = args[1];
2371    a2 = args[2];
2372
2373    switch (opc) {
2374    case INDEX_op_ld_vec:
2375        tcg_out_ld(s, type, a0, a1, a2);
2376        break;
2377    case INDEX_op_st_vec:
2378        tcg_out_st(s, type, a0, a1, a2);
2379        break;
2380    case INDEX_op_dupm_vec:
2381        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2382        break;
2383    case INDEX_op_add_vec:
2384        if (is_scalar) {
2385            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2386        } else {
2387            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2388        }
2389        break;
2390    case INDEX_op_sub_vec:
2391        if (is_scalar) {
2392            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2393        } else {
2394            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2395        }
2396        break;
2397    case INDEX_op_mul_vec:
2398        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2399        break;
2400    case INDEX_op_neg_vec:
2401        if (is_scalar) {
2402            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2403        } else {
2404            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2405        }
2406        break;
2407    case INDEX_op_abs_vec:
2408        if (is_scalar) {
2409            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2410        } else {
2411            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2412        }
2413        break;
    /*
     * and/or/andc/orc with a constant operand 2 follow one pattern: the
     * constant (complemented for and/orc) is decoded into cmode/imm8 by
     * is_shimm1632(), whose return value is not checked here.  If the
     * destination is also the first source, a single immediate BIC/ORR
     * updates it in place and the function returns.  Otherwise the
     * immediate is first materialized in a0 with MOVI/MVNI and a0 is
     * then used as the second source of the register form.
     */
2414    case INDEX_op_and_vec:
2415        if (const_args[2]) {
2416            is_shimm1632(~a2, &cmode, &imm8);
2417            if (a0 == a1) {
2418                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2419                return;
2420            }
2421            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2422            a2 = a0;
2423        }
2424        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2425        break;
2426    case INDEX_op_or_vec:
2427        if (const_args[2]) {
2428            is_shimm1632(a2, &cmode, &imm8);
2429            if (a0 == a1) {
2430                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2431                return;
2432            }
2433            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2434            a2 = a0;
2435        }
2436        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2437        break;
2438    case INDEX_op_andc_vec:
2439        if (const_args[2]) {
2440            is_shimm1632(a2, &cmode, &imm8);
2441            if (a0 == a1) {
2442                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2443                return;
2444            }
2445            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2446            a2 = a0;
2447        }
2448        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2449        break;
2450    case INDEX_op_orc_vec:
2451        if (const_args[2]) {
2452            is_shimm1632(~a2, &cmode, &imm8);
2453            if (a0 == a1) {
2454                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2455                return;
2456            }
2457            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2458            a2 = a0;
2459        }
2460        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2461        break;
2462    case INDEX_op_xor_vec:
2463        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2464        break;
    /* Saturating add/subtract, signed (SQ*) and unsigned (UQ*). */
2465    case INDEX_op_ssadd_vec:
2466        if (is_scalar) {
2467            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2468        } else {
2469            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2470        }
2471        break;
2472    case INDEX_op_sssub_vec:
2473        if (is_scalar) {
2474            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2475        } else {
2476            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2477        }
2478        break;
2479    case INDEX_op_usadd_vec:
2480        if (is_scalar) {
2481            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2482        } else {
2483            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2484        }
2485        break;
2486    case INDEX_op_ussub_vec:
2487        if (is_scalar) {
2488            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2489        } else {
2490            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2491        }
2492        break;
    /* min/max and mul are only emitted in the vector form here. */
2493    case INDEX_op_smax_vec:
2494        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2495        break;
2496    case INDEX_op_smin_vec:
2497        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2498        break;
2499    case INDEX_op_umax_vec:
2500        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2501        break;
2502    case INDEX_op_umin_vec:
2503        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2504        break;
2505    case INDEX_op_not_vec:
2506        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2507        break;
    /*
     * Immediate shifts.  The value passed on folds the element size into
     * the count: left shifts pass count + element bits (8 << vece),
     * right shifts pass twice the element bits (16 << vece) minus count.
     */
2508    case INDEX_op_shli_vec:
2509        if (is_scalar) {
2510            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2511        } else {
2512            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2513        }
2514        break;
2515    case INDEX_op_shri_vec:
2516        if (is_scalar) {
2517            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2518        } else {
2519            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2520        }
2521        break;
2522    case INDEX_op_sari_vec:
2523        if (is_scalar) {
2524            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2525        } else {
2526            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2527        }
2528        break;
    /*
     * Shift-left-and-insert: the source register is a2 and the count is
     * args[3]; a1 is not passed to the instruction (presumably it is
     * constrained to match a0 -- confirm against the operand constraints).
     */
2529    case INDEX_op_aa64_sli_vec:
2530        if (is_scalar) {
2531            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2532        } else {
2533            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2534        }
2535        break;
    /* Shifts by a per-element register count. */
2536    case INDEX_op_shlv_vec:
2537        if (is_scalar) {
2538            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2539        } else {
2540            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2541        }
2542        break;
2543    case INDEX_op_aa64_sshl_vec:
2544        if (is_scalar) {
2545            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2546        } else {
2547            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2548        }
2549        break;
2550    case INDEX_op_cmp_vec:
2551        {
2552            TCGCond cond = args[3];
2553            AArch64Insn insn;
2554
2555            if (cond == TCG_COND_NE) {
                /*
                 * NE has no table entry.  With a constant operand it is
                 * emitted as CMTST of a1 against itself (presumably the
                 * constant is always zero -- confirm against the operand
                 * constraints); otherwise as CMEQ followed by NOT.
                 */
2556                if (const_args[2]) {
2557                    if (is_scalar) {
2558                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2559                    } else {
2560                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2561                    }
2562                } else {
2563                    if (is_scalar) {
2564                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2565                    } else {
2566                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2567                    }
2568                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2569                }
2570            } else {
2571                if (const_args[2]) {
                    /*
                     * Prefer the single-source compare when the table has
                     * one for this condition; otherwise load zero into
                     * TCG_VEC_TMP and fall through to the two-source path.
                     */
2572                    if (is_scalar) {
2573                        insn = cmp0_scalar_insn[cond];
2574                        if (insn) {
2575                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2576                            break;
2577                        }
2578                    } else {
2579                        insn = cmp0_vec_insn[cond];
2580                        if (insn) {
2581                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2582                            break;
2583                        }
2584                    }
2585                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2586                    a2 = TCG_VEC_TMP;
2587                }
                /*
                 * Two-source compare.  If the condition has no entry, swap
                 * the operands and use the swapped condition, which must
                 * have one.
                 */
2588                if (is_scalar) {
2589                    insn = cmp_scalar_insn[cond];
2590                    if (insn == 0) {
2591                        TCGArg t;
2592                        t = a1, a1 = a2, a2 = t;
2593                        cond = tcg_swap_cond(cond);
2594                        insn = cmp_scalar_insn[cond];
2595                        tcg_debug_assert(insn != 0);
2596                    }
2597                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2598                } else {
2599                    insn = cmp_vec_insn[cond];
2600                    if (insn == 0) {
2601                        TCGArg t;
2602                        t = a1, a1 = a2, a2 = t;
2603                        cond = tcg_swap_cond(cond);
2604                        insn = cmp_vec_insn[cond];
2605                        tcg_debug_assert(insn != 0);
2606                    }
2607                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2608                }
2609            }
2610        }
2611        break;
2612
    /*
     * bitsel: the instruction is chosen by which input already lives in
     * the destination.  BIT is used when a0 aliases args[3], BIF when it
     * aliases a2; otherwise a1 is copied into a0 (if not already there)
     * and BSL is used.
     */
2613    case INDEX_op_bitsel_vec:
2614        a3 = args[3];
2615        if (a0 == a3) {
2616            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2617        } else if (a0 == a2) {
2618            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2619        } else {
2620            if (a0 != a1) {
2621                tcg_out_mov(s, type, a0, a1);
2622            }
2623            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2624        }
2625        break;
2626
2627    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2628    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2629    default:
2630        g_assert_not_reached();
2631    }
2632}
2633
2634int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2635{
2636    switch (opc) {
2637    case INDEX_op_add_vec:
2638    case INDEX_op_sub_vec:
2639    case INDEX_op_and_vec:
2640    case INDEX_op_or_vec:
2641    case INDEX_op_xor_vec:
2642    case INDEX_op_andc_vec:
2643    case INDEX_op_orc_vec:
2644    case INDEX_op_neg_vec:
2645    case INDEX_op_abs_vec:
2646    case INDEX_op_not_vec:
2647    case INDEX_op_cmp_vec:
2648    case INDEX_op_shli_vec:
2649    case INDEX_op_shri_vec:
2650    case INDEX_op_sari_vec:
2651    case INDEX_op_ssadd_vec:
2652    case INDEX_op_sssub_vec:
2653    case INDEX_op_usadd_vec:
2654    case INDEX_op_ussub_vec:
2655    case INDEX_op_shlv_vec:
2656    case INDEX_op_bitsel_vec:
2657        return 1;
2658    case INDEX_op_rotli_vec:
2659    case INDEX_op_shrv_vec:
2660    case INDEX_op_sarv_vec:
2661    case INDEX_op_rotlv_vec:
2662    case INDEX_op_rotrv_vec:
2663        return -1;
2664    case INDEX_op_mul_vec:
2665    case INDEX_op_smax_vec:
2666    case INDEX_op_smin_vec:
2667    case INDEX_op_umax_vec:
2668    case INDEX_op_umin_vec:
2669        return vece < MO_64;
2670
2671    default:
2672        return 0;
2673    }
2674}
2675
2676void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2677                       TCGArg a0, ...)
2678{
2679    va_list va;
2680    TCGv_vec v0, v1, v2, t1, t2, c1;
2681    TCGArg a2;
2682
2683    va_start(va, a0);
2684    v0 = temp_tcgv_vec(arg_temp(a0));
2685    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2686    a2 = va_arg(va, TCGArg);
2687    va_end(va);
2688
2689    switch (opc) {
2690    case INDEX_op_rotli_vec:
2691        t1 = tcg_temp_new_vec(type);
2692        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2693        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2694                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2695        tcg_temp_free_vec(t1);
2696        break;
2697
2698    case INDEX_op_shrv_vec:
2699    case INDEX_op_sarv_vec:
2700        /* Right shifts are negative left shifts for AArch64.  */
2701        v2 = temp_tcgv_vec(arg_temp(a2));
2702        t1 = tcg_temp_new_vec(type);
2703        tcg_gen_neg_vec(vece, t1, v2);
2704        opc = (opc == INDEX_op_shrv_vec
2705               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2706        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2707                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2708        tcg_temp_free_vec(t1);
2709        break;
2710
2711    case INDEX_op_rotlv_vec:
2712        v2 = temp_tcgv_vec(arg_temp(a2));
2713        t1 = tcg_temp_new_vec(type);
2714        c1 = tcg_constant_vec(type, vece, 8 << vece);
2715        tcg_gen_sub_vec(vece, t1, v2, c1);
2716        /* Right shifts are negative left shifts for AArch64.  */
2717        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2718                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2719        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2720                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2721        tcg_gen_or_vec(vece, v0, v0, t1);
2722        tcg_temp_free_vec(t1);
2723        break;
2724
2725    case INDEX_op_rotrv_vec:
2726        v2 = temp_tcgv_vec(arg_temp(a2));
2727        t1 = tcg_temp_new_vec(type);
2728        t2 = tcg_temp_new_vec(type);
2729        c1 = tcg_constant_vec(type, vece, 8 << vece);
2730        tcg_gen_neg_vec(vece, t1, v2);
2731        tcg_gen_sub_vec(vece, t2, c1, v2);
2732        /* Right shifts are negative left shifts for AArch64.  */
2733        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2734                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2735        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2736                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2737        tcg_gen_or_vec(vece, v0, t1, t2);
2738        tcg_temp_free_vec(t1);
2739        tcg_temp_free_vec(t2);
2740        break;
2741
2742    default:
2743        g_assert_not_reached();
2744    }
2745}
2746
2747static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2748{
2749    switch (op) {
2750    case INDEX_op_goto_ptr:
2751        return C_O0_I1(r);
2752
2753    case INDEX_op_ld8u_i32:
2754    case INDEX_op_ld8s_i32:
2755    case INDEX_op_ld16u_i32:
2756    case INDEX_op_ld16s_i32:
2757    case INDEX_op_ld_i32:
2758    case INDEX_op_ld8u_i64:
2759    case INDEX_op_ld8s_i64:
2760    case INDEX_op_ld16u_i64:
2761    case INDEX_op_ld16s_i64:
2762    case INDEX_op_ld32u_i64:
2763    case INDEX_op_ld32s_i64:
2764    case INDEX_op_ld_i64:
2765    case INDEX_op_neg_i32:
2766    case INDEX_op_neg_i64:
2767    case INDEX_op_not_i32:
2768    case INDEX_op_not_i64:
2769    case INDEX_op_bswap16_i32:
2770    case INDEX_op_bswap32_i32:
2771    case INDEX_op_bswap16_i64:
2772    case INDEX_op_bswap32_i64:
2773    case INDEX_op_bswap64_i64:
2774    case INDEX_op_ext8s_i32:
2775    case INDEX_op_ext16s_i32:
2776    case INDEX_op_ext8u_i32:
2777    case INDEX_op_ext16u_i32:
2778    case INDEX_op_ext8s_i64:
2779    case INDEX_op_ext16s_i64:
2780    case INDEX_op_ext32s_i64:
2781    case INDEX_op_ext8u_i64:
2782    case INDEX_op_ext16u_i64:
2783    case INDEX_op_ext32u_i64:
2784    case INDEX_op_ext_i32_i64:
2785    case INDEX_op_extu_i32_i64:
2786    case INDEX_op_extract_i32:
2787    case INDEX_op_extract_i64:
2788    case INDEX_op_sextract_i32:
2789    case INDEX_op_sextract_i64:
2790        return C_O1_I1(r, r);
2791
2792    case INDEX_op_st8_i32:
2793    case INDEX_op_st16_i32:
2794    case INDEX_op_st_i32:
2795    case INDEX_op_st8_i64:
2796    case INDEX_op_st16_i64:
2797    case INDEX_op_st32_i64:
2798    case INDEX_op_st_i64:
2799        return C_O0_I2(rZ, r);
2800
2801    case INDEX_op_add_i32:
2802    case INDEX_op_add_i64:
2803    case INDEX_op_sub_i32:
2804    case INDEX_op_sub_i64:
2805    case INDEX_op_setcond_i32:
2806    case INDEX_op_setcond_i64:
2807        return C_O1_I2(r, r, rA);
2808
2809    case INDEX_op_mul_i32:
2810    case INDEX_op_mul_i64:
2811    case INDEX_op_div_i32:
2812    case INDEX_op_div_i64:
2813    case INDEX_op_divu_i32:
2814    case INDEX_op_divu_i64:
2815    case INDEX_op_rem_i32:
2816    case INDEX_op_rem_i64:
2817    case INDEX_op_remu_i32:
2818    case INDEX_op_remu_i64:
2819    case INDEX_op_muluh_i64:
2820    case INDEX_op_mulsh_i64:
2821        return C_O1_I2(r, r, r);
2822
2823    case INDEX_op_and_i32:
2824    case INDEX_op_and_i64:
2825    case INDEX_op_or_i32:
2826    case INDEX_op_or_i64:
2827    case INDEX_op_xor_i32:
2828    case INDEX_op_xor_i64:
2829    case INDEX_op_andc_i32:
2830    case INDEX_op_andc_i64:
2831    case INDEX_op_orc_i32:
2832    case INDEX_op_orc_i64:
2833    case INDEX_op_eqv_i32:
2834    case INDEX_op_eqv_i64:
2835        return C_O1_I2(r, r, rL);
2836
2837    case INDEX_op_shl_i32:
2838    case INDEX_op_shr_i32:
2839    case INDEX_op_sar_i32:
2840    case INDEX_op_rotl_i32:
2841    case INDEX_op_rotr_i32:
2842    case INDEX_op_shl_i64:
2843    case INDEX_op_shr_i64:
2844    case INDEX_op_sar_i64:
2845    case INDEX_op_rotl_i64:
2846    case INDEX_op_rotr_i64:
2847        return C_O1_I2(r, r, ri);
2848
2849    case INDEX_op_clz_i32:
2850    case INDEX_op_ctz_i32:
2851    case INDEX_op_clz_i64:
2852    case INDEX_op_ctz_i64:
2853        return C_O1_I2(r, r, rAL);
2854
2855    case INDEX_op_brcond_i32:
2856    case INDEX_op_brcond_i64:
2857        return C_O0_I2(r, rA);
2858
2859    case INDEX_op_movcond_i32:
2860    case INDEX_op_movcond_i64:
2861        return C_O1_I4(r, r, rA, rZ, rZ);
2862
2863    case INDEX_op_qemu_ld_i32:
2864    case INDEX_op_qemu_ld_i64:
2865        return C_O1_I1(r, l);
2866    case INDEX_op_qemu_st_i32:
2867    case INDEX_op_qemu_st_i64:
2868        return C_O0_I2(lZ, l);
2869
2870    case INDEX_op_deposit_i32:
2871    case INDEX_op_deposit_i64:
2872        return C_O1_I2(r, 0, rZ);
2873
2874    case INDEX_op_extract2_i32:
2875    case INDEX_op_extract2_i64:
2876        return C_O1_I2(r, rZ, rZ);
2877
2878    case INDEX_op_add2_i32:
2879    case INDEX_op_add2_i64:
2880    case INDEX_op_sub2_i32:
2881    case INDEX_op_sub2_i64:
2882        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2883
2884    case INDEX_op_add_vec:
2885    case INDEX_op_sub_vec:
2886    case INDEX_op_mul_vec:
2887    case INDEX_op_xor_vec:
2888    case INDEX_op_ssadd_vec:
2889    case INDEX_op_sssub_vec:
2890    case INDEX_op_usadd_vec:
2891    case INDEX_op_ussub_vec:
2892    case INDEX_op_smax_vec:
2893    case INDEX_op_smin_vec:
2894    case INDEX_op_umax_vec:
2895    case INDEX_op_umin_vec:
2896    case INDEX_op_shlv_vec:
2897    case INDEX_op_shrv_vec:
2898    case INDEX_op_sarv_vec:
2899    case INDEX_op_aa64_sshl_vec:
2900        return C_O1_I2(w, w, w);
2901    case INDEX_op_not_vec:
2902    case INDEX_op_neg_vec:
2903    case INDEX_op_abs_vec:
2904    case INDEX_op_shli_vec:
2905    case INDEX_op_shri_vec:
2906    case INDEX_op_sari_vec:
2907        return C_O1_I1(w, w);
2908    case INDEX_op_ld_vec:
2909    case INDEX_op_dupm_vec:
2910        return C_O1_I1(w, r);
2911    case INDEX_op_st_vec:
2912        return C_O0_I2(w, r);
2913    case INDEX_op_dup_vec:
2914        return C_O1_I1(w, wr);
2915    case INDEX_op_or_vec:
2916    case INDEX_op_andc_vec:
2917        return C_O1_I2(w, w, wO);
2918    case INDEX_op_and_vec:
2919    case INDEX_op_orc_vec:
2920        return C_O1_I2(w, w, wN);
2921    case INDEX_op_cmp_vec:
2922        return C_O1_I2(w, w, wZ);
2923    case INDEX_op_bitsel_vec:
2924        return C_O1_I3(w, w, w, w);
2925    case INDEX_op_aa64_sli_vec:
2926        return C_O1_I2(w, 0, w);
2927
2928    default:
2929        g_assert_not_reached();
2930    }
2931}
2932
2933static void tcg_target_init(TCGContext *s)
2934{
2935    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2936    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2937    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2938    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2939
2940    tcg_target_call_clobber_regs = -1ull;
2941    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2942    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2943    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2944    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2945    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2946    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2947    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2948    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2949    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2950    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2951    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2952    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2953    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2954    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2955    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2956    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2957    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2958    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2959    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2960
2961    s->reserved_regs = 0;
2962    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2963    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2964    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2965    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2966    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2967}
2968
/*
 * Size in bytes of the register save area written by the prologue.
 * Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).
 */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

/*
 * Total stack frame size: the register save area, the static call
 * argument area and the TCG temporary buffer, with the sum rounded up
 * to TCG_TARGET_STACK_ALIGN (the mask form assumes a power of two).
 */
#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/*
 * We're expecting a 2 byte uleb128 encoded value: the debug frame
 * description below stores FRAME_SIZE in exactly two uleb128 bytes.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/*
 * We're expecting to use a single ADDI insn (and the matching SUBI)
 * to move SP across the part of the frame below the save area.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2984
2985static void tcg_target_qemu_prologue(TCGContext *s)
2986{
2987    TCGReg r;
2988
2989    /* Push (FP, LR) and allocate space for all saved registers.  */
2990    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2991                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2992
2993    /* Set up frame pointer for canonical unwinding.  */
2994    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2995
2996    /* Store callee-preserved regs x19..x28.  */
2997    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2998        int ofs = (r - TCG_REG_X19 + 2) * 8;
2999        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3000    }
3001
3002    /* Make stack space for TCG locals.  */
3003    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3004                 FRAME_SIZE - PUSH_SIZE);
3005
3006    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3007    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3008                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3009
3010#if !defined(CONFIG_SOFTMMU)
3011    if (USE_GUEST_BASE) {
3012        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3013        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3014    }
3015#endif
3016
3017    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3018    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3019
3020    /*
3021     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3022     * and fall through to the rest of the epilogue.
3023     */
3024    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3025    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3026
3027    /* TB epilogue */
3028    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3029
3030    /* Remove TCG locals stack space.  */
3031    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3032                 FRAME_SIZE - PUSH_SIZE);
3033
3034    /* Restore registers x19..x28.  */
3035    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3036        int ofs = (r - TCG_REG_X19 + 2) * 8;
3037        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3038    }
3039
3040    /* Pop (FP, LR), restore SP to previous frame.  */
3041    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3042                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3043    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3044}
3045
3046static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3047{
3048    int i;
3049    for (i = 0; i < count; ++i) {
3050        p[i] = NOP;
3051    }
3052}
3053
/*
 * Unwind description passed to tcg_register_jit_int(): the common
 * header followed by this backend's raw call-frame instruction bytes.
 */
typedef struct {
    DebugFrameHeader h;
    /* CFA definition: opcode, register, then a 2-byte uleb128 offset.  */
    uint8_t fde_def_cfa[4];
    /* Twelve (opcode + register, offset) byte pairs for saved registers.  */
    uint8_t fde_reg_ofs[24];
} DebugFrame;

/*
 * ELF machine identifier for this host.  Not referenced in this part of
 * the file; presumably consumed by the shared JIT-registration code.
 */
#define ELF_HOST_MACHINE EM_AARCH64
3061
/*
 * Call-frame information for code running inside the frame built by
 * tcg_target_qemu_prologue(): the CFA is SP + FRAME_SIZE, and each saved
 * register is found at the listed negative offset from the CFA.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    /*
     * Each second byte is an offset factor that is scaled by data_align
     * (-8), giving the byte offsets shown in the comments.
     */
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
3093
/*
 * Register the code buffer BUF of BUF_SIZE bytes by forwarding it to
 * tcg_register_jit_int() together with this backend's debug_frame.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
3097}
3098