xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 8c6631e6)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
74#define TCG_REG_TMP TCG_REG_X30
75#define TCG_VEC_TMP TCG_REG_V31
76
77#ifndef CONFIG_SOFTMMU
78/* Note that XZR cannot be encoded in the address base register slot,
79   as that actually encodes SP.  So if we need to zero-extend the guest
80   address, via the address index register slot, we need to load even
81   a zero guest base into a register.  */
82#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
83#define TCG_REG_GUEST_BASE TCG_REG_X28
84#endif
85
86static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
87{
88    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
89    ptrdiff_t offset = target - src_rx;
90
91    if (offset == sextract64(offset, 0, 26)) {
92        /* read instruction, mask away previous PC_REL26 parameter contents,
93           set the proper offset, then write back the instruction. */
94        *src_rw = deposit32(*src_rw, 0, 26, offset);
95        return true;
96    }
97    return false;
98}
99
100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
101{
102    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
103    ptrdiff_t offset = target - src_rx;
104
105    if (offset == sextract64(offset, 0, 19)) {
106        *src_rw = deposit32(*src_rw, 5, 19, offset);
107        return true;
108    }
109    return false;
110}
111
112static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
113                        intptr_t value, intptr_t addend)
114{
115    tcg_debug_assert(addend == 0);
116    switch (type) {
117    case R_AARCH64_JUMP26:
118    case R_AARCH64_CALL26:
119        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
120    case R_AARCH64_CONDBR19:
121        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
122    default:
123        g_assert_not_reached();
124    }
125}
126
127#define TCG_CT_CONST_AIMM 0x100
128#define TCG_CT_CONST_LIMM 0x200
129#define TCG_CT_CONST_ZERO 0x400
130#define TCG_CT_CONST_MONE 0x800
131#define TCG_CT_CONST_ORRI 0x1000
132#define TCG_CT_CONST_ANDI 0x2000
133
134#define ALL_GENERAL_REGS  0xffffffffu
135#define ALL_VECTOR_REGS   0xffffffff00000000ull
136
137#ifdef CONFIG_SOFTMMU
138#define ALL_QLDST_REGS \
139    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
140                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
141#else
142#define ALL_QLDST_REGS   ALL_GENERAL_REGS
143#endif
144
145/* Match a constant valid for addition (12-bit, optionally shifted).  */
146static inline bool is_aimm(uint64_t val)
147{
148    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
149}
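
/* For example, 0x123 and 0x123000 are valid arithmetic immediates
   (12 bits, optionally shifted left by 12), while 0x123001 and 0x1230000
   are not and must be built in a register or split across two insns.  */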
150
151/* Match a constant valid for logical operations.  */
152static inline bool is_limm(uint64_t val)
153{
154    /* Taking a simplified view of the logical immediates for now, ignoring
155       the replication that can happen across the field.  Match bit patterns
156       of the forms
157           0....01....1
158           0..01..10..0
159       and their inverses.  */
160
161    /* Make things easier below, by testing the form with msb clear. */
162    if ((int64_t)val < 0) {
163        val = ~val;
164    }
165    if (val == 0) {
166        return false;
167    }
168    val += val & -val;
169    return (val & (val - 1)) == 0;
170}
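
/* Worked example: val = 0x0ff0 (form 0..01..10..0).  The lowest set bit
   is 0x10, so val += val & -val yields 0x1000, a power of two, and the
   final test passes.  For val = 0x0f0f the sum is 0x0f10, which still
   has several bits set, so the test correctly rejects it.  */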
171
172/* Return true if v16 is a valid 16-bit shifted immediate.  */
173static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
174{
175    if (v16 == (v16 & 0xff)) {
176        *cmode = 0x8;
177        *imm8 = v16 & 0xff;
178        return true;
179    } else if (v16 == (v16 & 0xff00)) {
180        *cmode = 0xa;
181        *imm8 = v16 >> 8;
182        return true;
183    }
184    return false;
185}
186
187/* Return true if v32 is a valid 32-bit shifted immediate.  */
188static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
189{
190    if (v32 == (v32 & 0xff)) {
191        *cmode = 0x0;
192        *imm8 = v32 & 0xff;
193        return true;
194    } else if (v32 == (v32 & 0xff00)) {
195        *cmode = 0x2;
196        *imm8 = (v32 >> 8) & 0xff;
197        return true;
198    } else if (v32 == (v32 & 0xff0000)) {
199        *cmode = 0x4;
200        *imm8 = (v32 >> 16) & 0xff;
201        return true;
202    } else if (v32 == (v32 & 0xff000000)) {
203        *cmode = 0x6;
204        *imm8 = v32 >> 24;
205        return true;
206    }
207    return false;
208}
209
210/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
211static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
212{
213    if ((v32 & 0xffff00ff) == 0xff) {
214        *cmode = 0xc;
215        *imm8 = (v32 >> 8) & 0xff;
216        return true;
217    } else if ((v32 & 0xff00ffff) == 0xffff) {
218        *cmode = 0xd;
219        *imm8 = (v32 >> 16) & 0xff;
220        return true;
221    }
222    return false;
223}
224
225/* Return true if v32 is a valid float32 immediate.  */
226static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
227{
228    if (extract32(v32, 0, 19) == 0
229        && (extract32(v32, 25, 6) == 0x20
230            || extract32(v32, 25, 6) == 0x1f)) {
231        *cmode = 0xf;
232        *imm8 = (extract32(v32, 31, 1) << 7)
233              | (extract32(v32, 25, 1) << 6)
234              | extract32(v32, 19, 6);
235        return true;
236    }
237    return false;
238}
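
/* This is the AdvSIMD/FP 8-bit immediate format: only the sign, a narrow
   exponent and the top four fraction bits survive, i.e. values of the
   form +/- (16..31)/16 * 2^n with n in [-3, 4].  E.g. 1.0f (0x3f800000)
   and 2.0f (0x40000000) qualify; 0.1f does not.  */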
239
240/* Return true if v64 is a valid float64 immediate.  */
241static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
242{
243    if (extract64(v64, 0, 48) == 0
244        && (extract64(v64, 54, 9) == 0x100
245            || extract64(v64, 54, 9) == 0x0ff)) {
246        *cmode = 0xf;
247        *imm8 = (extract64(v64, 63, 1) << 7)
248              | (extract64(v64, 54, 1) << 6)
249              | extract64(v64, 48, 6);
250        return true;
251    }
252    return false;
253}
254
255/*
256 * Return non-zero if v32 can be formed by MOVI+ORR.
257 * Place the parameters for MOVI in (cmode, imm8).
258 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
259 */
260static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
261{
262    int i;
263
264    for (i = 6; i > 0; i -= 2) {
265        /* Mask out one byte we can add with ORR.  */
266        uint32_t tmp = v32 & ~(0xffu << (i * 4));
267        if (is_shimm32(tmp, cmode, imm8) ||
268            is_soimm32(tmp, cmode, imm8)) {
269            break;
270        }
271    }
272    return i;
273}
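
/* Example: v32 = 0x00aa00bb.  Clearing the byte at i = 4 leaves
   0x000000bb, a valid MOVI immediate (cmode 0x0), so 4 is returned;
   the caller then emits ORR with cmode 0x4 and imm8 = 0xaa to restore
   bits 16..23, as done in tcg_out_dupi_vec below.  */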
274
275/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
276static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
277{
278    if (v32 == deposit32(v32, 16, 16, v32)) {
279        return is_shimm16(v32, cmode, imm8);
280    } else {
281        return is_shimm32(v32, cmode, imm8);
282    }
283}
284
285static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
286{
287    if (ct & TCG_CT_CONST) {
288        return 1;
289    }
290    if (type == TCG_TYPE_I32) {
291        val = (int32_t)val;
292    }
293    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
294        return 1;
295    }
296    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
297        return 1;
298    }
299    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
300        return 1;
301    }
302    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
303        return 1;
304    }
305
306    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
307    case 0:
308        break;
309    case TCG_CT_CONST_ANDI:
310        val = ~val;
311        /* fallthru */
312    case TCG_CT_CONST_ORRI:
313        if (val == deposit64(val, 32, 32, val)) {
314            int cmode, imm8;
315            return is_shimm1632(val, &cmode, &imm8);
316        }
317        break;
318    default:
319        /* Both bits should not be set for the same insn.  */
320        g_assert_not_reached();
321    }
322
323    return 0;
324}
325
326enum aarch64_cond_code {
327    COND_EQ = 0x0,
328    COND_NE = 0x1,
329    COND_CS = 0x2,     /* Unsigned greater or equal */
330    COND_HS = COND_CS, /* ALIAS greater or equal */
331    COND_CC = 0x3,     /* Unsigned less than */
332    COND_LO = COND_CC, /* ALIAS Lower */
333    COND_MI = 0x4,     /* Negative */
334    COND_PL = 0x5,     /* Zero or greater */
335    COND_VS = 0x6,     /* Overflow */
336    COND_VC = 0x7,     /* No overflow */
337    COND_HI = 0x8,     /* Unsigned greater than */
338    COND_LS = 0x9,     /* Unsigned less or equal */
339    COND_GE = 0xa,
340    COND_LT = 0xb,
341    COND_GT = 0xc,
342    COND_LE = 0xd,
343    COND_AL = 0xe,
344    COND_NV = 0xf, /* behaves like COND_AL here */
345};
346
347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
348    [TCG_COND_EQ] = COND_EQ,
349    [TCG_COND_NE] = COND_NE,
350    [TCG_COND_LT] = COND_LT,
351    [TCG_COND_GE] = COND_GE,
352    [TCG_COND_LE] = COND_LE,
353    [TCG_COND_GT] = COND_GT,
354    /* unsigned */
355    [TCG_COND_LTU] = COND_LO,
356    [TCG_COND_GTU] = COND_HI,
357    [TCG_COND_GEU] = COND_HS,
358    [TCG_COND_LEU] = COND_LS,
359};
360
361typedef enum {
362    LDST_ST = 0,    /* store */
363    LDST_LD = 1,    /* load */
364    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
365    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
366} AArch64LdstType;
367
368/* We encode the format of the insn into the beginning of the name, so that
369   we can have the preprocessor help "typecheck" the insn vs the output
370   function.  Arm didn't provide us with nice names for the formats, so we
371   use the section number of the architecture reference manual in which the
372   instruction group is described.  */
373typedef enum {
374    /* Compare and branch (immediate).  */
375    I3201_CBZ       = 0x34000000,
376    I3201_CBNZ      = 0x35000000,
377
378    /* Conditional branch (immediate).  */
379    I3202_B_C       = 0x54000000,
380
381    /* Unconditional branch (immediate).  */
382    I3206_B         = 0x14000000,
383    I3206_BL        = 0x94000000,
384
385    /* Unconditional branch (register).  */
386    I3207_BR        = 0xd61f0000,
387    I3207_BLR       = 0xd63f0000,
388    I3207_RET       = 0xd65f0000,
389
390    /* AdvSIMD load/store single structure.  */
391    I3303_LD1R      = 0x0d40c000,
392
393    /* Load register (literal), for loading a value from a pc-relative offset */
394    I3305_LDR       = 0x58000000,
395    I3305_LDR_v64   = 0x5c000000,
396    I3305_LDR_v128  = 0x9c000000,
397
398    /* Load/store register.  Described here as 3.3.12, but the helper
399       that emits them can transform to 3.3.10 or 3.3.13.  */
400    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
401    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
402    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
403    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
404
405    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
406    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
407    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
408    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
409
410    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
411    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
412
413    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
414    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
415    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
416
417    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
418    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
419
420    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
421    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
422
423    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
424    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
425
426    I3312_TO_I3310  = 0x00200800,
427    I3312_TO_I3313  = 0x01000000,
428
429    /* Load/store register pair instructions.  */
430    I3314_LDP       = 0x28400000,
431    I3314_STP       = 0x28000000,
432
433    /* Add/subtract immediate instructions.  */
434    I3401_ADDI      = 0x11000000,
435    I3401_ADDSI     = 0x31000000,
436    I3401_SUBI      = 0x51000000,
437    I3401_SUBSI     = 0x71000000,
438
439    /* Bitfield instructions.  */
440    I3402_BFM       = 0x33000000,
441    I3402_SBFM      = 0x13000000,
442    I3402_UBFM      = 0x53000000,
443
444    /* Extract instruction.  */
445    I3403_EXTR      = 0x13800000,
446
447    /* Logical immediate instructions.  */
448    I3404_ANDI      = 0x12000000,
449    I3404_ORRI      = 0x32000000,
450    I3404_EORI      = 0x52000000,
451    I3404_ANDSI     = 0x72000000,
452
453    /* Move wide immediate instructions.  */
454    I3405_MOVN      = 0x12800000,
455    I3405_MOVZ      = 0x52800000,
456    I3405_MOVK      = 0x72800000,
457
458    /* PC relative addressing instructions.  */
459    I3406_ADR       = 0x10000000,
460    I3406_ADRP      = 0x90000000,
461
462    /* Add/subtract shifted register instructions (without a shift).  */
463    I3502_ADD       = 0x0b000000,
464    I3502_ADDS      = 0x2b000000,
465    I3502_SUB       = 0x4b000000,
466    I3502_SUBS      = 0x6b000000,
467
468    /* Add/subtract shifted register instructions (with a shift).  */
469    I3502S_ADD_LSL  = I3502_ADD,
470
471    /* Add/subtract with carry instructions.  */
472    I3503_ADC       = 0x1a000000,
473    I3503_SBC       = 0x5a000000,
474
475    /* Conditional select instructions.  */
476    I3506_CSEL      = 0x1a800000,
477    I3506_CSINC     = 0x1a800400,
478    I3506_CSINV     = 0x5a800000,
479    I3506_CSNEG     = 0x5a800400,
480
481    /* Data-processing (1 source) instructions.  */
482    I3507_CLZ       = 0x5ac01000,
483    I3507_RBIT      = 0x5ac00000,
484    I3507_REV       = 0x5ac00000, /* + size << 10 */
485
486    /* Data-processing (2 source) instructions.  */
487    I3508_LSLV      = 0x1ac02000,
488    I3508_LSRV      = 0x1ac02400,
489    I3508_ASRV      = 0x1ac02800,
490    I3508_RORV      = 0x1ac02c00,
491    I3508_SMULH     = 0x9b407c00,
492    I3508_UMULH     = 0x9bc07c00,
493    I3508_UDIV      = 0x1ac00800,
494    I3508_SDIV      = 0x1ac00c00,
495
496    /* Data-processing (3 source) instructions.  */
497    I3509_MADD      = 0x1b000000,
498    I3509_MSUB      = 0x1b008000,
499
500    /* Logical shifted register instructions (without a shift).  */
501    I3510_AND       = 0x0a000000,
502    I3510_BIC       = 0x0a200000,
503    I3510_ORR       = 0x2a000000,
504    I3510_ORN       = 0x2a200000,
505    I3510_EOR       = 0x4a000000,
506    I3510_EON       = 0x4a200000,
507    I3510_ANDS      = 0x6a000000,
508
509    /* Logical shifted register instructions (with a shift).  */
510    I3502S_AND_LSR  = I3510_AND | (1 << 22),
511
512    /* AdvSIMD copy */
513    I3605_DUP      = 0x0e000400,
514    I3605_INS      = 0x4e001c00,
515    I3605_UMOV     = 0x0e003c00,
516
517    /* AdvSIMD modified immediate */
518    I3606_MOVI      = 0x0f000400,
519    I3606_MVNI      = 0x2f000400,
520    I3606_BIC       = 0x2f001400,
521    I3606_ORR       = 0x0f001400,
522
523    /* AdvSIMD scalar shift by immediate */
524    I3609_SSHR      = 0x5f000400,
525    I3609_SSRA      = 0x5f001400,
526    I3609_SHL       = 0x5f005400,
527    I3609_USHR      = 0x7f000400,
528    I3609_USRA      = 0x7f001400,
529    I3609_SLI       = 0x7f005400,
530
531    /* AdvSIMD scalar three same */
532    I3611_SQADD     = 0x5e200c00,
533    I3611_SQSUB     = 0x5e202c00,
534    I3611_CMGT      = 0x5e203400,
535    I3611_CMGE      = 0x5e203c00,
536    I3611_SSHL      = 0x5e204400,
537    I3611_ADD       = 0x5e208400,
538    I3611_CMTST     = 0x5e208c00,
539    I3611_UQADD     = 0x7e200c00,
540    I3611_UQSUB     = 0x7e202c00,
541    I3611_CMHI      = 0x7e203400,
542    I3611_CMHS      = 0x7e203c00,
543    I3611_USHL      = 0x7e204400,
544    I3611_SUB       = 0x7e208400,
545    I3611_CMEQ      = 0x7e208c00,
546
547    /* AdvSIMD scalar two-reg misc */
548    I3612_CMGT0     = 0x5e208800,
549    I3612_CMEQ0     = 0x5e209800,
550    I3612_CMLT0     = 0x5e20a800,
551    I3612_ABS       = 0x5e20b800,
552    I3612_CMGE0     = 0x7e208800,
553    I3612_CMLE0     = 0x7e209800,
554    I3612_NEG       = 0x7e20b800,
555
556    /* AdvSIMD shift by immediate */
557    I3614_SSHR      = 0x0f000400,
558    I3614_SSRA      = 0x0f001400,
559    I3614_SHL       = 0x0f005400,
560    I3614_SLI       = 0x2f005400,
561    I3614_USHR      = 0x2f000400,
562    I3614_USRA      = 0x2f001400,
563
564    /* AdvSIMD three same.  */
565    I3616_ADD       = 0x0e208400,
566    I3616_AND       = 0x0e201c00,
567    I3616_BIC       = 0x0e601c00,
568    I3616_BIF       = 0x2ee01c00,
569    I3616_BIT       = 0x2ea01c00,
570    I3616_BSL       = 0x2e601c00,
571    I3616_EOR       = 0x2e201c00,
572    I3616_MUL       = 0x0e209c00,
573    I3616_ORR       = 0x0ea01c00,
574    I3616_ORN       = 0x0ee01c00,
575    I3616_SUB       = 0x2e208400,
576    I3616_CMGT      = 0x0e203400,
577    I3616_CMGE      = 0x0e203c00,
578    I3616_CMTST     = 0x0e208c00,
579    I3616_CMHI      = 0x2e203400,
580    I3616_CMHS      = 0x2e203c00,
581    I3616_CMEQ      = 0x2e208c00,
582    I3616_SMAX      = 0x0e206400,
583    I3616_SMIN      = 0x0e206c00,
584    I3616_SSHL      = 0x0e204400,
585    I3616_SQADD     = 0x0e200c00,
586    I3616_SQSUB     = 0x0e202c00,
587    I3616_UMAX      = 0x2e206400,
588    I3616_UMIN      = 0x2e206c00,
589    I3616_UQADD     = 0x2e200c00,
590    I3616_UQSUB     = 0x2e202c00,
591    I3616_USHL      = 0x2e204400,
592
593    /* AdvSIMD two-reg misc.  */
594    I3617_CMGT0     = 0x0e208800,
595    I3617_CMEQ0     = 0x0e209800,
596    I3617_CMLT0     = 0x0e20a800,
597    I3617_CMGE0     = 0x2e208800,
598    I3617_CMLE0     = 0x2e209800,
599    I3617_NOT       = 0x2e205800,
600    I3617_ABS       = 0x0e20b800,
601    I3617_NEG       = 0x2e20b800,
602
603    /* System instructions.  */
604    NOP             = 0xd503201f,
605    DMB_ISH         = 0xd50338bf,
606    DMB_LD          = 0x00000100,
607    DMB_ST          = 0x00000200,
608} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
616/* Emit an opcode with "type-checking" of the format.  */
617#define tcg_out_insn(S, FMT, OP, ...) \
618    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
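
/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, imm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, imm), so a mismatch
   between the format number and the opcode name fails to compile.  */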
619
620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
621                              TCGReg rt, TCGReg rn, unsigned size)
622{
623    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
624}
625
626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
627                              int imm19, TCGReg rt)
628{
629    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
633                              TCGReg rt, int imm19)
634{
635    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
636}
637
638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
639                              TCGCond c, int imm19)
640{
641    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
642}
643
644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
645{
646    tcg_out32(s, insn | (imm26 & 0x03ffffff));
647}
648
649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
650{
651    tcg_out32(s, insn | rn << 5);
652}
653
654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
655                              TCGReg r1, TCGReg r2, TCGReg rn,
656                              tcg_target_long ofs, bool pre, bool w)
657{
658    insn |= 1u << 31; /* ext */
659    insn |= pre << 24;
660    insn |= w << 23;
661
662    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
663    insn |= (ofs & (0x7f << 3)) << (15 - 3);
664
665    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
666}
667
668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
669                              TCGReg rd, TCGReg rn, uint64_t aimm)
670{
671    if (aimm > 0xfff) {
672        tcg_debug_assert((aimm & 0xfff) == 0);
673        aimm >>= 12;
674        tcg_debug_assert(aimm <= 0xfff);
675        aimm |= 1 << 12;  /* apply LSL 12 */
676    }
677    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
678}
679
680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
681   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
682   that feed the DecodeBitMasks pseudo function.  */
683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
684                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
685{
686    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
687              | rn << 5 | rd);
688}
689
690#define tcg_out_insn_3404  tcg_out_insn_3402
691
692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
693                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
694{
695    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
696              | rn << 5 | rd);
697}
698
699/* This function is used for the Move (wide immediate) instruction group.
700   Note that SHIFT is a full shift count, not the 2 bit HW field. */
701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
702                              TCGReg rd, uint16_t half, unsigned shift)
703{
704    tcg_debug_assert((shift & ~0x30) == 0);
705    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
706}
707
708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
709                              TCGReg rd, int64_t disp)
710{
711    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
712}
713
714/* This function is for 3.5.2 (Add/Subtract shifted register), for
715   the rare occasion when we actually want to supply a shift amount.  */
716static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
717                                      TCGType ext, TCGReg rd, TCGReg rn,
718                                      TCGReg rm, int imm6)
719{
720    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
721}
722
723/* This function is for 3.5.2 (Add/subtract shifted register),
724   and 3.5.10 (Logical shifted register), for the vast majority of cases
725   when we don't want to apply a shift.  Thus it can also be used for
726   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
727static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
728                              TCGReg rd, TCGReg rn, TCGReg rm)
729{
730    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
731}
732
733#define tcg_out_insn_3503  tcg_out_insn_3502
734#define tcg_out_insn_3508  tcg_out_insn_3502
735#define tcg_out_insn_3510  tcg_out_insn_3502
736
737static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
738                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
739{
740    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
741              | tcg_cond_to_aarch64[c] << 12);
742}
743
744static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn)
746{
747    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
748}
749
750static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
751                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
752{
753    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
754}
755
756static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
757                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
758{
759    /* Note that bit 11 set means general register input.  Therefore
760       we can handle both register sets with one function.  */
761    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
762              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
763}
764
765static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
766                              TCGReg rd, bool op, int cmode, uint8_t imm8)
767{
768    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
769              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
770}
771
772static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
773                              TCGReg rd, TCGReg rn, unsigned immhb)
774{
775    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
776}
777
778static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
779                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780{
781    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
782              | (rn & 0x1f) << 5 | (rd & 0x1f));
783}
784
785static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
786                              unsigned size, TCGReg rd, TCGReg rn)
787{
788    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
791static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
792                              TCGReg rd, TCGReg rn, unsigned immhb)
793{
794    tcg_out32(s, insn | q << 30 | immhb << 16
795              | (rn & 0x1f) << 5 | (rd & 0x1f));
796}
797
798static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
799                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
800{
801    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
802              | (rn & 0x1f) << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
806                              unsigned size, TCGReg rd, TCGReg rn)
807{
808    tcg_out32(s, insn | q << 30 | (size << 22)
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
812static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
813                              TCGReg rd, TCGReg base, TCGType ext,
814                              TCGReg regoff)
815{
816    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
817    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
818              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg rn, intptr_t offset)
823{
824    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
825}
826
827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
828                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
829{
830    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
831    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
832              | rn << 5 | (rd & 0x1f));
833}
834
835/* Register to register move using ORR (shifted register with no shift). */
836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
837{
838    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
839}
840
841/* Register to register move using ADDI (move to/from SP).  */
842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
843{
844    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
845}
846
847/* This function is used for the Logical (immediate) instruction group.
848   The value of LIMM must satisfy IS_LIMM.  See the comment above about
849   only supporting simplified logical immediates.  */
850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
851                             TCGReg rd, TCGReg rn, uint64_t limm)
852{
853    unsigned h, l, r, c;
854
855    tcg_debug_assert(is_limm(limm));
856
857    h = clz64(limm);
858    l = ctz64(limm);
859    if (l == 0) {
860        r = 0;                  /* form 0....01....1 */
861        c = ctz64(~limm) - 1;
862        if (h == 0) {
863            r = clz64(~limm);   /* form 1..10..01..1 */
864            c += r;
865        }
866    } else {
867        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
868        c = r - h - 1;
869    }
870    if (ext == TCG_TYPE_I32) {
871        r &= 31;
872        c &= 31;
873    }
874
875    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
876}
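
/* Worked example: limm = 0x0ff0 with ext == TCG_TYPE_I64.  Then
   h = clz64(limm) = 52 and l = ctz64(limm) = 4, giving r = 64 - 4 = 60
   and c = r - h - 1 = 7: eight consecutive ones rotated right by 60,
   which is exactly 0xff << 4.  */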
877
878static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
879                             TCGReg rd, int64_t v64)
880{
881    bool q = type == TCG_TYPE_V128;
882    int cmode, imm8, i;
883
884    /* Test all bytes equal first.  */
885    if (vece == MO_8) {
886        imm8 = (uint8_t)v64;
887        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
888        return;
889    }
890
891    /*
892     * Test all bytes 0x00 or 0xff second.  This can match cases that
893     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
894     */
895    for (i = imm8 = 0; i < 8; i++) {
896        uint8_t byte = v64 >> (i * 8);
897        if (byte == 0xff) {
898            imm8 |= 1 << i;
899        } else if (byte != 0) {
900            goto fail_bytes;
901        }
902    }
903    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
904    return;
905 fail_bytes:
906
907    /*
908     * Tests for various replications.  For each element width, if we
909     * cannot find an expansion there's no point checking a larger
910     * width because we already know by replication it cannot match.
911     */
912    if (vece == MO_16) {
913        uint16_t v16 = v64;
914
915        if (is_shimm16(v16, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
917            return;
918        }
919        if (is_shimm16(~v16, &cmode, &imm8)) {
920            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
921            return;
922        }
923
924        /*
925         * Otherwise, all remaining constants can be loaded in two insns:
926         * rd = v16 & 0xff, rd |= v16 & 0xff00.
927         */
928        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
929        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
930        return;
931    } else if (vece == MO_32) {
932        uint32_t v32 = v64;
933        uint32_t n32 = ~v32;
934
935        if (is_shimm32(v32, &cmode, &imm8) ||
936            is_soimm32(v32, &cmode, &imm8) ||
937            is_fimm32(v32, &cmode, &imm8)) {
938            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
939            return;
940        }
941        if (is_shimm32(n32, &cmode, &imm8) ||
942            is_soimm32(n32, &cmode, &imm8)) {
943            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
944            return;
945        }
946
947        /*
948         * Restrict the set of constants to those we can load with
949         * two instructions.  Others we load from the pool.
950         */
951        i = is_shimm32_pair(v32, &cmode, &imm8);
952        if (i) {
953            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
954            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
955            return;
956        }
957        i = is_shimm32_pair(n32, &cmode, &imm8);
958        if (i) {
959            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
960            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
961            return;
962        }
963    } else if (is_fimm64(v64, &cmode, &imm8)) {
964        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
965        return;
966    }
967
968    /*
969     * As a last resort, load from the constant pool.  Sadly there
970     * is no LD1R (literal), so store the full 16-byte vector.
971     */
972    if (type == TCG_TYPE_V128) {
973        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
974        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
975    } else {
976        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
977        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
978    }
979}
980
981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
982                            TCGReg rd, TCGReg rs)
983{
984    int is_q = type - TCG_TYPE_V64;
985    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
986    return true;
987}
988
989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
990                             TCGReg r, TCGReg base, intptr_t offset)
991{
992    TCGReg temp = TCG_REG_TMP;
993
994    if (offset < -0xffffff || offset > 0xffffff) {
995        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
996        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
997        base = temp;
998    } else {
999        AArch64Insn add_insn = I3401_ADDI;
1000
1001        if (offset < 0) {
1002            add_insn = I3401_SUBI;
1003            offset = -offset;
1004        }
1005        if (offset & 0xfff000) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1007            base = temp;
1008        }
1009        if (offset & 0xfff) {
1010            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1011            base = temp;
1012        }
1013    }
1014    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1015    return true;
1016}
1017
1018static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1019                         tcg_target_long value)
1020{
1021    tcg_target_long svalue = value;
1022    tcg_target_long ivalue = ~value;
1023    tcg_target_long t0, t1, t2;
1024    int s0, s1;
1025    AArch64Insn opc;
1026
1027    switch (type) {
1028    case TCG_TYPE_I32:
1029    case TCG_TYPE_I64:
1030        tcg_debug_assert(rd < 32);
1031        break;
1032    default:
1033        g_assert_not_reached();
1034    }
1035
1036    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1037       values within [2**31, 2**32-1], we can create smaller sequences by
1038       interpreting this as a negative 32-bit number, while ensuring that
1039       the high 32 bits are cleared by setting SF=0.  */
1040    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1041        svalue = (int32_t)value;
1042        value = (uint32_t)value;
1043        ivalue = (uint32_t)ivalue;
1044        type = TCG_TYPE_I32;
1045    }
1046
1047    /* Speed things up by handling the common case of small positive
1048       and negative values specially.  */
1049    if ((value & ~0xffffull) == 0) {
1050        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1051        return;
1052    } else if ((ivalue & ~0xffffull) == 0) {
1053        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1054        return;
1055    }
1056
1057    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1058       use the sign-extended value.  That lets us match rotated values such
1059       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1060    if (is_limm(svalue)) {
1061        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1062        return;
1063    }
1064
1065    /* Look for host pointer values within 4G of the PC.  This happens
1066       often when loading pointers to QEMU's own data structures.  */
1067    if (type == TCG_TYPE_I64) {
1068        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1069        tcg_target_long disp = value - src_rx;
1070        if (disp == sextract64(disp, 0, 21)) {
1071            tcg_out_insn(s, 3406, ADR, rd, disp);
1072            return;
1073        }
1074        disp = (value >> 12) - (src_rx >> 12);
1075        if (disp == sextract64(disp, 0, 21)) {
1076            tcg_out_insn(s, 3406, ADRP, rd, disp);
1077            if (value & 0xfff) {
1078                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1079            }
1080            return;
1081        }
1082    }
1083
1084    /* Would it take fewer insns to begin with MOVN?  */
1085    if (ctpop64(value) >= 32) {
1086        t0 = ivalue;
1087        opc = I3405_MOVN;
1088    } else {
1089        t0 = value;
1090        opc = I3405_MOVZ;
1091    }
1092    s0 = ctz64(t0) & (63 & -16);
1093    t1 = t0 & ~(0xffffull << s0);
1094    s1 = ctz64(t1) & (63 & -16);
1095    t2 = t1 & ~(0xffffull << s1);
1096    if (t2 == 0) {
1097        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1098        if (t1 != 0) {
1099            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1100        }
1101        return;
1102    }
1103
1104    /* For more than 2 insns, dump it into the constant pool.  */
1105    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1106    tcg_out_insn(s, 3305, LDR, 0, rd);
1107}
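
/* Example: when the pc-relative ADR/ADRP cases above do not apply,
   value = 0x0000123400005678 is emitted as
       MOVZ rd, #0x5678
       MOVK rd, #0x1234, LSL #32
   since only two 16-bit chunks are non-zero; anything needing three or
   more insns is loaded from the constant pool instead.  */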
1108
1109static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1110                             tcg_target_long imm)
1111{
1112    /* This function is only used for passing structs by reference. */
1113    g_assert_not_reached();
1114}
1115
1116/* Define something more legible for general use.  */
1117#define tcg_out_ldst_r  tcg_out_insn_3310
1118
1119static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1120                         TCGReg rn, intptr_t offset, int lgsize)
1121{
1122    /* If the offset is naturally aligned and in range, then we can
1123       use the scaled uimm12 encoding */
1124    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1125        uintptr_t scaled_uimm = offset >> lgsize;
1126        if (scaled_uimm <= 0xfff) {
1127            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1128            return;
1129        }
1130    }
1131
1132    /* Small signed offsets can use the unscaled encoding.  */
1133    if (offset >= -256 && offset < 256) {
1134        tcg_out_insn_3312(s, insn, rd, rn, offset);
1135        return;
1136    }
1137
1138    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1139    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1140    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1141}
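
/* E.g. for an 8-byte access, offset 0x7ff8 uses the scaled uimm12 form,
   offset -8 uses the unscaled 9-bit form, and offset 0x12345 (unaligned
   for the scaled form, out of range for the unscaled one) is materialized
   into TCG_REG_TMP and used as a register offset.  */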
1142
1143static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1144{
1145    if (ret == arg) {
1146        return true;
1147    }
1148    switch (type) {
1149    case TCG_TYPE_I32:
1150    case TCG_TYPE_I64:
1151        if (ret < 32 && arg < 32) {
1152            tcg_out_movr(s, type, ret, arg);
1153            break;
1154        } else if (ret < 32) {
1155            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1156            break;
1157        } else if (arg < 32) {
1158            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1159            break;
1160        }
1161        /* FALLTHRU */
1162
1163    case TCG_TYPE_V64:
1164        tcg_debug_assert(ret >= 32 && arg >= 32);
1165        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1166        break;
1167    case TCG_TYPE_V128:
1168        tcg_debug_assert(ret >= 32 && arg >= 32);
1169        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1170        break;
1171
1172    default:
1173        g_assert_not_reached();
1174    }
1175    return true;
1176}
1177
1178static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1179                       TCGReg base, intptr_t ofs)
1180{
1181    AArch64Insn insn;
1182    int lgsz;
1183
1184    switch (type) {
1185    case TCG_TYPE_I32:
1186        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1187        lgsz = 2;
1188        break;
1189    case TCG_TYPE_I64:
1190        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1191        lgsz = 3;
1192        break;
1193    case TCG_TYPE_V64:
1194        insn = I3312_LDRVD;
1195        lgsz = 3;
1196        break;
1197    case TCG_TYPE_V128:
1198        insn = I3312_LDRVQ;
1199        lgsz = 4;
1200        break;
1201    default:
1202        g_assert_not_reached();
1203    }
1204    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1205}
1206
1207static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1208                       TCGReg base, intptr_t ofs)
1209{
1210    AArch64Insn insn;
1211    int lgsz;
1212
1213    switch (type) {
1214    case TCG_TYPE_I32:
1215        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1216        lgsz = 2;
1217        break;
1218    case TCG_TYPE_I64:
1219        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1220        lgsz = 3;
1221        break;
1222    case TCG_TYPE_V64:
1223        insn = I3312_STRVD;
1224        lgsz = 3;
1225        break;
1226    case TCG_TYPE_V128:
1227        insn = I3312_STRVQ;
1228        lgsz = 4;
1229        break;
1230    default:
1231        g_assert_not_reached();
1232    }
1233    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1234}
1235
1236static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1237                               TCGReg base, intptr_t ofs)
1238{
1239    if (type <= TCG_TYPE_I64 && val == 0) {
1240        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1241        return true;
1242    }
1243    return false;
1244}
1245
1246static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1247                               TCGReg rn, unsigned int a, unsigned int b)
1248{
1249    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1250}
1251
1252static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1253                                TCGReg rn, unsigned int a, unsigned int b)
1254{
1255    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1256}
1257
1258static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1259                                TCGReg rn, unsigned int a, unsigned int b)
1260{
1261    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1262}
1263
1264static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1265                                TCGReg rn, TCGReg rm, unsigned int a)
1266{
1267    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1268}
1269
1270static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1271                               TCGReg rd, TCGReg rn, unsigned int m)
1272{
1273    int bits = ext ? 64 : 32;
1274    int max = bits - 1;
1275    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1276}
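
/* This is the LSL alias of UBFM: e.g. a 32-bit shift left by 8 becomes
   UBFM Wd, Wn, #24, #23.  The shr/sar helpers below are likewise the
   LSR/ASR aliases of UBFM/SBFM.  */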
1277
1278static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1279                               TCGReg rd, TCGReg rn, unsigned int m)
1280{
1281    int max = ext ? 63 : 31;
1282    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1283}
1284
1285static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1286                               TCGReg rd, TCGReg rn, unsigned int m)
1287{
1288    int max = ext ? 63 : 31;
1289    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1290}
1291
1292static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1293                                TCGReg rd, TCGReg rn, unsigned int m)
1294{
1295    int max = ext ? 63 : 31;
1296    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1297}
1298
1299static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1300                                TCGReg rd, TCGReg rn, unsigned int m)
1301{
1302    int max = ext ? 63 : 31;
1303    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1304}
1305
1306static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1307                               TCGReg rn, unsigned lsb, unsigned width)
1308{
1309    unsigned size = ext ? 64 : 32;
1310    unsigned a = (size - lsb) & (size - 1);
1311    unsigned b = width - 1;
1312    tcg_out_bfm(s, ext, rd, rn, a, b);
1313}
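
/* This is the BFI alias of BFM: depositing WIDTH bits at LSB uses
   immr = (size - lsb) & (size - 1) and imms = width - 1.  E.g. a 16-bit
   deposit at bit 8 of a 32-bit value is BFM Wd, Wn, #24, #15,
   i.e. BFI Wd, Wn, #8, #16.  */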
1314
1315static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1316                        tcg_target_long b, bool const_b)
1317{
1318    if (const_b) {
1319        /* Using CMP or CMN aliases.  */
1320        if (b >= 0) {
1321            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1322        } else {
1323            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1324        }
1325    } else {
1326        /* Using CMP alias SUBS wzr, Wn, Wm */
1327        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1328    }
1329}
1330
1331static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1332{
1333    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1334    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1335    tcg_out_insn(s, 3206, B, offset);
1336}
1337
1338static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1339{
1340    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1341    if (offset == sextract64(offset, 0, 26)) {
1342        tcg_out_insn(s, 3206, B, offset);
1343    } else {
1344        /* Choose X9 as a call-clobbered non-LR temporary. */
1345        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1346        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1347    }
1348}
1349
1350static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1351{
1352    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1353    if (offset == sextract64(offset, 0, 26)) {
1354        tcg_out_insn(s, 3206, BL, offset);
1355    } else {
1356        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1357        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1358    }
1359}
1360
1361static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1362                         const TCGHelperInfo *info)
1363{
1364    tcg_out_call_int(s, target);
1365}
1366
1367static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1368{
1369    if (!l->has_value) {
1370        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1371        tcg_out_insn(s, 3206, B, 0);
1372    } else {
1373        tcg_out_goto(s, l->u.value_ptr);
1374    }
1375}
1376
1377static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1378                           TCGArg b, bool b_const, TCGLabel *l)
1379{
1380    intptr_t offset;
1381    bool need_cmp;
1382
1383    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1384        need_cmp = false;
1385    } else {
1386        need_cmp = true;
1387        tcg_out_cmp(s, ext, a, b, b_const);
1388    }
1389
1390    if (!l->has_value) {
1391        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1392        offset = tcg_in32(s) >> 5;
1393    } else {
1394        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1395        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1396    }
1397
1398    if (need_cmp) {
1399        tcg_out_insn(s, 3202, B_C, c, offset);
1400    } else if (c == TCG_COND_EQ) {
1401        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1402    } else {
1403        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1404    }
1405}
1406
1407static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1408                               TCGReg rd, TCGReg rn)
1409{
1410    /* REV, REV16, REV32 */
1411    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1412}
1413
1414static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1415                               TCGReg rd, TCGReg rn)
1416{
1417    /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
1418    int bits = (8 << s_bits) - 1;
1419    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1420}
1421
1422static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1423                               TCGReg rd, TCGReg rn)
1424{
1425    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1426    int bits = (8 << s_bits) - 1;
1427    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1428}
1429
1430static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1431                            TCGReg rn, int64_t aimm)
1432{
1433    if (aimm >= 0) {
1434        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1435    } else {
1436        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1437    }
1438}
1439
1440static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1441                            TCGReg rh, TCGReg al, TCGReg ah,
1442                            tcg_target_long bl, tcg_target_long bh,
1443                            bool const_bl, bool const_bh, bool sub)
1444{
1445    TCGReg orig_rl = rl;
1446    AArch64Insn insn;
1447
1448    if (rl == ah || (!const_bh && rl == bh)) {
1449        rl = TCG_REG_TMP;
1450    }
1451
1452    if (const_bl) {
1453        if (bl < 0) {
1454            bl = -bl;
1455            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1456        } else {
1457            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1458        }
1459
1460        if (unlikely(al == TCG_REG_XZR)) {
1461            /* ??? We want to allow al to be zero for the benefit of
1462               negation via subtraction.  However, that leaves open the
1463               possibility of adding 0+const in the low part, and the
1464               immediate add instructions encode XSP not XZR.  Don't try
1465               anything more elaborate here than loading another zero.  */
1466            al = TCG_REG_TMP;
1467            tcg_out_movi(s, ext, al, 0);
1468        }
1469        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1470    } else {
1471        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1472    }
1473
1474    insn = I3503_ADC;
1475    if (const_bh) {
1476        /* Note that the only two constants we support are 0 and -1, and
1477           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1478        if ((bh != 0) ^ sub) {
1479            insn = I3503_SBC;
1480        }
1481        bh = TCG_REG_XZR;
1482    } else if (sub) {
1483        insn = I3503_SBC;
1484    }
1485    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1486
1487    tcg_out_mov(s, ext, orig_rl, rl);
1488}
1489
1490static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1491{
1492    static const uint32_t sync[] = {
1493        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1494        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1495        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1496        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1497        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1498    };
1499    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1500}
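
/* DMB_ISH provides the base of the CRm "option" field; OR-ing in DMB_LD
   and/or DMB_ST selects ISHLD, ISHST, or (both together) a full DMB ISH.
   Thus the table above maps store-store ordering to DMB ISHST, the pure
   load-first orderings (LD_LD, LD_ST and their combination) to DMB ISHLD,
   and all remaining combinations to DMB ISH.  */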
1501
1502static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1503                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1504{
1505    TCGReg a1 = a0;
1506    if (is_ctz) {
1507        a1 = TCG_REG_TMP;
1508        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1509    }
1510    if (const_b && b == (ext ? 64 : 32)) {
1511        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1512    } else {
1513        AArch64Insn sel = I3506_CSEL;
1514
1515        tcg_out_cmp(s, ext, a0, 0, 1);
1516        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1517
1518        if (const_b) {
1519            if (b == -1) {
1520                b = TCG_REG_XZR;
1521                sel = I3506_CSINV;
1522            } else if (b == 0) {
1523                b = TCG_REG_XZR;
1524            } else {
1525                tcg_out_movi(s, ext, d, b);
1526                b = d;
1527            }
1528        }
1529        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1530    }
1531}
1532
1533static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1534{
1535    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1536    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1537    tcg_out_insn(s, 3406, ADR, rd, offset);
1538}
1539
1540#ifdef CONFIG_SOFTMMU
1541/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1542 *                                     MemOpIdx oi, uintptr_t ra)
1543 */
1544static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1545    [MO_8]  = helper_ret_ldub_mmu,
1546#if HOST_BIG_ENDIAN
1547    [MO_16] = helper_be_lduw_mmu,
1548    [MO_32] = helper_be_ldul_mmu,
1549    [MO_64] = helper_be_ldq_mmu,
1550#else
1551    [MO_16] = helper_le_lduw_mmu,
1552    [MO_32] = helper_le_ldul_mmu,
1553    [MO_64] = helper_le_ldq_mmu,
1554#endif
1555};
1556
1557/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1558 *                                     uintxx_t val, MemOpIdx oi,
1559 *                                     uintptr_t ra)
1560 */
1561static void * const qemu_st_helpers[MO_SIZE + 1] = {
1562    [MO_8]  = helper_ret_stb_mmu,
1563#if HOST_BIG_ENDIAN
1564    [MO_16] = helper_be_stw_mmu,
1565    [MO_32] = helper_be_stl_mmu,
1566    [MO_64] = helper_be_stq_mmu,
1567#else
1568    [MO_16] = helper_le_stw_mmu,
1569    [MO_32] = helper_le_stl_mmu,
1570    [MO_64] = helper_le_stq_mmu,
1571#endif
1572};
1573
1574static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1575{
1576    MemOpIdx oi = lb->oi;
1577    MemOp opc = get_memop(oi);
1578    MemOp size = opc & MO_SIZE;
1579
1580    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1581        return false;
1582    }
1583
1584    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1585    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1586    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1587    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1588    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1589    if (opc & MO_SIGN) {
1590        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1591    } else {
1592        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1593    }
1594
1595    tcg_out_goto(s, lb->raddr);
1596    return true;
1597}
1598
1599static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1600{
1601    MemOpIdx oi = lb->oi;
1602    MemOp opc = get_memop(oi);
1603    MemOp size = opc & MO_SIZE;
1604
1605    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1606        return false;
1607    }
1608
1609    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1610    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1611    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1612    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1613    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1614    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1615    tcg_out_goto(s, lb->raddr);
1616    return true;
1617}
1618
1619static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1620                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1621                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1622{
1623    TCGLabelQemuLdst *label = new_ldst_label(s);
1624
1625    label->is_ld = is_ld;
1626    label->oi = oi;
1627    label->type = ext;
1628    label->datalo_reg = data_reg;
1629    label->addrlo_reg = addr_reg;
1630    label->raddr = tcg_splitwx_to_rx(raddr);
1631    label->label_ptr[0] = label_ptr;
1632}
1633
1634/* We expect to use a 7-bit scaled negative offset from ENV.  */
1635QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1636QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1637
1638/* These offsets are built into the LDP below.  */
1639QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1640QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1641
1642/* Load and compare a TLB entry, emitting the conditional jump to the
1643   slow path for the failure case, which will be patched later when finalizing
1644   the slow path. Generated code returns the host addend in X1,
1645   clobbers X0, X2, X3 and TMP. */
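/*
 * Illustrative expansion for a 64-bit guest (register numbers as emitted
 * below; offsets shown symbolically):
 *     ldp  x0, x1, [x19, #TLB_MASK_TABLE_OFS(mem_idx)]  ; f[mmu_idx].{mask,table}
 *     and  x0, x0, x_addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     add  x1, x1, x0                                   ; &CPUTLBEntry
 *     ldr  x0, [x1, #addr_read|addr_write]              ; tlb comparator
 *     ldr  x1, [x1, #addend]                            ; host addend
 *     add  x3, x_addr, #(s_mask - a_mask)               ; only if a_bits < s_bits
 *     and  x3, x3, #(TARGET_PAGE_MASK | a_mask)
 *     cmp  x0, x3
 *     b.ne slow_path
 */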
1646static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1647                             tcg_insn_unit **label_ptr, int mem_index,
1648                             bool is_read)
1649{
1650    unsigned a_bits = get_alignment_bits(opc);
1651    unsigned s_bits = opc & MO_SIZE;
1652    unsigned a_mask = (1u << a_bits) - 1;
1653    unsigned s_mask = (1u << s_bits) - 1;
1654    TCGReg x3;
1655    TCGType mask_type;
1656    uint64_t compare_mask;
1657
1658    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1659                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1660
1661    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1662    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1663                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1664
1665    /* Extract the TLB index from the address into X0.  */
1666    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1667                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1668                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1669
1670    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1671    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1672
1673    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1674    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1675               ? offsetof(CPUTLBEntry, addr_read)
1676               : offsetof(CPUTLBEntry, addr_write));
1677    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1678               offsetof(CPUTLBEntry, addend));
1679
1680    /* For aligned accesses, we check the first byte and include the alignment
1681       bits within the address.  For unaligned access, we check that we don't
1682       cross pages using the address of the last byte of the access.  */
1683    if (a_bits >= s_bits) {
1684        x3 = addr_reg;
1685    } else {
1686        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1687                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1688        x3 = TCG_REG_X3;
1689    }
1690    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1691
1692    /* Store the page mask part of the address into X3.  */
1693    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1694                     TCG_REG_X3, x3, compare_mask);
1695
1696    /* Perform the address comparison. */
1697    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1698
1699    /* If not equal, we jump to the slow path. */
1700    *label_ptr = s->code_ptr;
1701    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1702}
1703
1704#else
1705static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1706                                   unsigned a_bits)
1707{
1708    unsigned a_mask = (1 << a_bits) - 1;
1709    TCGLabelQemuLdst *label = new_ldst_label(s);
1710
1711    label->is_ld = is_ld;
1712    label->addrlo_reg = addr_reg;
1713
1714    /* tst addr, #mask */
1715    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1716
1717    label->label_ptr[0] = s->code_ptr;
1718
1719    /* b.ne slow_path */
1720    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1721
1722    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1723}
1724
1725static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1726{
1727    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1728        return false;
1729    }
1730
1731    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1732    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1733
1734    /* "Tail call" to the helper, with the return address back inline. */
1735    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1736    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1737                                        : helper_unaligned_st));
1738    return true;
1739}
1740
1741static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1742{
1743    return tcg_out_fail_alignment(s, l);
1744}
1745
1746static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1747{
1748    return tcg_out_fail_alignment(s, l);
1749}
1750#endif /* CONFIG_SOFTMMU */
1751
1752static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1753                                   TCGReg data_r, TCGReg addr_r,
1754                                   TCGType otype, TCGReg off_r)
1755{
1756    switch (memop & MO_SSIZE) {
1757    case MO_UB:
1758        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1759        break;
1760    case MO_SB:
1761        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1762                       data_r, addr_r, otype, off_r);
1763        break;
1764    case MO_UW:
1765        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1766        break;
1767    case MO_SW:
1768        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1769                       data_r, addr_r, otype, off_r);
1770        break;
1771    case MO_UL:
1772        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1773        break;
1774    case MO_SL:
1775        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1776        break;
1777    case MO_UQ:
1778        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1779        break;
1780    default:
1781        tcg_abort();
1782    }
1783}
1784
1785static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1786                                   TCGReg data_r, TCGReg addr_r,
1787                                   TCGType otype, TCGReg off_r)
1788{
1789    switch (memop & MO_SIZE) {
1790    case MO_8:
1791        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1792        break;
1793    case MO_16:
1794        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1795        break;
1796    case MO_32:
1797        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1798        break;
1799    case MO_64:
1800        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1801        break;
1802    default:
1803        tcg_abort();
1804    }
1805}
1806
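/*
 * Fast-path addressing for both loads and stores: with SOFTMMU the access
 * is register+register, using the host addend left in X1 by
 * tcg_out_tlb_read as the base and the (possibly zero-extended) guest
 * address as the index; without SOFTMMU the base is the reserved
 * guest_base register, or the guest address itself with a zero index.
 */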
1807static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1808                            MemOpIdx oi, TCGType ext)
1809{
1810    MemOp memop = get_memop(oi);
1811    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1812
1813    /* Byte swapping is left to middle-end expansion. */
1814    tcg_debug_assert((memop & MO_BSWAP) == 0);
1815
1816#ifdef CONFIG_SOFTMMU
1817    unsigned mem_index = get_mmuidx(oi);
1818    tcg_insn_unit *label_ptr;
1819
1820    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1821    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1822                           TCG_REG_X1, otype, addr_reg);
1823    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1824                        s->code_ptr, label_ptr);
1825#else /* !CONFIG_SOFTMMU */
1826    unsigned a_bits = get_alignment_bits(memop);
1827    if (a_bits) {
1828        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1829    }
1830    if (USE_GUEST_BASE) {
1831        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1832                               TCG_REG_GUEST_BASE, otype, addr_reg);
1833    } else {
1834        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1836    }
1837#endif /* CONFIG_SOFTMMU */
1838}
1839
1840static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1841                            MemOpIdx oi)
1842{
1843    MemOp memop = get_memop(oi);
1844    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1845
1846    /* Byte swapping is left to middle-end expansion. */
1847    tcg_debug_assert((memop & MO_BSWAP) == 0);
1848
1849#ifdef CONFIG_SOFTMMU
1850    unsigned mem_index = get_mmuidx(oi);
1851    tcg_insn_unit *label_ptr;
1852
1853    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1854    tcg_out_qemu_st_direct(s, memop, data_reg,
1855                           TCG_REG_X1, otype, addr_reg);
1856        add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1857                        data_reg, addr_reg, s->code_ptr, label_ptr);
1858#else /* !CONFIG_SOFTMMU */
1859    unsigned a_bits = get_alignment_bits(memop);
1860    if (a_bits) {
1861        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1862    }
1863    if (USE_GUEST_BASE) {
1864        tcg_out_qemu_st_direct(s, memop, data_reg,
1865                               TCG_REG_GUEST_BASE, otype, addr_reg);
1866    } else {
1867        tcg_out_qemu_st_direct(s, memop, data_reg,
1868                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1869    }
1870#endif /* CONFIG_SOFTMMU */
1871}
1872
1873static const tcg_insn_unit *tb_ret_addr;
1874
1875static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1876{
1877    /* Reuse the zeroing that exists for goto_ptr.  */
1878    if (a0 == 0) {
1879        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1880    } else {
1881        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1882        tcg_out_goto_long(s, tb_ret_addr);
1883    }
1884}
1885
1886static void tcg_out_goto_tb(TCGContext *s, int which)
1887{
1888    /*
1889     * Direct branch, or indirect address load, will be patched
1890     * by tb_target_set_jmp_target.  Assert indirect load offset
1891     * in range early, regardless of direct branch distance.
1892     */
1893    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1894    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1895
1896    set_jmp_insn_offset(s, which);
1897    tcg_out32(s, I3206_B);
1898    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1899    set_jmp_reset_offset(s, which);
1900}
1901
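/*
 * The two-insn sequence emitted by tcg_out_goto_tb is patched here into
 * one of two forms (illustrative):
 *     b    <tb destination>        ; if within the +-128MiB branch range
 * or
 *     ldr  x30, <jmp_target_addr>  ; pc-relative literal load of the target
 *     br   x30                     ; the pre-existing second instruction
 */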
1902void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1903                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1904{
1905    uintptr_t d_addr = tb->jmp_target_addr[n];
1906    ptrdiff_t d_offset = d_addr - jmp_rx;
1907    tcg_insn_unit insn;
1908
1909    /* Either a direct branch, or an indirect branch via a literal load. */
1910    if (d_offset == sextract64(d_offset, 0, 28)) {
1911        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1912    } else {
1913        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1914        ptrdiff_t i_offset = i_addr - jmp_rx;
1915
1916        /* Note that we asserted this in range in tcg_out_goto_tb. */
1917        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1918    }
1919    qatomic_set((uint32_t *)jmp_rw, insn);
1920    flush_idcache_range(jmp_rx, jmp_rw, 4);
1921}
1922
1923static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1924                       const TCGArg args[TCG_MAX_OP_ARGS],
1925                       const int const_args[TCG_MAX_OP_ARGS])
1926{
1927    /* 99% of the time, we can signal the use of extension registers
1928       by looking to see if the opcode handles 64-bit data.  */
1929    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1930
1931    /* Hoist the loads of the most common arguments.  */
1932    TCGArg a0 = args[0];
1933    TCGArg a1 = args[1];
1934    TCGArg a2 = args[2];
1935    int c2 = const_args[2];
1936
1937    /* Some operands are defined with "rZ" constraint, a register or
1938       the zero register.  These need not actually test args[I] == 0.  */
1939#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1940
1941    switch (opc) {
1942    case INDEX_op_goto_ptr:
1943        tcg_out_insn(s, 3207, BR, a0);
1944        break;
1945
1946    case INDEX_op_br:
1947        tcg_out_goto_label(s, arg_label(a0));
1948        break;
1949
1950    case INDEX_op_ld8u_i32:
1951    case INDEX_op_ld8u_i64:
1952        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1953        break;
1954    case INDEX_op_ld8s_i32:
1955        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1956        break;
1957    case INDEX_op_ld8s_i64:
1958        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1959        break;
1960    case INDEX_op_ld16u_i32:
1961    case INDEX_op_ld16u_i64:
1962        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1963        break;
1964    case INDEX_op_ld16s_i32:
1965        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1966        break;
1967    case INDEX_op_ld16s_i64:
1968        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1969        break;
1970    case INDEX_op_ld_i32:
1971    case INDEX_op_ld32u_i64:
1972        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1973        break;
1974    case INDEX_op_ld32s_i64:
1975        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1976        break;
1977    case INDEX_op_ld_i64:
1978        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1979        break;
1980
1981    case INDEX_op_st8_i32:
1982    case INDEX_op_st8_i64:
1983        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1984        break;
1985    case INDEX_op_st16_i32:
1986    case INDEX_op_st16_i64:
1987        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1988        break;
1989    case INDEX_op_st_i32:
1990    case INDEX_op_st32_i64:
1991        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1992        break;
1993    case INDEX_op_st_i64:
1994        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1995        break;
1996
1997    case INDEX_op_add_i32:
1998        a2 = (int32_t)a2;
1999        /* FALLTHRU */
2000    case INDEX_op_add_i64:
2001        if (c2) {
2002            tcg_out_addsubi(s, ext, a0, a1, a2);
2003        } else {
2004            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2005        }
2006        break;
2007
2008    case INDEX_op_sub_i32:
2009        a2 = (int32_t)a2;
2010        /* FALLTHRU */
2011    case INDEX_op_sub_i64:
2012        if (c2) {
2013            tcg_out_addsubi(s, ext, a0, a1, -a2);
2014        } else {
2015            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2016        }
2017        break;
2018
2019    case INDEX_op_neg_i64:
2020    case INDEX_op_neg_i32:
2021        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2022        break;
2023
2024    case INDEX_op_and_i32:
2025        a2 = (int32_t)a2;
2026        /* FALLTHRU */
2027    case INDEX_op_and_i64:
2028        if (c2) {
2029            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2030        } else {
2031            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2032        }
2033        break;
2034
2035    case INDEX_op_andc_i32:
2036        a2 = (int32_t)a2;
2037        /* FALLTHRU */
2038    case INDEX_op_andc_i64:
2039        if (c2) {
2040            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2041        } else {
2042            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2043        }
2044        break;
2045
2046    case INDEX_op_or_i32:
2047        a2 = (int32_t)a2;
2048        /* FALLTHRU */
2049    case INDEX_op_or_i64:
2050        if (c2) {
2051            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2052        } else {
2053            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2054        }
2055        break;
2056
2057    case INDEX_op_orc_i32:
2058        a2 = (int32_t)a2;
2059        /* FALLTHRU */
2060    case INDEX_op_orc_i64:
2061        if (c2) {
2062            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2063        } else {
2064            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2065        }
2066        break;
2067
2068    case INDEX_op_xor_i32:
2069        a2 = (int32_t)a2;
2070        /* FALLTHRU */
2071    case INDEX_op_xor_i64:
2072        if (c2) {
2073            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2074        } else {
2075            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2076        }
2077        break;
2078
2079    case INDEX_op_eqv_i32:
2080        a2 = (int32_t)a2;
2081        /* FALLTHRU */
2082    case INDEX_op_eqv_i64:
2083        if (c2) {
2084            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2085        } else {
2086            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2087        }
2088        break;
2089
2090    case INDEX_op_not_i64:
2091    case INDEX_op_not_i32:
2092        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2093        break;
2094
2095    case INDEX_op_mul_i64:
2096    case INDEX_op_mul_i32:
2097        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2098        break;
2099
2100    case INDEX_op_div_i64:
2101    case INDEX_op_div_i32:
2102        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2103        break;
2104    case INDEX_op_divu_i64:
2105    case INDEX_op_divu_i32:
2106        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2107        break;
2108
2109    case INDEX_op_rem_i64:
2110    case INDEX_op_rem_i32:
2111        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2112        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2113        break;
2114    case INDEX_op_remu_i64:
2115    case INDEX_op_remu_i32:
2116        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2117        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2118        break;
2119
2120    case INDEX_op_shl_i64:
2121    case INDEX_op_shl_i32:
2122        if (c2) {
2123            tcg_out_shl(s, ext, a0, a1, a2);
2124        } else {
2125            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2126        }
2127        break;
2128
2129    case INDEX_op_shr_i64:
2130    case INDEX_op_shr_i32:
2131        if (c2) {
2132            tcg_out_shr(s, ext, a0, a1, a2);
2133        } else {
2134            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2135        }
2136        break;
2137
2138    case INDEX_op_sar_i64:
2139    case INDEX_op_sar_i32:
2140        if (c2) {
2141            tcg_out_sar(s, ext, a0, a1, a2);
2142        } else {
2143            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2144        }
2145        break;
2146
2147    case INDEX_op_rotr_i64:
2148    case INDEX_op_rotr_i32:
2149        if (c2) {
2150            tcg_out_rotr(s, ext, a0, a1, a2);
2151        } else {
2152            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2153        }
2154        break;
2155
2156    case INDEX_op_rotl_i64:
2157    case INDEX_op_rotl_i32:
2158        if (c2) {
2159            tcg_out_rotl(s, ext, a0, a1, a2);
2160        } else {
2161            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2162            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2163        }
2164        break;
2165
2166    case INDEX_op_clz_i64:
2167    case INDEX_op_clz_i32:
2168        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2169        break;
2170    case INDEX_op_ctz_i64:
2171    case INDEX_op_ctz_i32:
2172        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2173        break;
2174
2175    case INDEX_op_brcond_i32:
2176        a1 = (int32_t)a1;
2177        /* FALLTHRU */
2178    case INDEX_op_brcond_i64:
2179        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2180        break;
2181
2182    case INDEX_op_setcond_i32:
2183        a2 = (int32_t)a2;
2184        /* FALLTHRU */
2185    case INDEX_op_setcond_i64:
2186        tcg_out_cmp(s, ext, a1, a2, c2);
2187        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2188        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2189                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2190        break;
2191
2192    case INDEX_op_movcond_i32:
2193        a2 = (int32_t)a2;
2194        /* FALLTHRU */
2195    case INDEX_op_movcond_i64:
2196        tcg_out_cmp(s, ext, a1, a2, c2);
2197        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2198        break;
2199
2200    case INDEX_op_qemu_ld_i32:
2201    case INDEX_op_qemu_ld_i64:
2202        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2203        break;
2204    case INDEX_op_qemu_st_i32:
2205    case INDEX_op_qemu_st_i64:
2206        tcg_out_qemu_st(s, REG0(0), a1, a2);
2207        break;
2208
2209    case INDEX_op_bswap64_i64:
2210        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2211        break;
2212    case INDEX_op_bswap32_i64:
2213        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2214        if (a2 & TCG_BSWAP_OS) {
2215            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
2216        }
2217        break;
2218    case INDEX_op_bswap32_i32:
2219        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2220        break;
2221    case INDEX_op_bswap16_i64:
2222    case INDEX_op_bswap16_i32:
2223        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2224        if (a2 & TCG_BSWAP_OS) {
2225            /* Output must be sign-extended. */
2226            tcg_out_sxt(s, ext, MO_16, a0, a0);
2227        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2228            /* Output must be zero-extended, but input isn't. */
2229            tcg_out_uxt(s, MO_16, a0, a0);
2230        }
2231        break;
2232
2233    case INDEX_op_ext8s_i64:
2234    case INDEX_op_ext8s_i32:
2235        tcg_out_sxt(s, ext, MO_8, a0, a1);
2236        break;
2237    case INDEX_op_ext16s_i64:
2238    case INDEX_op_ext16s_i32:
2239        tcg_out_sxt(s, ext, MO_16, a0, a1);
2240        break;
2241    case INDEX_op_ext_i32_i64:
2242    case INDEX_op_ext32s_i64:
2243        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2244        break;
2245    case INDEX_op_ext8u_i64:
2246    case INDEX_op_ext8u_i32:
2247        tcg_out_uxt(s, MO_8, a0, a1);
2248        break;
2249    case INDEX_op_ext16u_i64:
2250    case INDEX_op_ext16u_i32:
2251        tcg_out_uxt(s, MO_16, a0, a1);
2252        break;
2253    case INDEX_op_extu_i32_i64:
2254    case INDEX_op_ext32u_i64:
2255        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2256        break;
2257
2258    case INDEX_op_deposit_i64:
2259    case INDEX_op_deposit_i32:
2260        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2261        break;
2262
2263    case INDEX_op_extract_i64:
2264    case INDEX_op_extract_i32:
2265        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2266        break;
2267
2268    case INDEX_op_sextract_i64:
2269    case INDEX_op_sextract_i32:
2270        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2271        break;
2272
2273    case INDEX_op_extract2_i64:
2274    case INDEX_op_extract2_i32:
2275        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2276        break;
2277
2278    case INDEX_op_add2_i32:
2279        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2280                        (int32_t)args[4], args[5], const_args[4],
2281                        const_args[5], false);
2282        break;
2283    case INDEX_op_add2_i64:
2284        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2285                        args[5], const_args[4], const_args[5], false);
2286        break;
2287    case INDEX_op_sub2_i32:
2288        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2289                        (int32_t)args[4], args[5], const_args[4],
2290                        const_args[5], true);
2291        break;
2292    case INDEX_op_sub2_i64:
2293        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2294                        args[5], const_args[4], const_args[5], true);
2295        break;
2296
2297    case INDEX_op_muluh_i64:
2298        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2299        break;
2300    case INDEX_op_mulsh_i64:
2301        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2302        break;
2303
2304    case INDEX_op_mb:
2305        tcg_out_mb(s, a0);
2306        break;
2307
2308    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2309    case INDEX_op_mov_i64:
2310    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2311    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2312    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2313    default:
2314        g_assert_not_reached();
2315    }
2316
2317#undef REG0
2318}
2319
2320static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2321                           unsigned vecl, unsigned vece,
2322                           const TCGArg args[TCG_MAX_OP_ARGS],
2323                           const int const_args[TCG_MAX_OP_ARGS])
2324{
2325    static const AArch64Insn cmp_vec_insn[16] = {
2326        [TCG_COND_EQ] = I3616_CMEQ,
2327        [TCG_COND_GT] = I3616_CMGT,
2328        [TCG_COND_GE] = I3616_CMGE,
2329        [TCG_COND_GTU] = I3616_CMHI,
2330        [TCG_COND_GEU] = I3616_CMHS,
2331    };
2332    static const AArch64Insn cmp_scalar_insn[16] = {
2333        [TCG_COND_EQ] = I3611_CMEQ,
2334        [TCG_COND_GT] = I3611_CMGT,
2335        [TCG_COND_GE] = I3611_CMGE,
2336        [TCG_COND_GTU] = I3611_CMHI,
2337        [TCG_COND_GEU] = I3611_CMHS,
2338    };
2339    static const AArch64Insn cmp0_vec_insn[16] = {
2340        [TCG_COND_EQ] = I3617_CMEQ0,
2341        [TCG_COND_GT] = I3617_CMGT0,
2342        [TCG_COND_GE] = I3617_CMGE0,
2343        [TCG_COND_LT] = I3617_CMLT0,
2344        [TCG_COND_LE] = I3617_CMLE0,
2345    };
2346    static const AArch64Insn cmp0_scalar_insn[16] = {
2347        [TCG_COND_EQ] = I3612_CMEQ0,
2348        [TCG_COND_GT] = I3612_CMGT0,
2349        [TCG_COND_GE] = I3612_CMGE0,
2350        [TCG_COND_LT] = I3612_CMLT0,
2351        [TCG_COND_LE] = I3612_CMLE0,
2352    };
2353
2354    TCGType type = vecl + TCG_TYPE_V64;
2355    unsigned is_q = vecl;
2356    bool is_scalar = !is_q && vece == MO_64;
2357    TCGArg a0, a1, a2, a3;
2358    int cmode, imm8;
2359
2360    a0 = args[0];
2361    a1 = args[1];
2362    a2 = args[2];
2363
2364    switch (opc) {
2365    case INDEX_op_ld_vec:
2366        tcg_out_ld(s, type, a0, a1, a2);
2367        break;
2368    case INDEX_op_st_vec:
2369        tcg_out_st(s, type, a0, a1, a2);
2370        break;
2371    case INDEX_op_dupm_vec:
2372        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2373        break;
2374    case INDEX_op_add_vec:
2375        if (is_scalar) {
2376            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2377        } else {
2378            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2379        }
2380        break;
2381    case INDEX_op_sub_vec:
2382        if (is_scalar) {
2383            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2384        } else {
2385            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2386        }
2387        break;
2388    case INDEX_op_mul_vec:
2389        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2390        break;
2391    case INDEX_op_neg_vec:
2392        if (is_scalar) {
2393            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2394        } else {
2395            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2396        }
2397        break;
2398    case INDEX_op_abs_vec:
2399        if (is_scalar) {
2400            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2401        } else {
2402            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2403        }
2404        break;
2405    case INDEX_op_and_vec:
2406        if (const_args[2]) {
2407            is_shimm1632(~a2, &cmode, &imm8);
2408            if (a0 == a1) {
2409                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2410                return;
2411            }
2412            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2413            a2 = a0;
2414        }
2415        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2416        break;
2417    case INDEX_op_or_vec:
2418        if (const_args[2]) {
2419            is_shimm1632(a2, &cmode, &imm8);
2420            if (a0 == a1) {
2421                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2422                return;
2423            }
2424            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2425            a2 = a0;
2426        }
2427        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2428        break;
2429    case INDEX_op_andc_vec:
2430        if (const_args[2]) {
2431            is_shimm1632(a2, &cmode, &imm8);
2432            if (a0 == a1) {
2433                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2434                return;
2435            }
2436            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2437            a2 = a0;
2438        }
2439        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2440        break;
2441    case INDEX_op_orc_vec:
2442        if (const_args[2]) {
2443            is_shimm1632(~a2, &cmode, &imm8);
2444            if (a0 == a1) {
2445                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2446                return;
2447            }
2448            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2449            a2 = a0;
2450        }
2451        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2452        break;
2453    case INDEX_op_xor_vec:
2454        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2455        break;
2456    case INDEX_op_ssadd_vec:
2457        if (is_scalar) {
2458            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2459        } else {
2460            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2461        }
2462        break;
2463    case INDEX_op_sssub_vec:
2464        if (is_scalar) {
2465            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2466        } else {
2467            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2468        }
2469        break;
2470    case INDEX_op_usadd_vec:
2471        if (is_scalar) {
2472            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2473        } else {
2474            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2475        }
2476        break;
2477    case INDEX_op_ussub_vec:
2478        if (is_scalar) {
2479            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2480        } else {
2481            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2482        }
2483        break;
2484    case INDEX_op_smax_vec:
2485        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2486        break;
2487    case INDEX_op_smin_vec:
2488        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2489        break;
2490    case INDEX_op_umax_vec:
2491        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2492        break;
2493    case INDEX_op_umin_vec:
2494        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2495        break;
2496    case INDEX_op_not_vec:
2497        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2498        break;
2499    case INDEX_op_shli_vec:
2500        if (is_scalar) {
2501            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2502        } else {
2503            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2504        }
2505        break;
2506    case INDEX_op_shri_vec:
2507        if (is_scalar) {
2508            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2509        } else {
2510            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2511        }
2512        break;
2513    case INDEX_op_sari_vec:
2514        if (is_scalar) {
2515            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2516        } else {
2517            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2518        }
2519        break;
2520    case INDEX_op_aa64_sli_vec:
2521        if (is_scalar) {
2522            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2523        } else {
2524            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2525        }
2526        break;
2527    case INDEX_op_shlv_vec:
2528        if (is_scalar) {
2529            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2530        } else {
2531            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2532        }
2533        break;
2534    case INDEX_op_aa64_sshl_vec:
2535        if (is_scalar) {
2536            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2537        } else {
2538            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2539        }
2540        break;
2541    case INDEX_op_cmp_vec:
2542        {
2543            TCGCond cond = args[3];
2544            AArch64Insn insn;
2545
2546            if (cond == TCG_COND_NE) {
2547                if (const_args[2]) {
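                    /* NE against constant zero: CMTST d, n, n sets each
                       lane to all-ones iff (n & n) != 0, i.e. iff the
                       lane is non-zero. */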
2548                    if (is_scalar) {
2549                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2550                    } else {
2551                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2552                    }
2553                } else {
2554                    if (is_scalar) {
2555                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2556                    } else {
2557                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2558                    }
2559                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2560                }
2561            } else {
2562                if (const_args[2]) {
2563                    if (is_scalar) {
2564                        insn = cmp0_scalar_insn[cond];
2565                        if (insn) {
2566                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2567                            break;
2568                        }
2569                    } else {
2570                        insn = cmp0_vec_insn[cond];
2571                        if (insn) {
2572                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2573                            break;
2574                        }
2575                    }
2576                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2577                    a2 = TCG_VEC_TMP;
2578                }
2579                if (is_scalar) {
2580                    insn = cmp_scalar_insn[cond];
2581                    if (insn == 0) {
2582                        TCGArg t;
2583                        t = a1, a1 = a2, a2 = t;
2584                        cond = tcg_swap_cond(cond);
2585                        insn = cmp_scalar_insn[cond];
2586                        tcg_debug_assert(insn != 0);
2587                    }
2588                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2589                } else {
2590                    insn = cmp_vec_insn[cond];
2591                    if (insn == 0) {
2592                        TCGArg t;
2593                        t = a1, a1 = a2, a2 = t;
2594                        cond = tcg_swap_cond(cond);
2595                        insn = cmp_vec_insn[cond];
2596                        tcg_debug_assert(insn != 0);
2597                    }
2598                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2599                }
2600            }
2601        }
2602        break;
2603
2604    case INDEX_op_bitsel_vec:
2605        a3 = args[3];
2606        if (a0 == a3) {
2607            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2608        } else if (a0 == a2) {
2609            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2610        } else {
2611            if (a0 != a1) {
2612                tcg_out_mov(s, type, a0, a1);
2613            }
2614            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2615        }
2616        break;
2617
2618    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2619    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2620    default:
2621        g_assert_not_reached();
2622    }
2623}
2624
2625int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2626{
2627    switch (opc) {
2628    case INDEX_op_add_vec:
2629    case INDEX_op_sub_vec:
2630    case INDEX_op_and_vec:
2631    case INDEX_op_or_vec:
2632    case INDEX_op_xor_vec:
2633    case INDEX_op_andc_vec:
2634    case INDEX_op_orc_vec:
2635    case INDEX_op_neg_vec:
2636    case INDEX_op_abs_vec:
2637    case INDEX_op_not_vec:
2638    case INDEX_op_cmp_vec:
2639    case INDEX_op_shli_vec:
2640    case INDEX_op_shri_vec:
2641    case INDEX_op_sari_vec:
2642    case INDEX_op_ssadd_vec:
2643    case INDEX_op_sssub_vec:
2644    case INDEX_op_usadd_vec:
2645    case INDEX_op_ussub_vec:
2646    case INDEX_op_shlv_vec:
2647    case INDEX_op_bitsel_vec:
2648        return 1;
2649    case INDEX_op_rotli_vec:
2650    case INDEX_op_shrv_vec:
2651    case INDEX_op_sarv_vec:
2652    case INDEX_op_rotlv_vec:
2653    case INDEX_op_rotrv_vec:
2654        return -1;
2655    case INDEX_op_mul_vec:
2656    case INDEX_op_smax_vec:
2657    case INDEX_op_smin_vec:
2658    case INDEX_op_umax_vec:
2659    case INDEX_op_umin_vec:
2660        return vece < MO_64;
2661
2662    default:
2663        return 0;
2664    }
2665}
2666
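/*
 * Expand vector ops that have no single-instruction form.  Illustratively:
 * rotli becomes a right shift plus an SLI insert; shrv/sarv become
 * ushl/sshl with negated shift counts; rotlv/rotrv combine two variable
 * shifts with an OR.
 */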
2667void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2668                       TCGArg a0, ...)
2669{
2670    va_list va;
2671    TCGv_vec v0, v1, v2, t1, t2, c1;
2672    TCGArg a2;
2673
2674    va_start(va, a0);
2675    v0 = temp_tcgv_vec(arg_temp(a0));
2676    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2677    a2 = va_arg(va, TCGArg);
2678    va_end(va);
2679
2680    switch (opc) {
2681    case INDEX_op_rotli_vec:
2682        t1 = tcg_temp_new_vec(type);
2683        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2684        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2685                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2686        tcg_temp_free_vec(t1);
2687        break;
2688
2689    case INDEX_op_shrv_vec:
2690    case INDEX_op_sarv_vec:
2691        /* Right shifts are negative left shifts for AArch64.  */
2692        v2 = temp_tcgv_vec(arg_temp(a2));
2693        t1 = tcg_temp_new_vec(type);
2694        tcg_gen_neg_vec(vece, t1, v2);
2695        opc = (opc == INDEX_op_shrv_vec
2696               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2697        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2698                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2699        tcg_temp_free_vec(t1);
2700        break;
2701
2702    case INDEX_op_rotlv_vec:
2703        v2 = temp_tcgv_vec(arg_temp(a2));
2704        t1 = tcg_temp_new_vec(type);
2705        c1 = tcg_constant_vec(type, vece, 8 << vece);
2706        tcg_gen_sub_vec(vece, t1, v2, c1);
2707        /* Right shifts are negative left shifts for AArch64.  */
2708        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2709                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2710        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2711                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2712        tcg_gen_or_vec(vece, v0, v0, t1);
2713        tcg_temp_free_vec(t1);
2714        break;
2715
2716    case INDEX_op_rotrv_vec:
2717        v2 = temp_tcgv_vec(arg_temp(a2));
2718        t1 = tcg_temp_new_vec(type);
2719        t2 = tcg_temp_new_vec(type);
2720        c1 = tcg_constant_vec(type, vece, 8 << vece);
2721        tcg_gen_neg_vec(vece, t1, v2);
2722        tcg_gen_sub_vec(vece, t2, c1, v2);
2723        /* Right shifts are negative left shifts for AArch64.  */
2724        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2725                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2726        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2727                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2728        tcg_gen_or_vec(vece, v0, t1, t2);
2729        tcg_temp_free_vec(t1);
2730        tcg_temp_free_vec(t2);
2731        break;
2732
2733    default:
2734        g_assert_not_reached();
2735    }
2736}
2737
2738static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2739{
2740    switch (op) {
2741    case INDEX_op_goto_ptr:
2742        return C_O0_I1(r);
2743
2744    case INDEX_op_ld8u_i32:
2745    case INDEX_op_ld8s_i32:
2746    case INDEX_op_ld16u_i32:
2747    case INDEX_op_ld16s_i32:
2748    case INDEX_op_ld_i32:
2749    case INDEX_op_ld8u_i64:
2750    case INDEX_op_ld8s_i64:
2751    case INDEX_op_ld16u_i64:
2752    case INDEX_op_ld16s_i64:
2753    case INDEX_op_ld32u_i64:
2754    case INDEX_op_ld32s_i64:
2755    case INDEX_op_ld_i64:
2756    case INDEX_op_neg_i32:
2757    case INDEX_op_neg_i64:
2758    case INDEX_op_not_i32:
2759    case INDEX_op_not_i64:
2760    case INDEX_op_bswap16_i32:
2761    case INDEX_op_bswap32_i32:
2762    case INDEX_op_bswap16_i64:
2763    case INDEX_op_bswap32_i64:
2764    case INDEX_op_bswap64_i64:
2765    case INDEX_op_ext8s_i32:
2766    case INDEX_op_ext16s_i32:
2767    case INDEX_op_ext8u_i32:
2768    case INDEX_op_ext16u_i32:
2769    case INDEX_op_ext8s_i64:
2770    case INDEX_op_ext16s_i64:
2771    case INDEX_op_ext32s_i64:
2772    case INDEX_op_ext8u_i64:
2773    case INDEX_op_ext16u_i64:
2774    case INDEX_op_ext32u_i64:
2775    case INDEX_op_ext_i32_i64:
2776    case INDEX_op_extu_i32_i64:
2777    case INDEX_op_extract_i32:
2778    case INDEX_op_extract_i64:
2779    case INDEX_op_sextract_i32:
2780    case INDEX_op_sextract_i64:
2781        return C_O1_I1(r, r);
2782
2783    case INDEX_op_st8_i32:
2784    case INDEX_op_st16_i32:
2785    case INDEX_op_st_i32:
2786    case INDEX_op_st8_i64:
2787    case INDEX_op_st16_i64:
2788    case INDEX_op_st32_i64:
2789    case INDEX_op_st_i64:
2790        return C_O0_I2(rZ, r);
2791
2792    case INDEX_op_add_i32:
2793    case INDEX_op_add_i64:
2794    case INDEX_op_sub_i32:
2795    case INDEX_op_sub_i64:
2796    case INDEX_op_setcond_i32:
2797    case INDEX_op_setcond_i64:
2798        return C_O1_I2(r, r, rA);
2799
2800    case INDEX_op_mul_i32:
2801    case INDEX_op_mul_i64:
2802    case INDEX_op_div_i32:
2803    case INDEX_op_div_i64:
2804    case INDEX_op_divu_i32:
2805    case INDEX_op_divu_i64:
2806    case INDEX_op_rem_i32:
2807    case INDEX_op_rem_i64:
2808    case INDEX_op_remu_i32:
2809    case INDEX_op_remu_i64:
2810    case INDEX_op_muluh_i64:
2811    case INDEX_op_mulsh_i64:
2812        return C_O1_I2(r, r, r);
2813
2814    case INDEX_op_and_i32:
2815    case INDEX_op_and_i64:
2816    case INDEX_op_or_i32:
2817    case INDEX_op_or_i64:
2818    case INDEX_op_xor_i32:
2819    case INDEX_op_xor_i64:
2820    case INDEX_op_andc_i32:
2821    case INDEX_op_andc_i64:
2822    case INDEX_op_orc_i32:
2823    case INDEX_op_orc_i64:
2824    case INDEX_op_eqv_i32:
2825    case INDEX_op_eqv_i64:
2826        return C_O1_I2(r, r, rL);
2827
2828    case INDEX_op_shl_i32:
2829    case INDEX_op_shr_i32:
2830    case INDEX_op_sar_i32:
2831    case INDEX_op_rotl_i32:
2832    case INDEX_op_rotr_i32:
2833    case INDEX_op_shl_i64:
2834    case INDEX_op_shr_i64:
2835    case INDEX_op_sar_i64:
2836    case INDEX_op_rotl_i64:
2837    case INDEX_op_rotr_i64:
2838        return C_O1_I2(r, r, ri);
2839
2840    case INDEX_op_clz_i32:
2841    case INDEX_op_ctz_i32:
2842    case INDEX_op_clz_i64:
2843    case INDEX_op_ctz_i64:
2844        return C_O1_I2(r, r, rAL);
2845
2846    case INDEX_op_brcond_i32:
2847    case INDEX_op_brcond_i64:
2848        return C_O0_I2(r, rA);
2849
2850    case INDEX_op_movcond_i32:
2851    case INDEX_op_movcond_i64:
2852        return C_O1_I4(r, r, rA, rZ, rZ);
2853
2854    case INDEX_op_qemu_ld_i32:
2855    case INDEX_op_qemu_ld_i64:
2856        return C_O1_I1(r, l);
2857    case INDEX_op_qemu_st_i32:
2858    case INDEX_op_qemu_st_i64:
2859        return C_O0_I2(lZ, l);
2860
2861    case INDEX_op_deposit_i32:
2862    case INDEX_op_deposit_i64:
2863        return C_O1_I2(r, 0, rZ);
2864
2865    case INDEX_op_extract2_i32:
2866    case INDEX_op_extract2_i64:
2867        return C_O1_I2(r, rZ, rZ);
2868
2869    case INDEX_op_add2_i32:
2870    case INDEX_op_add2_i64:
2871    case INDEX_op_sub2_i32:
2872    case INDEX_op_sub2_i64:
2873        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2874
2875    case INDEX_op_add_vec:
2876    case INDEX_op_sub_vec:
2877    case INDEX_op_mul_vec:
2878    case INDEX_op_xor_vec:
2879    case INDEX_op_ssadd_vec:
2880    case INDEX_op_sssub_vec:
2881    case INDEX_op_usadd_vec:
2882    case INDEX_op_ussub_vec:
2883    case INDEX_op_smax_vec:
2884    case INDEX_op_smin_vec:
2885    case INDEX_op_umax_vec:
2886    case INDEX_op_umin_vec:
2887    case INDEX_op_shlv_vec:
2888    case INDEX_op_shrv_vec:
2889    case INDEX_op_sarv_vec:
2890    case INDEX_op_aa64_sshl_vec:
2891        return C_O1_I2(w, w, w);
2892    case INDEX_op_not_vec:
2893    case INDEX_op_neg_vec:
2894    case INDEX_op_abs_vec:
2895    case INDEX_op_shli_vec:
2896    case INDEX_op_shri_vec:
2897    case INDEX_op_sari_vec:
2898        return C_O1_I1(w, w);
2899    case INDEX_op_ld_vec:
2900    case INDEX_op_dupm_vec:
2901        return C_O1_I1(w, r);
2902    case INDEX_op_st_vec:
2903        return C_O0_I2(w, r);
2904    case INDEX_op_dup_vec:
2905        return C_O1_I1(w, wr);
2906    case INDEX_op_or_vec:
2907    case INDEX_op_andc_vec:
2908        return C_O1_I2(w, w, wO);
2909    case INDEX_op_and_vec:
2910    case INDEX_op_orc_vec:
2911        return C_O1_I2(w, w, wN);
2912    case INDEX_op_cmp_vec:
2913        return C_O1_I2(w, w, wZ);
2914    case INDEX_op_bitsel_vec:
2915        return C_O1_I3(w, w, w, w);
2916    case INDEX_op_aa64_sli_vec:
2917        return C_O1_I2(w, 0, w);
2918
2919    default:
2920        g_assert_not_reached();
2921    }
2922}
2923
2924static void tcg_target_init(TCGContext *s)
2925{
2926    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2927    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2928    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2929    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2930
2931    tcg_target_call_clobber_regs = -1ull;
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2937    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2938    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2939    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2940    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2941    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2942    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2943    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2944    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2945    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2946    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2947    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2948    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2949    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2950    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2951
2952    s->reserved_regs = 0;
2953    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2954    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2955    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2956    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2957    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2958}
2959
2960/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2961#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2962
2963#define FRAME_SIZE \
2964    ((PUSH_SIZE \
2965      + TCG_STATIC_CALL_ARGS_SIZE \
2966      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2967      + TCG_TARGET_STACK_ALIGN - 1) \
2968     & ~(TCG_TARGET_STACK_ALIGN - 1))
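
/* For example (illustrative values): PUSH_SIZE is 12 * 8 = 96 bytes, and
   with TCG_STATIC_CALL_ARGS_SIZE = 128 and CPU_TEMP_BUF_NLONGS = 128 the
   frame works out to 96 + 128 + 128 * 8 = 1248 bytes.  */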
2969
2970/* We're expecting a 2-byte uleb128 encoded value.  */
2971QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2972
2973/* We're expecting to use a single ADDI insn.  */
2974QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2975
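/*
 * Frame layout established below, from low to high addresses (sketch):
 *     sp + 0                          outgoing static call args
 *     sp + TCG_STATIC_CALL_ARGS_SIZE  CPU_TEMP_BUF temporaries
 *     sp + FRAME_SIZE - PUSH_SIZE     saved x29/x30 (fp/lr)
 *     ... + 16, 32, ..., 80           saved pairs x19/x20 .. x27/x28
 *     sp + FRAME_SIZE                 caller's frame
 */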
2976static void tcg_target_qemu_prologue(TCGContext *s)
2977{
2978    TCGReg r;
2979
2980    /* Push (FP, LR) and allocate space for all saved registers.  */
2981    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2982                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2983
2984    /* Set up frame pointer for canonical unwinding.  */
2985    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2986
2987    /* Store callee-preserved regs x19..x28.  */
2988    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2989        int ofs = (r - TCG_REG_X19 + 2) * 8;
2990        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2991    }
2992
2993    /* Make stack space for TCG locals.  */
2994    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2995                 FRAME_SIZE - PUSH_SIZE);
2996
2997    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2998    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2999                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3000
3001#if !defined(CONFIG_SOFTMMU)
3002    if (USE_GUEST_BASE) {
3003        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3004        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3005    }
3006#endif
3007
3008    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3009    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3010
3011    /*
3012     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3013     * and fall through to the rest of the epilogue.
3014     */
3015    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3016    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3017
3018    /* TB epilogue */
3019    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3020
3021    /* Remove TCG locals stack space.  */
3022    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3023                 FRAME_SIZE - PUSH_SIZE);
3024
3025    /* Restore registers x19..x28.  */
3026    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3027        int ofs = (r - TCG_REG_X19 + 2) * 8;
3028        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3029    }
3030
3031    /* Pop (FP, LR), restore SP to previous frame.  */
3032    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3033                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3034    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3035}
3036
3037static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3038{
3039    int i;
3040    for (i = 0; i < count; ++i) {
3041        p[i] = NOP;
3042    }
3043}
3044
3045typedef struct {
3046    DebugFrameHeader h;
3047    uint8_t fde_def_cfa[4];
3048    uint8_t fde_reg_ofs[24];
3049} DebugFrame;
3050
3051#define ELF_HOST_MACHINE EM_AARCH64
3052
3053static const DebugFrame debug_frame = {
3054    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3055    .h.cie.id = -1,
3056    .h.cie.version = 1,
3057    .h.cie.code_align = 1,
3058    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3059    .h.cie.return_column = TCG_REG_LR,
3060
3061    /* Total FDE size does not include the "len" member.  */
3062    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3063
3064    .fde_def_cfa = {
3065        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3066        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3067        (FRAME_SIZE >> 7)
3068    },
3069    .fde_reg_ofs = {
3070        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3071        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3072        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3073        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3074        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3075        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3076        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3077        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3078        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3079        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3080        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3081        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3082    }
3083};
3084
3085void tcg_register_jit(const void *buf, size_t buf_size)
3086{
3087    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3088}
3089