xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 51e47cf8)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
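/*
 * Illustrative note (not in the original source): the emitters below put
 * the TCGType value straight into bit 31, e.g. "insn | ext << 31" in
 * tcg_out_insn_3502(), so TCG_TYPE_I32 (0) selects the 32-bit W-register
 * form and TCG_TYPE_I64 (1) the 64-bit X-register form of an opcode.
 */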
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
74#define TCG_REG_TMP TCG_REG_X30
75#define TCG_VEC_TMP TCG_REG_V31
76
77#ifndef CONFIG_SOFTMMU
78/* Note that XZR cannot be encoded in the address base register slot,
79   as that actually encodes SP.  So if we need to zero-extend the guest
80   address, via the address index register slot, we need to load even
81   a zero guest base into a register.  */
82#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
83#define TCG_REG_GUEST_BASE TCG_REG_X28
84#endif
85
86static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
87{
88    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
89    ptrdiff_t offset = target - src_rx;
90
91    if (offset == sextract64(offset, 0, 26)) {
92        /* read instruction, mask away previous PC_REL26 parameter contents,
93           set the proper offset, then write back the instruction. */
94        *src_rw = deposit32(*src_rw, 0, 26, offset);
95        return true;
96    }
97    return false;
98}
99
100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
101{
102    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
103    ptrdiff_t offset = target - src_rx;
104
105    if (offset == sextract64(offset, 0, 19)) {
106        *src_rw = deposit32(*src_rw, 5, 19, offset);
107        return true;
108    }
109    return false;
110}
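/*
 * For illustration: B and BL carry a signed 26-bit word offset in bits
 * [25:0], while B.cond, CBZ/CBNZ and LDR (literal) carry a signed 19-bit
 * word offset in bits [23:5].  Since tcg_insn_unit is 4 bytes wide, the
 * pointer difference above is already counted in instructions, so a
 * branch to the instruction two slots ahead deposits the value 2.
 */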
111
112static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
113                        intptr_t value, intptr_t addend)
114{
115    tcg_debug_assert(addend == 0);
116    switch (type) {
117    case R_AARCH64_JUMP26:
118    case R_AARCH64_CALL26:
119        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
120    case R_AARCH64_CONDBR19:
121        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
122    default:
123        g_assert_not_reached();
124    }
125}
126
127#define TCG_CT_CONST_AIMM 0x100
128#define TCG_CT_CONST_LIMM 0x200
129#define TCG_CT_CONST_ZERO 0x400
130#define TCG_CT_CONST_MONE 0x800
131#define TCG_CT_CONST_ORRI 0x1000
132#define TCG_CT_CONST_ANDI 0x2000
133
134#define ALL_GENERAL_REGS  0xffffffffu
135#define ALL_VECTOR_REGS   0xffffffff00000000ull
136
137#ifdef CONFIG_SOFTMMU
138#define ALL_QLDST_REGS \
139    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
140                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
141#else
142#define ALL_QLDST_REGS   ALL_GENERAL_REGS
143#endif
144
145/* Match a constant valid for addition (12-bit, optionally shifted).  */
146static inline bool is_aimm(uint64_t val)
147{
148    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
149}
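/*
 * For illustration: 0xfff and 0xfff000 are both valid (12 bits, either
 * unshifted or shifted left by 12), whereas 0x1001 is rejected because
 * it has bits set in both halves.
 */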
150
151/* Match a constant valid for logical operations.  */
152static inline bool is_limm(uint64_t val)
153{
154    /* Taking a simplified view of the logical immediates for now, ignoring
155       the replication that can happen across the field.  Match bit patterns
156       of the forms
157           0....01....1
158           0..01..10..0
159       and their inverses.  */
160
161    /* Make things easier below by testing the form with the msb clear. */
162    if ((int64_t)val < 0) {
163        val = ~val;
164    }
165    if (val == 0) {
166        return false;
167    }
168    val += val & -val;
169    return (val & (val - 1)) == 0;
170}
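/*
 * Worked example (illustrative): val = 0x0ff0 has the form 0..01..10..0;
 * adding its lowest set bit (0x10) yields 0x1000, a power of two, so the
 * test passes.  val = 0x0f0f becomes 0x0f10 after the same step, which
 * still has several bits set, so it is correctly rejected.
 */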
171
172/* Return true if v16 is a valid 16-bit shifted immediate.  */
173static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
174{
175    if (v16 == (v16 & 0xff)) {
176        *cmode = 0x8;
177        *imm8 = v16 & 0xff;
178        return true;
179    } else if (v16 == (v16 & 0xff00)) {
180        *cmode = 0xa;
181        *imm8 = v16 >> 8;
182        return true;
183    }
184    return false;
185}
186
187/* Return true if v32 is a valid 32-bit shifted immediate.  */
188static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
189{
190    if (v32 == (v32 & 0xff)) {
191        *cmode = 0x0;
192        *imm8 = v32 & 0xff;
193        return true;
194    } else if (v32 == (v32 & 0xff00)) {
195        *cmode = 0x2;
196        *imm8 = (v32 >> 8) & 0xff;
197        return true;
198    } else if (v32 == (v32 & 0xff0000)) {
199        *cmode = 0x4;
200        *imm8 = (v32 >> 16) & 0xff;
201        return true;
202    } else if (v32 == (v32 & 0xff000000)) {
203        *cmode = 0x6;
204        *imm8 = v32 >> 24;
205        return true;
206    }
207    return false;
208}
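/*
 * For illustration: v32 = 0x00ab0000 matches the third case above,
 * giving cmode 0x4 and imm8 0xab, i.e. the "MOVI ..., #0xab, LSL #16"
 * form of the modified-immediate encoding.
 */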
209
210/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
211static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
212{
213    if ((v32 & 0xffff00ff) == 0xff) {
214        *cmode = 0xc;
215        *imm8 = (v32 >> 8) & 0xff;
216        return true;
217    } else if ((v32 & 0xff00ffff) == 0xffff) {
218        *cmode = 0xd;
219        *imm8 = (v32 >> 16) & 0xff;
220        return true;
221    }
222    return false;
223}
224
225/* Return true if v32 is a valid float32 immediate.  */
226static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
227{
228    if (extract32(v32, 0, 19) == 0
229        && (extract32(v32, 25, 6) == 0x20
230            || extract32(v32, 25, 6) == 0x1f)) {
231        *cmode = 0xf;
232        *imm8 = (extract32(v32, 31, 1) << 7)
233              | (extract32(v32, 25, 1) << 6)
234              | extract32(v32, 19, 6);
235        return true;
236    }
237    return false;
238}
239
240/* Return true if v64 is a valid float64 immediate.  */
241static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
242{
243    if (extract64(v64, 0, 48) == 0
244        && (extract64(v64, 54, 9) == 0x100
245            || extract64(v64, 54, 9) == 0x0ff)) {
246        *cmode = 0xf;
247        *imm8 = (extract64(v64, 63, 1) << 7)
248              | (extract64(v64, 54, 1) << 6)
249              | extract64(v64, 48, 6);
250        return true;
251    }
252    return false;
253}
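/*
 * For illustration: 1.0f is 0x3f800000 and 1.0 is 0x3ff0000000000000;
 * both pass the tests above and yield imm8 = 0x70 with cmode 0xf, the
 * same encoding the assembler uses for FMOV with #1.0.
 */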
254
255/*
256 * Return non-zero if v32 can be formed by MOVI+ORR.
257 * Place the parameters for MOVI in (cmode, imm8).
258 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
259 */
260static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
261{
262    int i;
263
264    for (i = 6; i > 0; i -= 2) {
265        /* Mask out one byte we can add with ORR.  */
266        uint32_t tmp = v32 & ~(0xffu << (i * 4));
267        if (is_shimm32(tmp, cmode, imm8) ||
268            is_soimm32(tmp, cmode, imm8)) {
269            break;
270        }
271    }
272    return i;
273}
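/*
 * Worked example (illustrative): v32 = 0x00ab00cd.  At i = 4 the byte in
 * bits [23:16] is masked away, leaving 0x000000cd, which is_shimm32
 * accepts with cmode 0x0 and imm8 0xcd.  The caller then emits MOVI with
 * that pair plus ORR with cmode 4 and imm8 extract32(v32, 16, 8) = 0xab.
 */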
274
275/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
276static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
277{
278    if (v32 == deposit32(v32, 16, 16, v32)) {
279        return is_shimm16(v32, cmode, imm8);
280    } else {
281        return is_shimm32(v32, cmode, imm8);
282    }
283}
284
285static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
286{
287    if (ct & TCG_CT_CONST) {
288        return 1;
289    }
290    if (type == TCG_TYPE_I32) {
291        val = (int32_t)val;
292    }
293    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
294        return 1;
295    }
296    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
297        return 1;
298    }
299    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
300        return 1;
301    }
302    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
303        return 1;
304    }
305
306    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
307    case 0:
308        break;
309    case TCG_CT_CONST_ANDI:
310        val = ~val;
311        /* fallthru */
312    case TCG_CT_CONST_ORRI:
313        if (val == deposit64(val, 32, 32, val)) {
314            int cmode, imm8;
315            return is_shimm1632(val, &cmode, &imm8);
316        }
317        break;
318    default:
319        /* Both bits should not be set for the same insn.  */
320        g_assert_not_reached();
321    }
322
323    return 0;
324}
325
326enum aarch64_cond_code {
327    COND_EQ = 0x0,
328    COND_NE = 0x1,
329    COND_CS = 0x2,     /* Unsigned greater or equal */
330    COND_HS = COND_CS, /* ALIAS, unsigned greater or equal */
331    COND_CC = 0x3,     /* Unsigned less than */
332    COND_LO = COND_CC, /* ALIAS Lower */
333    COND_MI = 0x4,     /* Negative */
334    COND_PL = 0x5,     /* Zero or greater */
335    COND_VS = 0x6,     /* Overflow */
336    COND_VC = 0x7,     /* No overflow */
337    COND_HI = 0x8,     /* Unsigned greater than */
338    COND_LS = 0x9,     /* Unsigned less or equal */
339    COND_GE = 0xa,
340    COND_LT = 0xb,
341    COND_GT = 0xc,
342    COND_LE = 0xd,
343    COND_AL = 0xe,
344    COND_NV = 0xf, /* behaves like COND_AL here */
345};
346
347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
348    [TCG_COND_EQ] = COND_EQ,
349    [TCG_COND_NE] = COND_NE,
350    [TCG_COND_LT] = COND_LT,
351    [TCG_COND_GE] = COND_GE,
352    [TCG_COND_LE] = COND_LE,
353    [TCG_COND_GT] = COND_GT,
354    /* unsigned */
355    [TCG_COND_LTU] = COND_LO,
356    [TCG_COND_GTU] = COND_HI,
357    [TCG_COND_GEU] = COND_HS,
358    [TCG_COND_LEU] = COND_LS,
359};
360
361typedef enum {
362    LDST_ST = 0,    /* store */
363    LDST_LD = 1,    /* load */
364    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
365    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
366} AArch64LdstType;
367
368/* We encode the format of the insn into the beginning of the name, so that
369   we can have the preprocessor help "typecheck" the insn vs the output
370   function.  Arm didn't provide us with nice names for the formats, so we
371   use the section number of the architecture reference manual in which the
372   instruction group is described.  */
373typedef enum {
374    /* Compare and branch (immediate).  */
375    I3201_CBZ       = 0x34000000,
376    I3201_CBNZ      = 0x35000000,
377
378    /* Conditional branch (immediate).  */
379    I3202_B_C       = 0x54000000,
380
381    /* Unconditional branch (immediate).  */
382    I3206_B         = 0x14000000,
383    I3206_BL        = 0x94000000,
384
385    /* Unconditional branch (register).  */
386    I3207_BR        = 0xd61f0000,
387    I3207_BLR       = 0xd63f0000,
388    I3207_RET       = 0xd65f0000,
389
390    /* AdvSIMD load/store single structure.  */
391    I3303_LD1R      = 0x0d40c000,
392
393    /* Load literal for loading the address at pc-relative offset */
394    I3305_LDR       = 0x58000000,
395    I3305_LDR_v64   = 0x5c000000,
396    I3305_LDR_v128  = 0x9c000000,
397
398    /* Load/store register.  Described here as 3.3.12, but the helper
399       that emits them can transform to 3.3.10 or 3.3.13.  */
400    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
401    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
402    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
403    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
404
405    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
406    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
407    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
408    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
409
410    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
411    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
412
413    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
414    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
415    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
416
417    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
418    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
419
420    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
421    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
422
423    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
424    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
425
426    I3312_TO_I3310  = 0x00200800,
427    I3312_TO_I3313  = 0x01000000,
428
429    /* Load/store register pair instructions.  */
430    I3314_LDP       = 0x28400000,
431    I3314_STP       = 0x28000000,
432
433    /* Add/subtract immediate instructions.  */
434    I3401_ADDI      = 0x11000000,
435    I3401_ADDSI     = 0x31000000,
436    I3401_SUBI      = 0x51000000,
437    I3401_SUBSI     = 0x71000000,
438
439    /* Bitfield instructions.  */
440    I3402_BFM       = 0x33000000,
441    I3402_SBFM      = 0x13000000,
442    I3402_UBFM      = 0x53000000,
443
444    /* Extract instruction.  */
445    I3403_EXTR      = 0x13800000,
446
447    /* Logical immediate instructions.  */
448    I3404_ANDI      = 0x12000000,
449    I3404_ORRI      = 0x32000000,
450    I3404_EORI      = 0x52000000,
451    I3404_ANDSI     = 0x72000000,
452
453    /* Move wide immediate instructions.  */
454    I3405_MOVN      = 0x12800000,
455    I3405_MOVZ      = 0x52800000,
456    I3405_MOVK      = 0x72800000,
457
458    /* PC relative addressing instructions.  */
459    I3406_ADR       = 0x10000000,
460    I3406_ADRP      = 0x90000000,
461
462    /* Add/subtract shifted register instructions (without a shift).  */
463    I3502_ADD       = 0x0b000000,
464    I3502_ADDS      = 0x2b000000,
465    I3502_SUB       = 0x4b000000,
466    I3502_SUBS      = 0x6b000000,
467
468    /* Add/subtract shifted register instructions (with a shift).  */
469    I3502S_ADD_LSL  = I3502_ADD,
470
471    /* Add/subtract with carry instructions.  */
472    I3503_ADC       = 0x1a000000,
473    I3503_SBC       = 0x5a000000,
474
475    /* Conditional select instructions.  */
476    I3506_CSEL      = 0x1a800000,
477    I3506_CSINC     = 0x1a800400,
478    I3506_CSINV     = 0x5a800000,
479    I3506_CSNEG     = 0x5a800400,
480
481    /* Data-processing (1 source) instructions.  */
482    I3507_CLZ       = 0x5ac01000,
483    I3507_RBIT      = 0x5ac00000,
484    I3507_REV       = 0x5ac00000, /* + size << 10 */
485
486    /* Data-processing (2 source) instructions.  */
487    I3508_LSLV      = 0x1ac02000,
488    I3508_LSRV      = 0x1ac02400,
489    I3508_ASRV      = 0x1ac02800,
490    I3508_RORV      = 0x1ac02c00,
491    I3508_SMULH     = 0x9b407c00,
492    I3508_UMULH     = 0x9bc07c00,
493    I3508_UDIV      = 0x1ac00800,
494    I3508_SDIV      = 0x1ac00c00,
495
496    /* Data-processing (3 source) instructions.  */
497    I3509_MADD      = 0x1b000000,
498    I3509_MSUB      = 0x1b008000,
499
500    /* Logical shifted register instructions (without a shift).  */
501    I3510_AND       = 0x0a000000,
502    I3510_BIC       = 0x0a200000,
503    I3510_ORR       = 0x2a000000,
504    I3510_ORN       = 0x2a200000,
505    I3510_EOR       = 0x4a000000,
506    I3510_EON       = 0x4a200000,
507    I3510_ANDS      = 0x6a000000,
508
509    /* Logical shifted register instructions (with a shift).  */
510    I3502S_AND_LSR  = I3510_AND | (1 << 22),
511
512    /* AdvSIMD copy */
513    I3605_DUP      = 0x0e000400,
514    I3605_INS      = 0x4e001c00,
515    I3605_UMOV     = 0x0e003c00,
516
517    /* AdvSIMD modified immediate */
518    I3606_MOVI      = 0x0f000400,
519    I3606_MVNI      = 0x2f000400,
520    I3606_BIC       = 0x2f001400,
521    I3606_ORR       = 0x0f001400,
522
523    /* AdvSIMD scalar shift by immediate */
524    I3609_SSHR      = 0x5f000400,
525    I3609_SSRA      = 0x5f001400,
526    I3609_SHL       = 0x5f005400,
527    I3609_USHR      = 0x7f000400,
528    I3609_USRA      = 0x7f001400,
529    I3609_SLI       = 0x7f005400,
530
531    /* AdvSIMD scalar three same */
532    I3611_SQADD     = 0x5e200c00,
533    I3611_SQSUB     = 0x5e202c00,
534    I3611_CMGT      = 0x5e203400,
535    I3611_CMGE      = 0x5e203c00,
536    I3611_SSHL      = 0x5e204400,
537    I3611_ADD       = 0x5e208400,
538    I3611_CMTST     = 0x5e208c00,
539    I3611_UQADD     = 0x7e200c00,
540    I3611_UQSUB     = 0x7e202c00,
541    I3611_CMHI      = 0x7e203400,
542    I3611_CMHS      = 0x7e203c00,
543    I3611_USHL      = 0x7e204400,
544    I3611_SUB       = 0x7e208400,
545    I3611_CMEQ      = 0x7e208c00,
546
547    /* AdvSIMD scalar two-reg misc */
548    I3612_CMGT0     = 0x5e208800,
549    I3612_CMEQ0     = 0x5e209800,
550    I3612_CMLT0     = 0x5e20a800,
551    I3612_ABS       = 0x5e20b800,
552    I3612_CMGE0     = 0x7e208800,
553    I3612_CMLE0     = 0x7e209800,
554    I3612_NEG       = 0x7e20b800,
555
556    /* AdvSIMD shift by immediate */
557    I3614_SSHR      = 0x0f000400,
558    I3614_SSRA      = 0x0f001400,
559    I3614_SHL       = 0x0f005400,
560    I3614_SLI       = 0x2f005400,
561    I3614_USHR      = 0x2f000400,
562    I3614_USRA      = 0x2f001400,
563
564    /* AdvSIMD three same.  */
565    I3616_ADD       = 0x0e208400,
566    I3616_AND       = 0x0e201c00,
567    I3616_BIC       = 0x0e601c00,
568    I3616_BIF       = 0x2ee01c00,
569    I3616_BIT       = 0x2ea01c00,
570    I3616_BSL       = 0x2e601c00,
571    I3616_EOR       = 0x2e201c00,
572    I3616_MUL       = 0x0e209c00,
573    I3616_ORR       = 0x0ea01c00,
574    I3616_ORN       = 0x0ee01c00,
575    I3616_SUB       = 0x2e208400,
576    I3616_CMGT      = 0x0e203400,
577    I3616_CMGE      = 0x0e203c00,
578    I3616_CMTST     = 0x0e208c00,
579    I3616_CMHI      = 0x2e203400,
580    I3616_CMHS      = 0x2e203c00,
581    I3616_CMEQ      = 0x2e208c00,
582    I3616_SMAX      = 0x0e206400,
583    I3616_SMIN      = 0x0e206c00,
584    I3616_SSHL      = 0x0e204400,
585    I3616_SQADD     = 0x0e200c00,
586    I3616_SQSUB     = 0x0e202c00,
587    I3616_UMAX      = 0x2e206400,
588    I3616_UMIN      = 0x2e206c00,
589    I3616_UQADD     = 0x2e200c00,
590    I3616_UQSUB     = 0x2e202c00,
591    I3616_USHL      = 0x2e204400,
592
593    /* AdvSIMD two-reg misc.  */
594    I3617_CMGT0     = 0x0e208800,
595    I3617_CMEQ0     = 0x0e209800,
596    I3617_CMLT0     = 0x0e20a800,
597    I3617_CMGE0     = 0x2e208800,
598    I3617_CMLE0     = 0x2e209800,
599    I3617_NOT       = 0x2e205800,
600    I3617_ABS       = 0x0e20b800,
601    I3617_NEG       = 0x2e20b800,
602
603    /* System instructions.  */
604    NOP             = 0xd503201f,
605    DMB_ISH         = 0xd50338bf,
606    DMB_LD          = 0x00000100,
607    DMB_ST          = 0x00000200,
608} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
616/* Emit an opcode with "type-checking" of the format.  */
617#define tcg_out_insn(S, FMT, OP, ...) \
618    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
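/*
 * For illustration, the call
 *     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
 * expands to
 *     tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0);
 * so pairing an opcode with the wrong format fails to compile.
 */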
619
620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
621                              TCGReg rt, TCGReg rn, unsigned size)
622{
623    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
624}
625
626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
627                              int imm19, TCGReg rt)
628{
629    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
633                              TCGReg rt, int imm19)
634{
635    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
636}
637
638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
639                              TCGCond c, int imm19)
640{
641    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
642}
643
644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
645{
646    tcg_out32(s, insn | (imm26 & 0x03ffffff));
647}
648
649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
650{
651    tcg_out32(s, insn | rn << 5);
652}
653
654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
655                              TCGReg r1, TCGReg r2, TCGReg rn,
656                              tcg_target_long ofs, bool pre, bool w)
657{
658    insn |= 1u << 31; /* ext */
659    insn |= pre << 24;
660    insn |= w << 23;
661
662    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
663    insn |= (ofs & (0x7f << 3)) << (15 - 3);
664
665    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
666}
667
668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
669                              TCGReg rd, TCGReg rn, uint64_t aimm)
670{
671    if (aimm > 0xfff) {
672        tcg_debug_assert((aimm & 0xfff) == 0);
673        aimm >>= 12;
674        tcg_debug_assert(aimm <= 0xfff);
675        aimm |= 1 << 12;  /* apply LSL 12 */
676    }
677    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
678}
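/*
 * For illustration: aimm = 0x123000 takes the branch above and becomes
 * 0x123 with bit 12 set; once shifted into place that bit selects the
 * "LSL #12" form of ADD/SUB (immediate).
 */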
679
680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
681   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
682   that feed the DecodeBitMasks pseudo function.  */
683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
684                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
685{
686    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
687              | rn << 5 | rd);
688}
689
690#define tcg_out_insn_3404  tcg_out_insn_3402
691
692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
693                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
694{
695    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
696              | rn << 5 | rd);
697}
698
699/* This function is used for the Move (wide immediate) instruction group.
700   Note that SHIFT is a full shift count, not the 2 bit HW field. */
701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
702                              TCGReg rd, uint16_t half, unsigned shift)
703{
704    tcg_debug_assert((shift & ~0x30) == 0);
705    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
706}
707
708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
709                              TCGReg rd, int64_t disp)
710{
711    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
712}
713
714/* This function is for both 3.5.2 (Add/subtract shifted register) and 3.5.10
715   (Logical shifted register), for the rare occasion when we want a shift.  */
716static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
717                                      TCGType ext, TCGReg rd, TCGReg rn,
718                                      TCGReg rm, int imm6)
719{
720    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
721}
722
723/* This function is for 3.5.2 (Add/subtract shifted register),
724   and 3.5.10 (Logical shifted register), for the vast majority of cases
725   when we don't want to apply a shift.  Thus it can also be used for
726   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
727static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
728                              TCGReg rd, TCGReg rn, TCGReg rm)
729{
730    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
731}
732
733#define tcg_out_insn_3503  tcg_out_insn_3502
734#define tcg_out_insn_3508  tcg_out_insn_3502
735#define tcg_out_insn_3510  tcg_out_insn_3502
736
737static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
738                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
739{
740    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
741              | tcg_cond_to_aarch64[c] << 12);
742}
743
744static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn)
746{
747    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
748}
749
750static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
751                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
752{
753    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
754}
755
756static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
757                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
758{
759    /* Note that bit 11 set means general register input.  Therefore
760       we can handle both register sets with one function.  */
761    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
762              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
763}
764
765static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
766                              TCGReg rd, bool op, int cmode, uint8_t imm8)
767{
768    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
769              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
770}
771
772static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
773                              TCGReg rd, TCGReg rn, unsigned immhb)
774{
775    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
776}
777
778static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
779                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780{
781    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
782              | (rn & 0x1f) << 5 | (rd & 0x1f));
783}
784
785static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
786                              unsigned size, TCGReg rd, TCGReg rn)
787{
788    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
791static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
792                              TCGReg rd, TCGReg rn, unsigned immhb)
793{
794    tcg_out32(s, insn | q << 30 | immhb << 16
795              | (rn & 0x1f) << 5 | (rd & 0x1f));
796}
797
798static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
799                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
800{
801    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
802              | (rn & 0x1f) << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
806                              unsigned size, TCGReg rd, TCGReg rn)
807{
808    tcg_out32(s, insn | q << 30 | (size << 22)
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
812static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
813                              TCGReg rd, TCGReg base, TCGType ext,
814                              TCGReg regoff)
815{
816    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
817    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
818              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg rn, intptr_t offset)
823{
824    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
825}
826
827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
828                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
829{
830    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
831    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
832              | rn << 5 | (rd & 0x1f));
833}
834
835/* Register to register move using ORR (shifted register with no shift). */
836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
837{
838    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
839}
840
841/* Register to register move using ADDI (move to/from SP).  */
842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
843{
844    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
845}
846
847/* This function is used for the Logical (immediate) instruction group.
848   The value of LIMM must satisfy IS_LIMM.  See the comment above about
849   only supporting simplified logical immediates.  */
850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
851                             TCGReg rd, TCGReg rn, uint64_t limm)
852{
853    unsigned h, l, r, c;
854
855    tcg_debug_assert(is_limm(limm));
856
857    h = clz64(limm);
858    l = ctz64(limm);
859    if (l == 0) {
860        r = 0;                  /* form 0....01....1 */
861        c = ctz64(~limm) - 1;
862        if (h == 0) {
863            r = clz64(~limm);   /* form 1..10..01..1 */
864            c += r;
865        }
866    } else {
867        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
868        c = r - h - 1;
869    }
870    if (ext == TCG_TYPE_I32) {
871        r &= 31;
872        c &= 31;
873    }
874
875    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
876}
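/*
 * Worked example (illustrative): limm = 0xf0 for a 64-bit operation.
 * h = clz64 = 56 and l = ctz64 = 4, so r = 60 and c = 3.  With N = 1,
 * immr = 60, imms = 3 the DecodeBitMasks pseudo function reproduces a
 * run of four ones rotated into bits [7:4], i.e. 0xf0.
 */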
877
878static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
879                             TCGReg rd, int64_t v64)
880{
881    bool q = type == TCG_TYPE_V128;
882    int cmode, imm8, i;
883
884    /* Test all bytes equal first.  */
885    if (vece == MO_8) {
886        imm8 = (uint8_t)v64;
887        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
888        return;
889    }
890
891    /*
892     * Test all bytes 0x00 or 0xff second.  This can match cases that
893     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
894     */
895    for (i = imm8 = 0; i < 8; i++) {
896        uint8_t byte = v64 >> (i * 8);
897        if (byte == 0xff) {
898            imm8 |= 1 << i;
899        } else if (byte != 0) {
900            goto fail_bytes;
901        }
902    }
903    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
904    return;
905 fail_bytes:
906
907    /*
908     * Tests for various replications.  For each element width, if we
909     * cannot find an expansion there's no point checking a larger
910     * width because we already know by replication it cannot match.
911     */
912    if (vece == MO_16) {
913        uint16_t v16 = v64;
914
915        if (is_shimm16(v16, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
917            return;
918        }
919        if (is_shimm16(~v16, &cmode, &imm8)) {
920            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
921            return;
922        }
923
924        /*
925         * Otherwise, all remaining constants can be loaded in two insns:
926         * rd = v16 & 0xff, rd |= v16 & 0xff00.
927         */
928        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
929        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
930        return;
931    } else if (vece == MO_32) {
932        uint32_t v32 = v64;
933        uint32_t n32 = ~v32;
934
935        if (is_shimm32(v32, &cmode, &imm8) ||
936            is_soimm32(v32, &cmode, &imm8) ||
937            is_fimm32(v32, &cmode, &imm8)) {
938            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
939            return;
940        }
941        if (is_shimm32(n32, &cmode, &imm8) ||
942            is_soimm32(n32, &cmode, &imm8)) {
943            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
944            return;
945        }
946
947        /*
948         * Restrict the set of constants to those we can load with
949         * two instructions.  Others we load from the pool.
950         */
951        i = is_shimm32_pair(v32, &cmode, &imm8);
952        if (i) {
953            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
954            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
955            return;
956        }
957        i = is_shimm32_pair(n32, &cmode, &imm8);
958        if (i) {
959            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
960            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
961            return;
962        }
963    } else if (is_fimm64(v64, &cmode, &imm8)) {
964        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
965        return;
966    }
967
968    /*
969     * As a last resort, load from the constant pool.  Sadly there
970     * is no LD1R (literal), so store the full 16-byte vector.
971     */
972    if (type == TCG_TYPE_V128) {
973        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
974        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
975    } else {
976        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
977        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
978    }
979}
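/*
 * For illustration: v64 = 0x00ff00ff00ff00ff hits the "all bytes 0x00 or
 * 0xff" case above with imm8 = 0x55, i.e. a single MOVI with cmode 0xe
 * and op = 1; only constants that escape every case fall through to the
 * constant pool load.
 */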
980
981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
982                            TCGReg rd, TCGReg rs)
983{
984    int is_q = type - TCG_TYPE_V64;
985    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
986    return true;
987}
988
989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
990                             TCGReg r, TCGReg base, intptr_t offset)
991{
992    TCGReg temp = TCG_REG_TMP;
993
994    if (offset < -0xffffff || offset > 0xffffff) {
995        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
996        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
997        base = temp;
998    } else {
999        AArch64Insn add_insn = I3401_ADDI;
1000
1001        if (offset < 0) {
1002            add_insn = I3401_SUBI;
1003            offset = -offset;
1004        }
1005        if (offset & 0xfff000) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1007            base = temp;
1008        }
1009        if (offset & 0xfff) {
1010            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1011            base = temp;
1012        }
1013    }
1014    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1015    return true;
1016}
1017
1018static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1019                         tcg_target_long value)
1020{
1021    tcg_target_long svalue = value;
1022    tcg_target_long ivalue = ~value;
1023    tcg_target_long t0, t1, t2;
1024    int s0, s1;
1025    AArch64Insn opc;
1026
1027    switch (type) {
1028    case TCG_TYPE_I32:
1029    case TCG_TYPE_I64:
1030        tcg_debug_assert(rd < 32);
1031        break;
1032    default:
1033        g_assert_not_reached();
1034    }
1035
1036    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1037       values within [2**31, 2**32-1], we can create smaller sequences by
1038       interpreting this as a negative 32-bit number, while ensuring that
1039       the high 32 bits are cleared by setting SF=0.  */
1040    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1041        svalue = (int32_t)value;
1042        value = (uint32_t)value;
1043        ivalue = (uint32_t)ivalue;
1044        type = TCG_TYPE_I32;
1045    }
1046
1047    /* Speed things up by handling the common case of small positive
1048       and negative values specially.  */
1049    if ((value & ~0xffffull) == 0) {
1050        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1051        return;
1052    } else if ((ivalue & ~0xffffull) == 0) {
1053        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1054        return;
1055    }
1056
1057    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1058       use the sign-extended value.  That lets us match rotated values such
1059       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1060    if (is_limm(svalue)) {
1061        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1062        return;
1063    }
1064
1065    /* Look for host pointer values within 4G of the PC.  This happens
1066       often when loading pointers to QEMU's own data structures.  */
1067    if (type == TCG_TYPE_I64) {
1068        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1069        tcg_target_long disp = value - src_rx;
1070        if (disp == sextract64(disp, 0, 21)) {
1071            tcg_out_insn(s, 3406, ADR, rd, disp);
1072            return;
1073        }
1074        disp = (value >> 12) - (src_rx >> 12);
1075        if (disp == sextract64(disp, 0, 21)) {
1076            tcg_out_insn(s, 3406, ADRP, rd, disp);
1077            if (value & 0xfff) {
1078                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1079            }
1080            return;
1081        }
1082    }
1083
1084    /* Would it take fewer insns to begin with MOVN?  */
1085    if (ctpop64(value) >= 32) {
1086        t0 = ivalue;
1087        opc = I3405_MOVN;
1088    } else {
1089        t0 = value;
1090        opc = I3405_MOVZ;
1091    }
1092    s0 = ctz64(t0) & (63 & -16);
1093    t1 = t0 & ~(0xffffull << s0);
1094    s1 = ctz64(t1) & (63 & -16);
1095    t2 = t1 & ~(0xffffull << s1);
1096    if (t2 == 0) {
1097        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1098        if (t1 != 0) {
1099            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1100        }
1101        return;
1102    }
1103
1104    /* For more than 2 insns, dump it into the constant pool.  */
1105    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1106    tcg_out_insn(s, 3305, LDR, 0, rd);
1107}
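/*
 * Worked example (illustrative): value = 0x0000123400005678 has fewer
 * than 32 bits set, so MOVZ is chosen; s0 = 0, s1 = 32 and t2 = 0, so
 * (assuming the ADR/ADRP window above does not apply) the result is
 *     MOVZ rd, #0x5678
 *     MOVK rd, #0x1234, LSL #32
 * Anything needing three or more moves is loaded from the pool instead.
 */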
1108
1109static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1110{
1111    return false;
1112}
1113
1114static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1115                             tcg_target_long imm)
1116{
1117    /* This function is only used for passing structs by reference. */
1118    g_assert_not_reached();
1119}
1120
1121/* Define something more legible for general use.  */
1122#define tcg_out_ldst_r  tcg_out_insn_3310
1123
1124static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1125                         TCGReg rn, intptr_t offset, int lgsize)
1126{
1127    /* If the offset is naturally aligned and in range, then we can
1128       use the scaled uimm12 encoding.  */
1129    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1130        uintptr_t scaled_uimm = offset >> lgsize;
1131        if (scaled_uimm <= 0xfff) {
1132            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1133            return;
1134        }
1135    }
1136
1137    /* Small signed offsets can use the unscaled encoding.  */
1138    if (offset >= -256 && offset < 256) {
1139        tcg_out_insn_3312(s, insn, rd, rn, offset);
1140        return;
1141    }
1142
1143    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1144    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1145    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1146}
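/*
 * For illustration with I3312_LDRX (lgsize 3): offset 0x7ff8 is aligned
 * and scales to 0xfff, so the scaled uimm12 form is used; offset -8 uses
 * the unscaled signed form; an offset such as 0x123456 is first loaded
 * into TCG_REG_TMP and used as a register offset.
 */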
1147
1148static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1149{
1150    if (ret == arg) {
1151        return true;
1152    }
1153    switch (type) {
1154    case TCG_TYPE_I32:
1155    case TCG_TYPE_I64:
1156        if (ret < 32 && arg < 32) {
1157            tcg_out_movr(s, type, ret, arg);
1158            break;
1159        } else if (ret < 32) {
1160            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1161            break;
1162        } else if (arg < 32) {
1163            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1164            break;
1165        }
1166        /* FALLTHRU */
1167
1168    case TCG_TYPE_V64:
1169        tcg_debug_assert(ret >= 32 && arg >= 32);
1170        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1171        break;
1172    case TCG_TYPE_V128:
1173        tcg_debug_assert(ret >= 32 && arg >= 32);
1174        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1175        break;
1176
1177    default:
1178        g_assert_not_reached();
1179    }
1180    return true;
1181}
1182
1183static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1184                       TCGReg base, intptr_t ofs)
1185{
1186    AArch64Insn insn;
1187    int lgsz;
1188
1189    switch (type) {
1190    case TCG_TYPE_I32:
1191        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1192        lgsz = 2;
1193        break;
1194    case TCG_TYPE_I64:
1195        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1196        lgsz = 3;
1197        break;
1198    case TCG_TYPE_V64:
1199        insn = I3312_LDRVD;
1200        lgsz = 3;
1201        break;
1202    case TCG_TYPE_V128:
1203        insn = I3312_LDRVQ;
1204        lgsz = 4;
1205        break;
1206    default:
1207        g_assert_not_reached();
1208    }
1209    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1210}
1211
1212static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1213                       TCGReg base, intptr_t ofs)
1214{
1215    AArch64Insn insn;
1216    int lgsz;
1217
1218    switch (type) {
1219    case TCG_TYPE_I32:
1220        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1221        lgsz = 2;
1222        break;
1223    case TCG_TYPE_I64:
1224        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1225        lgsz = 3;
1226        break;
1227    case TCG_TYPE_V64:
1228        insn = I3312_STRVD;
1229        lgsz = 3;
1230        break;
1231    case TCG_TYPE_V128:
1232        insn = I3312_STRVQ;
1233        lgsz = 4;
1234        break;
1235    default:
1236        g_assert_not_reached();
1237    }
1238    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1239}
1240
1241static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1242                               TCGReg base, intptr_t ofs)
1243{
1244    if (type <= TCG_TYPE_I64 && val == 0) {
1245        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1246        return true;
1247    }
1248    return false;
1249}
1250
1251static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1252                               TCGReg rn, unsigned int a, unsigned int b)
1253{
1254    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1255}
1256
1257static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1258                                TCGReg rn, unsigned int a, unsigned int b)
1259{
1260    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1261}
1262
1263static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1264                                TCGReg rn, unsigned int a, unsigned int b)
1265{
1266    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1267}
1268
1269static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1270                                TCGReg rn, TCGReg rm, unsigned int a)
1271{
1272    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1273}
1274
1275static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1276                               TCGReg rd, TCGReg rn, unsigned int m)
1277{
1278    int bits = ext ? 64 : 32;
1279    int max = bits - 1;
1280    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1281}
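/*
 * For illustration: a 32-bit left shift by 4 becomes UBFM Wd, Wn, #28,
 * #27, which is exactly the "LSL Wd, Wn, #4" alias.
 */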
1282
1283static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1284                               TCGReg rd, TCGReg rn, unsigned int m)
1285{
1286    int max = ext ? 63 : 31;
1287    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1288}
1289
1290static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1291                               TCGReg rd, TCGReg rn, unsigned int m)
1292{
1293    int max = ext ? 63 : 31;
1294    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1295}
1296
1297static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1298                                TCGReg rd, TCGReg rn, unsigned int m)
1299{
1300    int max = ext ? 63 : 31;
1301    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1302}
1303
1304static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1305                                TCGReg rd, TCGReg rn, unsigned int m)
1306{
1307    int max = ext ? 63 : 31;
1308    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1309}
1310
1311static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1312                               TCGReg rn, unsigned lsb, unsigned width)
1313{
1314    unsigned size = ext ? 64 : 32;
1315    unsigned a = (size - lsb) & (size - 1);
1316    unsigned b = width - 1;
1317    tcg_out_bfm(s, ext, rd, rn, a, b);
1318}
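/*
 * For illustration: depositing an 8-bit field at lsb 8 of a 32-bit value
 * gives a = 24 and b = 7, i.e. BFM Wd, Wn, #24, #7, the encoding behind
 * the "BFI Wd, Wn, #8, #8" alias.
 */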
1319
1320static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1321                        tcg_target_long b, bool const_b)
1322{
1323    if (const_b) {
1324        /* Using CMP or CMN aliases.  */
1325        if (b >= 0) {
1326            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1327        } else {
1328            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1329        }
1330    } else {
1331        /* Using CMP alias SUBS wzr, Wn, Wm */
1332        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1333    }
1334}
1335
1336static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1337{
1338    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1339    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1340    tcg_out_insn(s, 3206, B, offset);
1341}
1342
1343static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1344{
1345    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1346    if (offset == sextract64(offset, 0, 26)) {
1347        tcg_out_insn(s, 3206, B, offset);
1348    } else {
1349        /* Choose X9 as a call-clobbered non-LR temporary. */
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1351        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1352    }
1353}
1354
1355static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1356{
1357    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1358    if (offset == sextract64(offset, 0, 26)) {
1359        tcg_out_insn(s, 3206, BL, offset);
1360    } else {
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1363    }
1364}
1365
1366static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1367                         const TCGHelperInfo *info)
1368{
1369    tcg_out_call_int(s, target);
1370}
1371
1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1373{
1374    if (!l->has_value) {
1375        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1376        tcg_out_insn(s, 3206, B, 0);
1377    } else {
1378        tcg_out_goto(s, l->u.value_ptr);
1379    }
1380}
1381
1382static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1383                           TCGArg b, bool b_const, TCGLabel *l)
1384{
1385    intptr_t offset;
1386    bool need_cmp;
1387
1388    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1389        need_cmp = false;
1390    } else {
1391        need_cmp = true;
1392        tcg_out_cmp(s, ext, a, b, b_const);
1393    }
1394
1395    if (!l->has_value) {
1396        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1397        offset = tcg_in32(s) >> 5;
1398    } else {
1399        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1400        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1401    }
1402
1403    if (need_cmp) {
1404        tcg_out_insn(s, 3202, B_C, c, offset);
1405    } else if (c == TCG_COND_EQ) {
1406        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1407    } else {
1408        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1409    }
1410}
1411
1412static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1413                               TCGReg rd, TCGReg rn)
1414{
1415    /* REV, REV16, REV32 */
1416    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1417}
1418
1419static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1420                               TCGReg rd, TCGReg rn)
1421{
1422    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1423    int bits = (8 << s_bits) - 1;
1424    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1425}
1426
1427static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1428{
1429    tcg_out_sxt(s, type, MO_8, rd, rn);
1430}
1431
1432static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1433{
1434    tcg_out_sxt(s, type, MO_16, rd, rn);
1435}
1436
1437static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1438{
1439    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1440}
1441
1442static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1443{
1444    tcg_out_ext32s(s, rd, rn);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_uxt(s, MO_8, rd, rn);
1458}
1459
1460static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1461{
1462    tcg_out_uxt(s, MO_16, rd, rn);
1463}
1464
1465static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1466{
1467    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1468}
1469
1470static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1471{
1472    tcg_out_ext32u(s, rd, rn);
1473}
1474
1475static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1476{
1477    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1478}
1479
1480static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1481                            TCGReg rn, int64_t aimm)
1482{
1483    if (aimm >= 0) {
1484        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1485    } else {
1486        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1487    }
1488}
1489
1490static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1491                            TCGReg rh, TCGReg al, TCGReg ah,
1492                            tcg_target_long bl, tcg_target_long bh,
1493                            bool const_bl, bool const_bh, bool sub)
1494{
1495    TCGReg orig_rl = rl;
1496    AArch64Insn insn;
1497
1498    if (rl == ah || (!const_bh && rl == bh)) {
1499        rl = TCG_REG_TMP;
1500    }
1501
1502    if (const_bl) {
1503        if (bl < 0) {
1504            bl = -bl;
1505            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1506        } else {
1507            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1508        }
1509
1510        if (unlikely(al == TCG_REG_XZR)) {
1511            /* ??? We want to allow al to be zero for the benefit of
1512               negation via subtraction.  However, that leaves open the
1513               possibility of adding 0+const in the low part, and the
1514               immediate add instructions encode XSP not XZR.  Don't try
1515               anything more elaborate here than loading another zero.  */
1516            al = TCG_REG_TMP;
1517            tcg_out_movi(s, ext, al, 0);
1518        }
1519        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1520    } else {
1521        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1522    }
1523
1524    insn = I3503_ADC;
1525    if (const_bh) {
1526        /* Note that the only two constants we support are 0 and -1, and
1527           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1528        if ((bh != 0) ^ sub) {
1529            insn = I3503_SBC;
1530        }
1531        bh = TCG_REG_XZR;
1532    } else if (sub) {
1533        insn = I3503_SBC;
1534    }
1535    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1536
1537    tcg_out_mov(s, ext, orig_rl, rl);
1538}
1539
1540static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1541{
1542    static const uint32_t sync[] = {
1543        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1544        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1545        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1546        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1547        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1548    };
1549    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1550}
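/*
 * Note (illustrative): DMB_ISH above is the base opcode with CRm = 8;
 * OR-ing in DMB_LD and/or DMB_ST raises CRm to 9, 10 or 11, selecting
 * the ISHLD, ISHST or full ISH variant of DMB as listed in the table.
 */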
1551
1552static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1553                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1554{
1555    TCGReg a1 = a0;
1556    if (is_ctz) {
1557        a1 = TCG_REG_TMP;
1558        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1559    }
1560    if (const_b && b == (ext ? 64 : 32)) {
1561        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1562    } else {
1563        AArch64Insn sel = I3506_CSEL;
1564
1565        tcg_out_cmp(s, ext, a0, 0, 1);
1566        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1567
1568        if (const_b) {
1569            if (b == -1) {
1570                b = TCG_REG_XZR;
1571                sel = I3506_CSINV;
1572            } else if (b == 0) {
1573                b = TCG_REG_XZR;
1574            } else {
1575                tcg_out_movi(s, ext, d, b);
1576                b = d;
1577            }
1578        }
1579        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1580    }
1581}
1582
1583static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1584{
1585    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1586    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1587    tcg_out_insn(s, 3406, ADR, rd, offset);
1588}
1589
1590typedef struct {
1591    TCGReg base;
1592    TCGReg index;
1593    TCGType index_ext;
1594} HostAddress;
1595
1596#ifdef CONFIG_SOFTMMU
1597/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1598 *                                     MemOpIdx oi, uintptr_t ra)
1599 */
1600static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1601    [MO_8]  = helper_ret_ldub_mmu,
1602#if HOST_BIG_ENDIAN
1603    [MO_16] = helper_be_lduw_mmu,
1604    [MO_32] = helper_be_ldul_mmu,
1605    [MO_64] = helper_be_ldq_mmu,
1606#else
1607    [MO_16] = helper_le_lduw_mmu,
1608    [MO_32] = helper_le_ldul_mmu,
1609    [MO_64] = helper_le_ldq_mmu,
1610#endif
1611};
1612
1613/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1614 *                                     uintxx_t val, MemOpIdx oi,
1615 *                                     uintptr_t ra)
1616 */
1617static void * const qemu_st_helpers[MO_SIZE + 1] = {
1618    [MO_8]  = helper_ret_stb_mmu,
1619#if HOST_BIG_ENDIAN
1620    [MO_16] = helper_be_stw_mmu,
1621    [MO_32] = helper_be_stl_mmu,
1622    [MO_64] = helper_be_stq_mmu,
1623#else
1624    [MO_16] = helper_le_stw_mmu,
1625    [MO_32] = helper_le_stl_mmu,
1626    [MO_64] = helper_le_stq_mmu,
1627#endif
1628};
1629
1630static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1631{
1632    MemOpIdx oi = lb->oi;
1633    MemOp opc = get_memop(oi);
1634
1635    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1636        return false;
1637    }
1638
1639    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1640    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1641    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1642    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1643    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1644
1645    tcg_out_movext(s, lb->type, lb->datalo_reg,
1646                   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
1647    tcg_out_goto(s, lb->raddr);
1648    return true;
1649}
1650
1651static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1652{
1653    MemOpIdx oi = lb->oi;
1654    MemOp opc = get_memop(oi);
1655    MemOp size = opc & MO_SIZE;
1656
1657    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1658        return false;
1659    }
1660
1661    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1662    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1663    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1664    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1665    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1666    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1667    tcg_out_goto(s, lb->raddr);
1668    return true;
1669}
1670
1671static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1672                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1673                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1674{
1675    TCGLabelQemuLdst *label = new_ldst_label(s);
1676
1677    label->is_ld = is_ld;
1678    label->oi = oi;
1679    label->type = ext;
1680    label->datalo_reg = data_reg;
1681    label->addrlo_reg = addr_reg;
1682    label->raddr = tcg_splitwx_to_rx(raddr);
1683    label->label_ptr[0] = label_ptr;
1684}
1685
1686/* We expect to use a 7-bit scaled negative offset from ENV.  */
1687QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1688QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
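/* (LDP's signed 7-bit immediate is scaled by the register size, 8 bytes,
   giving a reach of -512 .. +504; hence the -512 limit checked above.)  */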
1689
1690/* These offsets are built into the LDP below.  */
1691QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1692QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1693
1694/* Load and compare a TLB entry, emitting the conditional jump to the
1695   slow path on mismatch; that branch is patched later, when the slow path
1696   is finalized.  The generated code returns the host addend in X1 and
1697   clobbers X0, X2, X3 and TMP. */
1698static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1699                             tcg_insn_unit **label_ptr, int mem_index,
1700                             bool is_read)
1701{
1702    unsigned a_bits = get_alignment_bits(opc);
1703    unsigned s_bits = opc & MO_SIZE;
1704    unsigned a_mask = (1u << a_bits) - 1;
1705    unsigned s_mask = (1u << s_bits) - 1;
1706    TCGReg x3;
1707    TCGType mask_type;
1708    uint64_t compare_mask;
1709
1710    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1711                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1712
1713    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1714    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1715                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1716
1717    /* Extract the TLB index from the address into X0.  */
1718    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1719                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1720                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
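    /* The mask loaded above is pre-scaled by the CPUTLBEntry size, so the
       combined shift-and-mask leaves a byte offset that indexes the table
       directly.  */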
1721
1722    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1723    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1724
1725    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1726    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1727               ? offsetof(CPUTLBEntry, addr_read)
1728               : offsetof(CPUTLBEntry, addr_write));
1729    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1730               offsetof(CPUTLBEntry, addend));
1731
1732    /* For aligned accesses, we check the first byte and include the alignment
1733       bits within the address.  For unaligned access, we check that we don't
1734       cross pages using the address of the last byte of the access.  */
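    /* For example, an 8-byte load with only 4-byte guaranteed alignment
       (s_bits = 3, a_bits = 2) adds s_mask - a_mask = 4 to the address;
       if that sum lands on the next page, its page bits no longer match
       the TLB comparator and we take the slow path, while the a_mask bits
       kept in compare_mask still reject a misaligned address.  */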
1735    if (a_bits >= s_bits) {
1736        x3 = addr_reg;
1737    } else {
1738        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1739                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1740        x3 = TCG_REG_X3;
1741    }
1742    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1743
1744    /* Store the page mask part of the address into X3.  */
1745    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1746                     TCG_REG_X3, x3, compare_mask);
1747
1748    /* Perform the address comparison. */
1749    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1750
1751    /* If not equal, we jump to the slow path. */
1752    *label_ptr = s->code_ptr;
1753    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1754}
1755
1756#else
1757static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1758                                   unsigned a_bits)
1759{
1760    unsigned a_mask = (1 << a_bits) - 1;
1761    TCGLabelQemuLdst *label = new_ldst_label(s);
1762
1763    label->is_ld = is_ld;
1764    label->addrlo_reg = addr_reg;
1765
1766    /* tst addr, #mask */
1767    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1768
1769    label->label_ptr[0] = s->code_ptr;
1770
1771    /* b.ne slow_path */
1772    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1773
1774    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1775}
1776
1777static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1778{
1779    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1780        return false;
1781    }
1782
1783    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1784    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1785
1786    /* "Tail call" to the helper, with the return address back inline. */
1787    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1788    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1789                                        : helper_unaligned_st));
1790    return true;
1791}
1792
1793static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1794{
1795    return tcg_out_fail_alignment(s, l);
1796}
1797
1798static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1799{
1800    return tcg_out_fail_alignment(s, l);
1801}
1802#endif /* CONFIG_SOFTMMU */
1803
1804static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1805                                   TCGReg data_r, HostAddress h)
1806{
1807    switch (memop & MO_SSIZE) {
1808    case MO_UB:
1809        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1810        break;
1811    case MO_SB:
1812        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1813                       data_r, h.base, h.index_ext, h.index);
1814        break;
1815    case MO_UW:
1816        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1817        break;
1818    case MO_SW:
1819        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1820                       data_r, h.base, h.index_ext, h.index);
1821        break;
1822    case MO_UL:
1823        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1824        break;
1825    case MO_SL:
1826        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1827        break;
1828    case MO_UQ:
1829        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1830        break;
1831    default:
1832        g_assert_not_reached();
1833    }
1834}
1835
1836static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1837                                   TCGReg data_r, HostAddress h)
1838{
1839    switch (memop & MO_SIZE) {
1840    case MO_8:
1841        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1842        break;
1843    case MO_16:
1844        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1845        break;
1846    case MO_32:
1847        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1848        break;
1849    case MO_64:
1850        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1851        break;
1852    default:
1853        g_assert_not_reached();
1854    }
1855}
1856
1857static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1858                            MemOpIdx oi, TCGType data_type)
1859{
1860    MemOp memop = get_memop(oi);
1861    TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1862    HostAddress h;
1863
1864    /* Byte swapping is left to middle-end expansion. */
1865    tcg_debug_assert((memop & MO_BSWAP) == 0);
1866
1867#ifdef CONFIG_SOFTMMU
1868    tcg_insn_unit *label_ptr;
1869
1870    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
1871
1872    h = (HostAddress){
1873        .base = TCG_REG_X1,
1874        .index = addr_reg,
1875        .index_ext = addr_type
1876    };
1877    tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
1878
1879    add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
1880                        s->code_ptr, label_ptr);
1881#else /* !CONFIG_SOFTMMU */
1882    unsigned a_bits = get_alignment_bits(memop);
1883    if (a_bits) {
1884        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1885    }
1886    if (USE_GUEST_BASE) {
1887        h = (HostAddress){
1888            .base = TCG_REG_GUEST_BASE,
1889            .index = addr_reg,
1890            .index_ext = addr_type
1891        };
1892    } else {
1893        h = (HostAddress){
1894            .base = addr_reg,
1895            .index = TCG_REG_XZR,
1896            .index_ext = TCG_TYPE_I64
1897        };
1898    }
1899    tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
1900#endif /* CONFIG_SOFTMMU */
1901}
1902
1903static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1904                            MemOpIdx oi, TCGType data_type)
1905{
1906    MemOp memop = get_memop(oi);
1907    TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1908    HostAddress h;
1909
1910    /* Byte swapping is left to middle-end expansion. */
1911    tcg_debug_assert((memop & MO_BSWAP) == 0);
1912
1913#ifdef CONFIG_SOFTMMU
1914    tcg_insn_unit *label_ptr;
1915
1916    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
1917
1918    h = (HostAddress){
1919        .base = TCG_REG_X1,
1920        .index = addr_reg,
1921        .index_ext = addr_type
1922    };
1923    tcg_out_qemu_st_direct(s, memop, data_reg, h);
1924
1925    add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
1926                        s->code_ptr, label_ptr);
1927#else /* !CONFIG_SOFTMMU */
1928    unsigned a_bits = get_alignment_bits(memop);
1929    if (a_bits) {
1930        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1931    }
1932    if (USE_GUEST_BASE) {
1933        h = (HostAddress){
1934            .base = TCG_REG_GUEST_BASE,
1935            .index = addr_reg,
1936            .index_ext = addr_type
1937        };
1938    } else {
1939        h = (HostAddress){
1940            .base = addr_reg,
1941            .index = TCG_REG_XZR,
1942            .index_ext = TCG_TYPE_I64
1943        };
1944    }
1945    tcg_out_qemu_st_direct(s, memop, data_reg, h);
1946#endif /* CONFIG_SOFTMMU */
1947}
1948
1949static const tcg_insn_unit *tb_ret_addr;
1950
1951static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1952{
1953    /* Reuse the zeroing that exists for goto_ptr.  */
1954    if (a0 == 0) {
1955        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1956    } else {
1957        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1958        tcg_out_goto_long(s, tb_ret_addr);
1959    }
1960}
1961
1962static void tcg_out_goto_tb(TCGContext *s, int which)
1963{
1964    /*
1965     * Direct branch, or indirect address load, will be patched
1966     * by tb_target_set_jmp_target.  Assert indirect load offset
1967     * in range early, regardless of direct branch distance.
1968     */
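    /* We emit a direct B followed by BR TMP; tb_target_set_jmp_target
       rewrites the B either into a branch straight to the destination TB,
       or into an LDR of the destination address into TMP for the
       indirect BR.  */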
1969    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1970    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1971
1972    set_jmp_insn_offset(s, which);
1973    tcg_out32(s, I3206_B);
1974    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1975    set_jmp_reset_offset(s, which);
1976}
1977
1978void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1979                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1980{
1981    uintptr_t d_addr = tb->jmp_target_addr[n];
1982    ptrdiff_t d_offset = d_addr - jmp_rx;
1983    tcg_insn_unit insn;
1984
1985    /* Either a direct branch, or an indirect branch via an address load. */
1986    if (d_offset == sextract64(d_offset, 0, 28)) {
1987        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1988    } else {
1989        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1990        ptrdiff_t i_offset = i_addr - jmp_rx;
1991
1992        /* Note that we asserted this in range in tcg_out_goto_tb. */
1993        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1994    }
1995    qatomic_set((uint32_t *)jmp_rw, insn);
1996    flush_idcache_range(jmp_rx, jmp_rw, 4);
1997}
1998
1999static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2000                       const TCGArg args[TCG_MAX_OP_ARGS],
2001                       const int const_args[TCG_MAX_OP_ARGS])
2002{
2003    /* 99% of the time, we can signal the use of extension registers
2004       by looking to see if the opcode handles 64-bit data.  */
2005    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2006
2007    /* Hoist the loads of the most common arguments.  */
2008    TCGArg a0 = args[0];
2009    TCGArg a1 = args[1];
2010    TCGArg a2 = args[2];
2011    int c2 = const_args[2];
2012
2013    /* Some operands are defined with the "rZ" constraint: a register or
2014       constant zero; testing const_args[I] suffices, not args[I] == 0.  */
2015#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2016
2017    switch (opc) {
2018    case INDEX_op_goto_ptr:
2019        tcg_out_insn(s, 3207, BR, a0);
2020        break;
2021
2022    case INDEX_op_br:
2023        tcg_out_goto_label(s, arg_label(a0));
2024        break;
2025
2026    case INDEX_op_ld8u_i32:
2027    case INDEX_op_ld8u_i64:
2028        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2029        break;
2030    case INDEX_op_ld8s_i32:
2031        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2032        break;
2033    case INDEX_op_ld8s_i64:
2034        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2035        break;
2036    case INDEX_op_ld16u_i32:
2037    case INDEX_op_ld16u_i64:
2038        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2039        break;
2040    case INDEX_op_ld16s_i32:
2041        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2042        break;
2043    case INDEX_op_ld16s_i64:
2044        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2045        break;
2046    case INDEX_op_ld_i32:
2047    case INDEX_op_ld32u_i64:
2048        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2049        break;
2050    case INDEX_op_ld32s_i64:
2051        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2052        break;
2053    case INDEX_op_ld_i64:
2054        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2055        break;
2056
2057    case INDEX_op_st8_i32:
2058    case INDEX_op_st8_i64:
2059        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2060        break;
2061    case INDEX_op_st16_i32:
2062    case INDEX_op_st16_i64:
2063        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2064        break;
2065    case INDEX_op_st_i32:
2066    case INDEX_op_st32_i64:
2067        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2068        break;
2069    case INDEX_op_st_i64:
2070        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2071        break;
2072
2073    case INDEX_op_add_i32:
2074        a2 = (int32_t)a2;
2075        /* FALLTHRU */
2076    case INDEX_op_add_i64:
2077        if (c2) {
2078            tcg_out_addsubi(s, ext, a0, a1, a2);
2079        } else {
2080            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2081        }
2082        break;
2083
2084    case INDEX_op_sub_i32:
2085        a2 = (int32_t)a2;
2086        /* FALLTHRU */
2087    case INDEX_op_sub_i64:
2088        if (c2) {
2089            tcg_out_addsubi(s, ext, a0, a1, -a2);
2090        } else {
2091            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2092        }
2093        break;
2094
2095    case INDEX_op_neg_i64:
2096    case INDEX_op_neg_i32:
2097        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2098        break;
2099
2100    case INDEX_op_and_i32:
2101        a2 = (int32_t)a2;
2102        /* FALLTHRU */
2103    case INDEX_op_and_i64:
2104        if (c2) {
2105            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2106        } else {
2107            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2108        }
2109        break;
2110
2111    case INDEX_op_andc_i32:
2112        a2 = (int32_t)a2;
2113        /* FALLTHRU */
2114    case INDEX_op_andc_i64:
2115        if (c2) {
2116            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2117        } else {
2118            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2119        }
2120        break;
2121
2122    case INDEX_op_or_i32:
2123        a2 = (int32_t)a2;
2124        /* FALLTHRU */
2125    case INDEX_op_or_i64:
2126        if (c2) {
2127            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2128        } else {
2129            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2130        }
2131        break;
2132
2133    case INDEX_op_orc_i32:
2134        a2 = (int32_t)a2;
2135        /* FALLTHRU */
2136    case INDEX_op_orc_i64:
2137        if (c2) {
2138            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2139        } else {
2140            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2141        }
2142        break;
2143
2144    case INDEX_op_xor_i32:
2145        a2 = (int32_t)a2;
2146        /* FALLTHRU */
2147    case INDEX_op_xor_i64:
2148        if (c2) {
2149            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2150        } else {
2151            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2152        }
2153        break;
2154
2155    case INDEX_op_eqv_i32:
2156        a2 = (int32_t)a2;
2157        /* FALLTHRU */
2158    case INDEX_op_eqv_i64:
2159        if (c2) {
2160            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2161        } else {
2162            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2163        }
2164        break;
2165
2166    case INDEX_op_not_i64:
2167    case INDEX_op_not_i32:
2168        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2169        break;
2170
2171    case INDEX_op_mul_i64:
2172    case INDEX_op_mul_i32:
2173        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2174        break;
2175
2176    case INDEX_op_div_i64:
2177    case INDEX_op_div_i32:
2178        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2179        break;
2180    case INDEX_op_divu_i64:
2181    case INDEX_op_divu_i32:
2182        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2183        break;
2184
2185    case INDEX_op_rem_i64:
2186    case INDEX_op_rem_i32:
2187        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2188        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2189        break;
2190    case INDEX_op_remu_i64:
2191    case INDEX_op_remu_i32:
2192        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2193        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2194        break;
2195
2196    case INDEX_op_shl_i64:
2197    case INDEX_op_shl_i32:
2198        if (c2) {
2199            tcg_out_shl(s, ext, a0, a1, a2);
2200        } else {
2201            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2202        }
2203        break;
2204
2205    case INDEX_op_shr_i64:
2206    case INDEX_op_shr_i32:
2207        if (c2) {
2208            tcg_out_shr(s, ext, a0, a1, a2);
2209        } else {
2210            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2211        }
2212        break;
2213
2214    case INDEX_op_sar_i64:
2215    case INDEX_op_sar_i32:
2216        if (c2) {
2217            tcg_out_sar(s, ext, a0, a1, a2);
2218        } else {
2219            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2220        }
2221        break;
2222
2223    case INDEX_op_rotr_i64:
2224    case INDEX_op_rotr_i32:
2225        if (c2) {
2226            tcg_out_rotr(s, ext, a0, a1, a2);
2227        } else {
2228            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2229        }
2230        break;
2231
2232    case INDEX_op_rotl_i64:
2233    case INDEX_op_rotl_i32:
2234        if (c2) {
2235            tcg_out_rotl(s, ext, a0, a1, a2);
2236        } else {
2237            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2238            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2239        }
2240        break;
2241
2242    case INDEX_op_clz_i64:
2243    case INDEX_op_clz_i32:
2244        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2245        break;
2246    case INDEX_op_ctz_i64:
2247    case INDEX_op_ctz_i32:
2248        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2249        break;
2250
2251    case INDEX_op_brcond_i32:
2252        a1 = (int32_t)a1;
2253        /* FALLTHRU */
2254    case INDEX_op_brcond_i64:
2255        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2256        break;
2257
2258    case INDEX_op_setcond_i32:
2259        a2 = (int32_t)a2;
2260        /* FALLTHRU */
2261    case INDEX_op_setcond_i64:
2262        tcg_out_cmp(s, ext, a1, a2, c2);
2263        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2264        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2265                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2266        break;
2267
2268    case INDEX_op_movcond_i32:
2269        a2 = (int32_t)a2;
2270        /* FALLTHRU */
2271    case INDEX_op_movcond_i64:
2272        tcg_out_cmp(s, ext, a1, a2, c2);
2273        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2274        break;
2275
2276    case INDEX_op_qemu_ld_i32:
2277    case INDEX_op_qemu_ld_i64:
2278        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2279        break;
2280    case INDEX_op_qemu_st_i32:
2281    case INDEX_op_qemu_st_i64:
2282        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2283        break;
2284
2285    case INDEX_op_bswap64_i64:
2286        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2287        break;
2288    case INDEX_op_bswap32_i64:
2289        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2290        if (a2 & TCG_BSWAP_OS) {
2291            tcg_out_ext32s(s, a0, a0);
2292        }
2293        break;
2294    case INDEX_op_bswap32_i32:
2295        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2296        break;
2297    case INDEX_op_bswap16_i64:
2298    case INDEX_op_bswap16_i32:
2299        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2300        if (a2 & TCG_BSWAP_OS) {
2301            /* Output must be sign-extended. */
2302            tcg_out_ext16s(s, ext, a0, a0);
2303        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2304            /* Output must be zero-extended, but input isn't. */
2305            tcg_out_ext16u(s, a0, a0);
2306        }
2307        break;
2308
2309    case INDEX_op_deposit_i64:
2310    case INDEX_op_deposit_i32:
2311        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2312        break;
2313
2314    case INDEX_op_extract_i64:
2315    case INDEX_op_extract_i32:
2316        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2317        break;
2318
2319    case INDEX_op_sextract_i64:
2320    case INDEX_op_sextract_i32:
2321        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2322        break;
2323
2324    case INDEX_op_extract2_i64:
2325    case INDEX_op_extract2_i32:
2326        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2327        break;
2328
2329    case INDEX_op_add2_i32:
2330        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2331                        (int32_t)args[4], args[5], const_args[4],
2332                        const_args[5], false);
2333        break;
2334    case INDEX_op_add2_i64:
2335        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2336                        args[5], const_args[4], const_args[5], false);
2337        break;
2338    case INDEX_op_sub2_i32:
2339        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2340                        (int32_t)args[4], args[5], const_args[4],
2341                        const_args[5], true);
2342        break;
2343    case INDEX_op_sub2_i64:
2344        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2345                        args[5], const_args[4], const_args[5], true);
2346        break;
2347
2348    case INDEX_op_muluh_i64:
2349        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2350        break;
2351    case INDEX_op_mulsh_i64:
2352        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2353        break;
2354
2355    case INDEX_op_mb:
2356        tcg_out_mb(s, a0);
2357        break;
2358
2359    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2360    case INDEX_op_mov_i64:
2361    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2362    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2363    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2364    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2365    case INDEX_op_ext8s_i64:
2366    case INDEX_op_ext8u_i32:
2367    case INDEX_op_ext8u_i64:
2368    case INDEX_op_ext16s_i64:
2369    case INDEX_op_ext16s_i32:
2370    case INDEX_op_ext16u_i64:
2371    case INDEX_op_ext16u_i32:
2372    case INDEX_op_ext32s_i64:
2373    case INDEX_op_ext32u_i64:
2374    case INDEX_op_ext_i32_i64:
2375    case INDEX_op_extu_i32_i64:
2376    case INDEX_op_extrl_i64_i32:
2377    default:
2378        g_assert_not_reached();
2379    }
2380
2381#undef REG0
2382}
2383
2384static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2385                           unsigned vecl, unsigned vece,
2386                           const TCGArg args[TCG_MAX_OP_ARGS],
2387                           const int const_args[TCG_MAX_OP_ARGS])
2388{
2389    static const AArch64Insn cmp_vec_insn[16] = {
2390        [TCG_COND_EQ] = I3616_CMEQ,
2391        [TCG_COND_GT] = I3616_CMGT,
2392        [TCG_COND_GE] = I3616_CMGE,
2393        [TCG_COND_GTU] = I3616_CMHI,
2394        [TCG_COND_GEU] = I3616_CMHS,
2395    };
2396    static const AArch64Insn cmp_scalar_insn[16] = {
2397        [TCG_COND_EQ] = I3611_CMEQ,
2398        [TCG_COND_GT] = I3611_CMGT,
2399        [TCG_COND_GE] = I3611_CMGE,
2400        [TCG_COND_GTU] = I3611_CMHI,
2401        [TCG_COND_GEU] = I3611_CMHS,
2402    };
2403    static const AArch64Insn cmp0_vec_insn[16] = {
2404        [TCG_COND_EQ] = I3617_CMEQ0,
2405        [TCG_COND_GT] = I3617_CMGT0,
2406        [TCG_COND_GE] = I3617_CMGE0,
2407        [TCG_COND_LT] = I3617_CMLT0,
2408        [TCG_COND_LE] = I3617_CMLE0,
2409    };
2410    static const AArch64Insn cmp0_scalar_insn[16] = {
2411        [TCG_COND_EQ] = I3612_CMEQ0,
2412        [TCG_COND_GT] = I3612_CMGT0,
2413        [TCG_COND_GE] = I3612_CMGE0,
2414        [TCG_COND_LT] = I3612_CMLT0,
2415        [TCG_COND_LE] = I3612_CMLE0,
2416    };
2417
2418    TCGType type = vecl + TCG_TYPE_V64;
2419    unsigned is_q = vecl;
2420    bool is_scalar = !is_q && vece == MO_64;
2421    TCGArg a0, a1, a2, a3;
2422    int cmode, imm8;
2423
2424    a0 = args[0];
2425    a1 = args[1];
2426    a2 = args[2];
2427
2428    switch (opc) {
2429    case INDEX_op_ld_vec:
2430        tcg_out_ld(s, type, a0, a1, a2);
2431        break;
2432    case INDEX_op_st_vec:
2433        tcg_out_st(s, type, a0, a1, a2);
2434        break;
2435    case INDEX_op_dupm_vec:
2436        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2437        break;
2438    case INDEX_op_add_vec:
2439        if (is_scalar) {
2440            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2441        } else {
2442            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2443        }
2444        break;
2445    case INDEX_op_sub_vec:
2446        if (is_scalar) {
2447            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2448        } else {
2449            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2450        }
2451        break;
2452    case INDEX_op_mul_vec:
2453        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2454        break;
2455    case INDEX_op_neg_vec:
2456        if (is_scalar) {
2457            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2458        } else {
2459            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2460        }
2461        break;
2462    case INDEX_op_abs_vec:
2463        if (is_scalar) {
2464            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2465        } else {
2466            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2467        }
2468        break;
2469    case INDEX_op_and_vec:
2470        if (const_args[2]) {
2471            is_shimm1632(~a2, &cmode, &imm8);
2472            if (a0 == a1) {
2473                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2474                return;
2475            }
2476            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2477            a2 = a0;
2478        }
2479        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2480        break;
2481    case INDEX_op_or_vec:
2482        if (const_args[2]) {
2483            is_shimm1632(a2, &cmode, &imm8);
2484            if (a0 == a1) {
2485                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2486                return;
2487            }
2488            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2489            a2 = a0;
2490        }
2491        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2492        break;
2493    case INDEX_op_andc_vec:
2494        if (const_args[2]) {
2495            is_shimm1632(a2, &cmode, &imm8);
2496            if (a0 == a1) {
2497                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2498                return;
2499            }
2500            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2501            a2 = a0;
2502        }
2503        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2504        break;
2505    case INDEX_op_orc_vec:
2506        if (const_args[2]) {
2507            is_shimm1632(~a2, &cmode, &imm8);
2508            if (a0 == a1) {
2509                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2510                return;
2511            }
2512            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2513            a2 = a0;
2514        }
2515        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2516        break;
2517    case INDEX_op_xor_vec:
2518        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2519        break;
2520    case INDEX_op_ssadd_vec:
2521        if (is_scalar) {
2522            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2523        } else {
2524            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2525        }
2526        break;
2527    case INDEX_op_sssub_vec:
2528        if (is_scalar) {
2529            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2530        } else {
2531            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2532        }
2533        break;
2534    case INDEX_op_usadd_vec:
2535        if (is_scalar) {
2536            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2537        } else {
2538            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2539        }
2540        break;
2541    case INDEX_op_ussub_vec:
2542        if (is_scalar) {
2543            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2544        } else {
2545            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2546        }
2547        break;
2548    case INDEX_op_smax_vec:
2549        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2550        break;
2551    case INDEX_op_smin_vec:
2552        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2553        break;
2554    case INDEX_op_umax_vec:
2555        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2556        break;
2557    case INDEX_op_umin_vec:
2558        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2559        break;
2560    case INDEX_op_not_vec:
2561        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2562        break;
2563    case INDEX_op_shli_vec:
2564        if (is_scalar) {
2565            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2566        } else {
2567            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2568        }
2569        break;
2570    case INDEX_op_shri_vec:
2571        if (is_scalar) {
2572            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2573        } else {
2574            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2575        }
2576        break;
2577    case INDEX_op_sari_vec:
2578        if (is_scalar) {
2579            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2580        } else {
2581            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2582        }
2583        break;
2584    case INDEX_op_aa64_sli_vec:
2585        if (is_scalar) {
2586            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2587        } else {
2588            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2589        }
2590        break;
2591    case INDEX_op_shlv_vec:
2592        if (is_scalar) {
2593            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2594        } else {
2595            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2596        }
2597        break;
2598    case INDEX_op_aa64_sshl_vec:
2599        if (is_scalar) {
2600            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2601        } else {
2602            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2603        }
2604        break;
2605    case INDEX_op_cmp_vec:
2606        {
2607            TCGCond cond = args[3];
2608            AArch64Insn insn;
2609
2610            if (cond == TCG_COND_NE) {
2611                if (const_args[2]) {
2612                    if (is_scalar) {
2613                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2614                    } else {
2615                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2616                    }
2617                } else {
2618                    if (is_scalar) {
2619                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2620                    } else {
2621                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2622                    }
2623                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2624                }
2625            } else {
2626                if (const_args[2]) {
2627                    if (is_scalar) {
2628                        insn = cmp0_scalar_insn[cond];
2629                        if (insn) {
2630                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2631                            break;
2632                        }
2633                    } else {
2634                        insn = cmp0_vec_insn[cond];
2635                        if (insn) {
2636                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2637                            break;
2638                        }
2639                    }
2640                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2641                    a2 = TCG_VEC_TMP;
2642                }
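                /* LT/LE and their unsigned forms have no direct register
                   compare encoding; swap the operands and the condition so
                   that GT/GE (or CMHI/CMHS) can be used instead.  */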
2643                if (is_scalar) {
2644                    insn = cmp_scalar_insn[cond];
2645                    if (insn == 0) {
2646                        TCGArg t;
2647                        t = a1, a1 = a2, a2 = t;
2648                        cond = tcg_swap_cond(cond);
2649                        insn = cmp_scalar_insn[cond];
2650                        tcg_debug_assert(insn != 0);
2651                    }
2652                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2653                } else {
2654                    insn = cmp_vec_insn[cond];
2655                    if (insn == 0) {
2656                        TCGArg t;
2657                        t = a1, a1 = a2, a2 = t;
2658                        cond = tcg_swap_cond(cond);
2659                        insn = cmp_vec_insn[cond];
2660                        tcg_debug_assert(insn != 0);
2661                    }
2662                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2663                }
2664            }
2665        }
2666        break;
2667
2668    case INDEX_op_bitsel_vec:
2669        a3 = args[3];
2670        if (a0 == a3) {
2671            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2672        } else if (a0 == a2) {
2673            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2674        } else {
2675            if (a0 != a1) {
2676                tcg_out_mov(s, type, a0, a1);
2677            }
2678            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2679        }
2680        break;
2681
2682    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2683    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2684    default:
2685        g_assert_not_reached();
2686    }
2687}
2688
2689int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2690{
2691    switch (opc) {
2692    case INDEX_op_add_vec:
2693    case INDEX_op_sub_vec:
2694    case INDEX_op_and_vec:
2695    case INDEX_op_or_vec:
2696    case INDEX_op_xor_vec:
2697    case INDEX_op_andc_vec:
2698    case INDEX_op_orc_vec:
2699    case INDEX_op_neg_vec:
2700    case INDEX_op_abs_vec:
2701    case INDEX_op_not_vec:
2702    case INDEX_op_cmp_vec:
2703    case INDEX_op_shli_vec:
2704    case INDEX_op_shri_vec:
2705    case INDEX_op_sari_vec:
2706    case INDEX_op_ssadd_vec:
2707    case INDEX_op_sssub_vec:
2708    case INDEX_op_usadd_vec:
2709    case INDEX_op_ussub_vec:
2710    case INDEX_op_shlv_vec:
2711    case INDEX_op_bitsel_vec:
2712        return 1;
2713    case INDEX_op_rotli_vec:
2714    case INDEX_op_shrv_vec:
2715    case INDEX_op_sarv_vec:
2716    case INDEX_op_rotlv_vec:
2717    case INDEX_op_rotrv_vec:
2718        return -1;
2719    case INDEX_op_mul_vec:
2720    case INDEX_op_smax_vec:
2721    case INDEX_op_smin_vec:
2722    case INDEX_op_umax_vec:
2723    case INDEX_op_umin_vec:
2724        return vece < MO_64;
2725
2726    default:
2727        return 0;
2728    }
2729}
2730
2731void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2732                       TCGArg a0, ...)
2733{
2734    va_list va;
2735    TCGv_vec v0, v1, v2, t1, t2, c1;
2736    TCGArg a2;
2737
2738    va_start(va, a0);
2739    v0 = temp_tcgv_vec(arg_temp(a0));
2740    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2741    a2 = va_arg(va, TCGArg);
2742    va_end(va);
2743
2744    switch (opc) {
2745    case INDEX_op_rotli_vec:
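        /* rotl(x, c) == (x >> (width - c)) | (x << c): the USHR below forms
           the low bits and the SLI then deposits the left-shifted copy of
           the source on top of them.  */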
2746        t1 = tcg_temp_new_vec(type);
2747        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2748        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2749                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2750        tcg_temp_free_vec(t1);
2751        break;
2752
2753    case INDEX_op_shrv_vec:
2754    case INDEX_op_sarv_vec:
2755        /* Right shifts are negative left shifts for AArch64.  */
2756        v2 = temp_tcgv_vec(arg_temp(a2));
2757        t1 = tcg_temp_new_vec(type);
2758        tcg_gen_neg_vec(vece, t1, v2);
2759        opc = (opc == INDEX_op_shrv_vec
2760               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2761        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2762                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2763        tcg_temp_free_vec(t1);
2764        break;
2765
2766    case INDEX_op_rotlv_vec:
2767        v2 = temp_tcgv_vec(arg_temp(a2));
2768        t1 = tcg_temp_new_vec(type);
2769        c1 = tcg_constant_vec(type, vece, 8 << vece);
2770        tcg_gen_sub_vec(vece, t1, v2, c1);
2771        /* Right shifts are negative left shifts for AArch64.  */
2772        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2773                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2774        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2775                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2776        tcg_gen_or_vec(vece, v0, v0, t1);
2777        tcg_temp_free_vec(t1);
2778        break;
2779
2780    case INDEX_op_rotrv_vec:
2781        v2 = temp_tcgv_vec(arg_temp(a2));
2782        t1 = tcg_temp_new_vec(type);
2783        t2 = tcg_temp_new_vec(type);
2784        c1 = tcg_constant_vec(type, vece, 8 << vece);
2785        tcg_gen_neg_vec(vece, t1, v2);
2786        tcg_gen_sub_vec(vece, t2, c1, v2);
2787        /* Right shifts are negative left shifts for AArch64.  */
2788        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2789                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2790        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2791                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2792        tcg_gen_or_vec(vece, v0, t1, t2);
2793        tcg_temp_free_vec(t1);
2794        tcg_temp_free_vec(t2);
2795        break;
2796
2797    default:
2798        g_assert_not_reached();
2799    }
2800}
2801
2802static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2803{
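    /*
     * A reminder of the constraint letters from tcg-target-con-str.h:
     * 'r' integer registers, 'w' vector registers, 'l' the subset of
     * integer registers usable as qemu_ld/st addresses, 'Z' constant zero,
     * 'A' add/sub immediates, 'L' logical immediates, 'M' constant -1,
     * 'O'/'N' vector bitwise (ORR/BIC-style) immediates.
     */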
2804    switch (op) {
2805    case INDEX_op_goto_ptr:
2806        return C_O0_I1(r);
2807
2808    case INDEX_op_ld8u_i32:
2809    case INDEX_op_ld8s_i32:
2810    case INDEX_op_ld16u_i32:
2811    case INDEX_op_ld16s_i32:
2812    case INDEX_op_ld_i32:
2813    case INDEX_op_ld8u_i64:
2814    case INDEX_op_ld8s_i64:
2815    case INDEX_op_ld16u_i64:
2816    case INDEX_op_ld16s_i64:
2817    case INDEX_op_ld32u_i64:
2818    case INDEX_op_ld32s_i64:
2819    case INDEX_op_ld_i64:
2820    case INDEX_op_neg_i32:
2821    case INDEX_op_neg_i64:
2822    case INDEX_op_not_i32:
2823    case INDEX_op_not_i64:
2824    case INDEX_op_bswap16_i32:
2825    case INDEX_op_bswap32_i32:
2826    case INDEX_op_bswap16_i64:
2827    case INDEX_op_bswap32_i64:
2828    case INDEX_op_bswap64_i64:
2829    case INDEX_op_ext8s_i32:
2830    case INDEX_op_ext16s_i32:
2831    case INDEX_op_ext8u_i32:
2832    case INDEX_op_ext16u_i32:
2833    case INDEX_op_ext8s_i64:
2834    case INDEX_op_ext16s_i64:
2835    case INDEX_op_ext32s_i64:
2836    case INDEX_op_ext8u_i64:
2837    case INDEX_op_ext16u_i64:
2838    case INDEX_op_ext32u_i64:
2839    case INDEX_op_ext_i32_i64:
2840    case INDEX_op_extu_i32_i64:
2841    case INDEX_op_extract_i32:
2842    case INDEX_op_extract_i64:
2843    case INDEX_op_sextract_i32:
2844    case INDEX_op_sextract_i64:
2845        return C_O1_I1(r, r);
2846
2847    case INDEX_op_st8_i32:
2848    case INDEX_op_st16_i32:
2849    case INDEX_op_st_i32:
2850    case INDEX_op_st8_i64:
2851    case INDEX_op_st16_i64:
2852    case INDEX_op_st32_i64:
2853    case INDEX_op_st_i64:
2854        return C_O0_I2(rZ, r);
2855
2856    case INDEX_op_add_i32:
2857    case INDEX_op_add_i64:
2858    case INDEX_op_sub_i32:
2859    case INDEX_op_sub_i64:
2860    case INDEX_op_setcond_i32:
2861    case INDEX_op_setcond_i64:
2862        return C_O1_I2(r, r, rA);
2863
2864    case INDEX_op_mul_i32:
2865    case INDEX_op_mul_i64:
2866    case INDEX_op_div_i32:
2867    case INDEX_op_div_i64:
2868    case INDEX_op_divu_i32:
2869    case INDEX_op_divu_i64:
2870    case INDEX_op_rem_i32:
2871    case INDEX_op_rem_i64:
2872    case INDEX_op_remu_i32:
2873    case INDEX_op_remu_i64:
2874    case INDEX_op_muluh_i64:
2875    case INDEX_op_mulsh_i64:
2876        return C_O1_I2(r, r, r);
2877
2878    case INDEX_op_and_i32:
2879    case INDEX_op_and_i64:
2880    case INDEX_op_or_i32:
2881    case INDEX_op_or_i64:
2882    case INDEX_op_xor_i32:
2883    case INDEX_op_xor_i64:
2884    case INDEX_op_andc_i32:
2885    case INDEX_op_andc_i64:
2886    case INDEX_op_orc_i32:
2887    case INDEX_op_orc_i64:
2888    case INDEX_op_eqv_i32:
2889    case INDEX_op_eqv_i64:
2890        return C_O1_I2(r, r, rL);
2891
2892    case INDEX_op_shl_i32:
2893    case INDEX_op_shr_i32:
2894    case INDEX_op_sar_i32:
2895    case INDEX_op_rotl_i32:
2896    case INDEX_op_rotr_i32:
2897    case INDEX_op_shl_i64:
2898    case INDEX_op_shr_i64:
2899    case INDEX_op_sar_i64:
2900    case INDEX_op_rotl_i64:
2901    case INDEX_op_rotr_i64:
2902        return C_O1_I2(r, r, ri);
2903
2904    case INDEX_op_clz_i32:
2905    case INDEX_op_ctz_i32:
2906    case INDEX_op_clz_i64:
2907    case INDEX_op_ctz_i64:
2908        return C_O1_I2(r, r, rAL);
2909
2910    case INDEX_op_brcond_i32:
2911    case INDEX_op_brcond_i64:
2912        return C_O0_I2(r, rA);
2913
2914    case INDEX_op_movcond_i32:
2915    case INDEX_op_movcond_i64:
2916        return C_O1_I4(r, r, rA, rZ, rZ);
2917
2918    case INDEX_op_qemu_ld_i32:
2919    case INDEX_op_qemu_ld_i64:
2920        return C_O1_I1(r, l);
2921    case INDEX_op_qemu_st_i32:
2922    case INDEX_op_qemu_st_i64:
2923        return C_O0_I2(lZ, l);
2924
2925    case INDEX_op_deposit_i32:
2926    case INDEX_op_deposit_i64:
2927        return C_O1_I2(r, 0, rZ);
2928
2929    case INDEX_op_extract2_i32:
2930    case INDEX_op_extract2_i64:
2931        return C_O1_I2(r, rZ, rZ);
2932
2933    case INDEX_op_add2_i32:
2934    case INDEX_op_add2_i64:
2935    case INDEX_op_sub2_i32:
2936    case INDEX_op_sub2_i64:
2937        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2938
2939    case INDEX_op_add_vec:
2940    case INDEX_op_sub_vec:
2941    case INDEX_op_mul_vec:
2942    case INDEX_op_xor_vec:
2943    case INDEX_op_ssadd_vec:
2944    case INDEX_op_sssub_vec:
2945    case INDEX_op_usadd_vec:
2946    case INDEX_op_ussub_vec:
2947    case INDEX_op_smax_vec:
2948    case INDEX_op_smin_vec:
2949    case INDEX_op_umax_vec:
2950    case INDEX_op_umin_vec:
2951    case INDEX_op_shlv_vec:
2952    case INDEX_op_shrv_vec:
2953    case INDEX_op_sarv_vec:
2954    case INDEX_op_aa64_sshl_vec:
2955        return C_O1_I2(w, w, w);
2956    case INDEX_op_not_vec:
2957    case INDEX_op_neg_vec:
2958    case INDEX_op_abs_vec:
2959    case INDEX_op_shli_vec:
2960    case INDEX_op_shri_vec:
2961    case INDEX_op_sari_vec:
2962        return C_O1_I1(w, w);
2963    case INDEX_op_ld_vec:
2964    case INDEX_op_dupm_vec:
2965        return C_O1_I1(w, r);
2966    case INDEX_op_st_vec:
2967        return C_O0_I2(w, r);
2968    case INDEX_op_dup_vec:
2969        return C_O1_I1(w, wr);
2970    case INDEX_op_or_vec:
2971    case INDEX_op_andc_vec:
2972        return C_O1_I2(w, w, wO);
2973    case INDEX_op_and_vec:
2974    case INDEX_op_orc_vec:
2975        return C_O1_I2(w, w, wN);
2976    case INDEX_op_cmp_vec:
2977        return C_O1_I2(w, w, wZ);
2978    case INDEX_op_bitsel_vec:
2979        return C_O1_I3(w, w, w, w);
2980    case INDEX_op_aa64_sli_vec:
2981        return C_O1_I2(w, 0, w);
2982
2983    default:
2984        g_assert_not_reached();
2985    }
2986}
2987
2988static void tcg_target_init(TCGContext *s)
2989{
2990    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2991    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2992    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2993    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
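    /* Register numbers 0..31 are the integer registers, 32..63 the vector
       registers, so the two halves of the 64-bit set select each bank.  */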
2994
2995    tcg_target_call_clobber_regs = -1ull;
2996    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2997    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2998    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2999    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3000    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3001    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3002    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3003    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3004    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3005    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3006    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3007    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3008    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3009    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3010    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3011    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3012    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3013    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3014    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3015
3016    s->reserved_regs = 0;
3017    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3018    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3019    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
3020    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3021    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
3022}
3023
3024/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3025#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3026
3027#define FRAME_SIZE \
3028    ((PUSH_SIZE \
3029      + TCG_STATIC_CALL_ARGS_SIZE \
3030      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3031      + TCG_TARGET_STACK_ALIGN - 1) \
3032     & ~(TCG_TARGET_STACK_ALIGN - 1))
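/* With the values in use at the time of writing (TCG_STATIC_CALL_ARGS_SIZE
   and CPU_TEMP_BUF_NLONGS both 128, 16-byte stack alignment) this comes to
   96 + 128 + 1024 = 1248 bytes, comfortably within both limits below.  */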
3033
3034/* We're expecting a 2-byte uleb128 encoded value.  */
3035QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3036
3037/* We're expecting to use a single ADDI insn.  */
3038QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3039
3040static void tcg_target_qemu_prologue(TCGContext *s)
3041{
3042    TCGReg r;
3043
3044    /* Push (FP, LR) and allocate space for all saved registers.  */
3045    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3046                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3047
3048    /* Set up frame pointer for canonical unwinding.  */
3049    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3050
3051    /* Store callee-preserved regs x19..x28.  */
3052    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3053        int ofs = (r - TCG_REG_X19 + 2) * 8;
3054        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3055    }
3056
3057    /* Make stack space for TCG locals.  */
3058    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3059                 FRAME_SIZE - PUSH_SIZE);
3060
3061    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3062    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3063                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3064
3065#if !defined(CONFIG_SOFTMMU)
3066    if (USE_GUEST_BASE) {
3067        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3068        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3069    }
3070#endif
3071
3072    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3073    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3074
3075    /*
3076     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3077     * and fall through to the rest of the epilogue.
3078     */
3079    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3080    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3081
3082    /* TB epilogue */
3083    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3084
3085    /* Remove TCG locals stack space.  */
3086    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3087                 FRAME_SIZE - PUSH_SIZE);
3088
3089    /* Restore registers x19..x28.  */
3090    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3091        int ofs = (r - TCG_REG_X19 + 2) * 8;
3092        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3093    }
3094
3095    /* Pop (FP, LR), restore SP to previous frame.  */
3096    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3097                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3098    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3099}
3100
3101static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3102{
3103    int i;
3104    for (i = 0; i < count; ++i) {
3105        p[i] = NOP;
3106    }
3107}
3108
3109typedef struct {
3110    DebugFrameHeader h;
3111    uint8_t fde_def_cfa[4];
3112    uint8_t fde_reg_ofs[24];
3113} DebugFrame;
3114
3115#define ELF_HOST_MACHINE EM_AARCH64
3116
3117static const DebugFrame debug_frame = {
3118    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3119    .h.cie.id = -1,
3120    .h.cie.version = 1,
3121    .h.cie.code_align = 1,
3122    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3123    .h.cie.return_column = TCG_REG_LR,
3124
3125    /* Total FDE size does not include the "len" member.  */
3126    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3127
3128    .fde_def_cfa = {
3129        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3130        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3131        (FRAME_SIZE >> 7)
3132    },
3133    .fde_reg_ofs = {
3134        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3135        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3136        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3137        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3138        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3139        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3140        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3141        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3142        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3143        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3144        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3145        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3146    }
3147};
3148
3149void tcg_register_jit(const void *buf, size_t buf_size)
3150{
3151    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3152}
3153