xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 0cadc1eda1a3120c37c713ab6d6b7a02da0d2e6f)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by register number.  The first four
 * rows are the general registers (register 29 is shown as "fp" and
 * register 31 as "sp"); the last four rows are the vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* v29 is an ordinary vector register; it is not a frame pointer.  */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66
67static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
68{
69    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
70    tcg_debug_assert(slot >= 0 && slot <= 1);
71    return TCG_REG_X0 + slot;
72}
73
/* General and vector registers used as temporaries.  */
#define TCG_REG_TMP  (TCG_REG_X30)
#define TCG_VEC_TMP  (TCG_REG_V31)
76
#if !defined(CONFIG_SOFTMMU)
/*
 * XZR cannot be encoded in the address base register slot, because that
 * encoding actually selects SP.  So when the guest address has to be
 * zero-extended through the address index register slot, even a zero
 * guest base must be loaded into a register.
 */
#define USE_GUEST_BASE     (TARGET_LONG_BITS == 32 || guest_base != 0)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif /* !CONFIG_SOFTMMU */
85
86static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
87{
88    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
89    ptrdiff_t offset = target - src_rx;
90
91    if (offset == sextract64(offset, 0, 26)) {
92        /* read instruction, mask away previous PC_REL26 parameter contents,
93           set the proper offset, then write back the instruction. */
94        *src_rw = deposit32(*src_rw, 0, 26, offset);
95        return true;
96    }
97    return false;
98}
99
100static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
101{
102    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
103    ptrdiff_t offset = target - src_rx;
104
105    if (offset == sextract64(offset, 0, 19)) {
106        *src_rw = deposit32(*src_rw, 5, 19, offset);
107        return true;
108    }
109    return false;
110}
111
112static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
113                        intptr_t value, intptr_t addend)
114{
115    tcg_debug_assert(addend == 0);
116    switch (type) {
117    case R_AARCH64_JUMP26:
118    case R_AARCH64_CALL26:
119        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
120    case R_AARCH64_CONDBR19:
121        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
122    default:
123        g_assert_not_reached();
124    }
125}
126
/* Target-specific constant constraint bits, see tcg_target_const_match.  */
#define TCG_CT_CONST_AIMM (1 << 8)    /* add/sub immediate */
#define TCG_CT_CONST_LIMM (1 << 9)    /* logical immediate */
#define TCG_CT_CONST_ZERO (1 << 10)   /* the value 0 */
#define TCG_CT_CONST_MONE (1 << 11)   /* the value -1 */
#define TCG_CT_CONST_ORRI (1 << 12)   /* vector shifted immediate */
#define TCG_CT_CONST_ANDI (1 << 13)   /* as ORRI, on the inverted value */
133
/* Register sets: general registers in bits 0-31, vector in bits 32-63.  */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   (0xffffffffull << 32)

/* With CONFIG_SOFTMMU, X0-X3 are excluded from the qemu_ld/st set.  */
#if defined(CONFIG_SOFTMMU)
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~(1 << TCG_REG_X0) & ~(1 << TCG_REG_X1) \
                      & ~(1 << TCG_REG_X2) & ~(1 << TCG_REG_X3))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
144
/*
 * Match a constant valid for addition: a 12-bit value, either unshifted
 * or shifted left by 12 bits.
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;

    if ((val & ~low12) == 0) {
        return true;
    }
    return (val & ~(low12 << 12)) == 0;
}
150
/*
 * Match a constant valid for logical operations.
 *
 * This takes a simplified view of the logical immediates, ignoring the
 * replication that can happen across the field.  The accepted bit
 * patterns are
 *     0....01....1
 *     0..01..10..0
 * and their inverses.
 */
static inline bool is_limm(uint64_t val)
{
    uint64_t lowest_set, carried;

    /* Work on the form with the msb clear; the inverse is as valid.  */
    if (val >> 63) {
        val = ~val;
    }
    if (!val) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of
     * ones, leaving at most one bit set when the run was the only one.
     */
    lowest_set = val & -val;
    carried = val + lowest_set;
    return (carried & (carried - 1)) == 0;
}
171
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either half.  On success *cmode and *imm8 describe it; on
 * failure neither is written.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* Only the low byte is populated.  */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* Only the high byte is populated.  */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
186
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. a single
 * byte at one of the four byte positions.  On success *cmode and *imm8
 * describe it; on failure neither is written.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            /* cmode is 0, 2, 4 or 6 for shifts of 0, 8, 16 or 24.  */
            *cmode = shift / 4;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
209
/*
 * Return true if v32 is a valid 32-bit "shifting ones" immediate: one
 * byte with the bits below it all set and the bits above it all clear.
 * On success *cmode and *imm8 describe it; on failure neither is
 * written.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    const uint32_t byte1 = (v32 >> 8) & 0xff;
    const uint32_t byte2 = (v32 >> 16) & 0xff;

    if ((v32 & 0xffff00ff) == 0xff) {
        /* 0x0000XXff */
        *cmode = 0xc;
        *imm8 = byte1;
        return true;
    }
    if ((v32 & 0xff00ffff) == 0xffff) {
        /* 0x00XXffff */
        *cmode = 0xd;
        *imm8 = byte2;
        return true;
    }
    return false;
}
224
/*
 * Return true if v32 is a valid float32 immediate.  That requires bits
 * [18:0] to be zero and bits [30:25] to be either 0b100000 or 0b011111.
 * On success *cmode is 0xf and *imm8 packs bit 31, bit 25 and bits
 * [24:19]; on failure neither is written.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t top6 = extract32(v32, 25, 6);

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    if (top6 != 0x20 && top6 != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
239
/*
 * Return true if v64 is a valid float64 immediate.  That requires bits
 * [47:0] to be zero and bits [62:54] to be either 0x100 or 0x0ff.
 * On success *cmode is 0xf and *imm8 packs bit 63, bit 54 and bits
 * [53:48]; on failure neither is written.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t top9 = extract64(v64, 54, 9);

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    if (top9 != 0x100 && top9 != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
254
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * The parameters for MOVI are stored in (cmode, imm8).
 * The return value is the cmode for ORR (6, 4 or 2); the matching imm8
 * is the byte of v32 at bit offset 4 * cmode.  Returns 0 if no byte can
 * be split off that way.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode;

    for (orr_cmode = 6; orr_cmode > 0; orr_cmode -= 2) {
        /* Drop the one byte that ORR would add back.  */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        if (is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
    }
    return 0;
}
274
/*
 * Return true if v32 is a valid 16-bit or 32-bit shifted immediate.
 * When both halves of v32 are equal it is tested as a 16-bit value,
 * otherwise as a 32-bit value.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
284
285static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
286{
287    if (ct & TCG_CT_CONST) {
288        return 1;
289    }
290    if (type == TCG_TYPE_I32) {
291        val = (int32_t)val;
292    }
293    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
294        return 1;
295    }
296    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
297        return 1;
298    }
299    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
300        return 1;
301    }
302    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
303        return 1;
304    }
305
306    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
307    case 0:
308        break;
309    case TCG_CT_CONST_ANDI:
310        val = ~val;
311        /* fallthru */
312    case TCG_CT_CONST_ORRI:
313        if (val == deposit64(val, 32, 32, val)) {
314            int cmode, imm8;
315            return is_shimm1632(val, &cmode, &imm8);
316        }
317        break;
318    default:
319        /* Both bits should not be set for the same insn.  */
320        g_assert_not_reached();
321    }
322
323    return 0;
324}
325
/* AArch64 condition codes, by their 4-bit encoding.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,     /* behaves like COND_AL here */

    /* Aliases.  */
    COND_HS = COND_CS, /* greater or equal */
    COND_LO = COND_CC, /* Lower */
};
346
347static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
348    [TCG_COND_EQ] = COND_EQ,
349    [TCG_COND_NE] = COND_NE,
350    [TCG_COND_LT] = COND_LT,
351    [TCG_COND_GE] = COND_GE,
352    [TCG_COND_LE] = COND_LE,
353    [TCG_COND_GT] = COND_GT,
354    /* unsigned */
355    [TCG_COND_LTU] = COND_LO,
356    [TCG_COND_GTU] = COND_HI,
357    [TCG_COND_GEU] = COND_HS,
358    [TCG_COND_LEU] = COND_LS,
359};
360
/*
 * Kind of load/store; the value is shifted to bit 22 when forming the
 * load/store register opcodes.
 */
typedef enum {
    LDST_ST,        /* 0: store */
    LDST_LD,        /* 1: load */
    LDST_LD_S_X,    /* 2: load and sign-extend into Xt */
    LDST_LD_S_W,    /* 3: load and sign-extend into Wt */
} AArch64LdstType;
367
368/* We encode the format of the insn into the beginning of the name, so that
369   we can have the preprocessor help "typecheck" the insn vs the output
370   function.  Arm didn't provide us with nice names for the formats, so we
371   use the section number of the architecture reference manual in which the
372   instruction group is described.  */
373typedef enum {
374    /* Compare and branch (immediate).  */
375    I3201_CBZ       = 0x34000000,
376    I3201_CBNZ      = 0x35000000,
377
378    /* Conditional branch (immediate).  */
379    I3202_B_C       = 0x54000000,
380
381    /* Unconditional branch (immediate).  */
382    I3206_B         = 0x14000000,
383    I3206_BL        = 0x94000000,
384
385    /* Unconditional branch (register).  */
386    I3207_BR        = 0xd61f0000,
387    I3207_BLR       = 0xd63f0000,
388    I3207_RET       = 0xd65f0000,
389
390    /* AdvSIMD load/store single structure.  */
391    I3303_LD1R      = 0x0d40c000,
392
393    /* Load literal for loading the address at pc-relative offset */
394    I3305_LDR       = 0x58000000,
395    I3305_LDR_v64   = 0x5c000000,
396    I3305_LDR_v128  = 0x9c000000,
397
398    /* Load/store register.  Described here as 3.3.12, but the helper
399       that emits them can transform to 3.3.10 or 3.3.13.  */
400    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
401    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
402    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
403    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
404
405    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
406    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
407    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
408    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
409
410    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
411    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
412
413    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
414    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
415    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
416
417    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
418    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
419
420    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
421    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
422
423    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
424    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
425
426    I3312_TO_I3310  = 0x00200800,
427    I3312_TO_I3313  = 0x01000000,
428
429    /* Load/store register pair instructions.  */
430    I3314_LDP       = 0x28400000,
431    I3314_STP       = 0x28000000,
432
433    /* Add/subtract immediate instructions.  */
434    I3401_ADDI      = 0x11000000,
435    I3401_ADDSI     = 0x31000000,
436    I3401_SUBI      = 0x51000000,
437    I3401_SUBSI     = 0x71000000,
438
439    /* Bitfield instructions.  */
440    I3402_BFM       = 0x33000000,
441    I3402_SBFM      = 0x13000000,
442    I3402_UBFM      = 0x53000000,
443
444    /* Extract instruction.  */
445    I3403_EXTR      = 0x13800000,
446
447    /* Logical immediate instructions.  */
448    I3404_ANDI      = 0x12000000,
449    I3404_ORRI      = 0x32000000,
450    I3404_EORI      = 0x52000000,
451    I3404_ANDSI     = 0x72000000,
452
453    /* Move wide immediate instructions.  */
454    I3405_MOVN      = 0x12800000,
455    I3405_MOVZ      = 0x52800000,
456    I3405_MOVK      = 0x72800000,
457
458    /* PC relative addressing instructions.  */
459    I3406_ADR       = 0x10000000,
460    I3406_ADRP      = 0x90000000,
461
462    /* Add/subtract shifted register instructions (without a shift).  */
463    I3502_ADD       = 0x0b000000,
464    I3502_ADDS      = 0x2b000000,
465    I3502_SUB       = 0x4b000000,
466    I3502_SUBS      = 0x6b000000,
467
468    /* Add/subtract shifted register instructions (with a shift).  */
469    I3502S_ADD_LSL  = I3502_ADD,
470
471    /* Add/subtract with carry instructions.  */
472    I3503_ADC       = 0x1a000000,
473    I3503_SBC       = 0x5a000000,
474
475    /* Conditional select instructions.  */
476    I3506_CSEL      = 0x1a800000,
477    I3506_CSINC     = 0x1a800400,
478    I3506_CSINV     = 0x5a800000,
479    I3506_CSNEG     = 0x5a800400,
480
481    /* Data-processing (1 source) instructions.  */
482    I3507_CLZ       = 0x5ac01000,
483    I3507_RBIT      = 0x5ac00000,
484    I3507_REV       = 0x5ac00000, /* + size << 10 */
485
486    /* Data-processing (2 source) instructions.  */
487    I3508_LSLV      = 0x1ac02000,
488    I3508_LSRV      = 0x1ac02400,
489    I3508_ASRV      = 0x1ac02800,
490    I3508_RORV      = 0x1ac02c00,
491    I3508_SMULH     = 0x9b407c00,
492    I3508_UMULH     = 0x9bc07c00,
493    I3508_UDIV      = 0x1ac00800,
494    I3508_SDIV      = 0x1ac00c00,
495
496    /* Data-processing (3 source) instructions.  */
497    I3509_MADD      = 0x1b000000,
498    I3509_MSUB      = 0x1b008000,
499
500    /* Logical shifted register instructions (without a shift).  */
501    I3510_AND       = 0x0a000000,
502    I3510_BIC       = 0x0a200000,
503    I3510_ORR       = 0x2a000000,
504    I3510_ORN       = 0x2a200000,
505    I3510_EOR       = 0x4a000000,
506    I3510_EON       = 0x4a200000,
507    I3510_ANDS      = 0x6a000000,
508
509    /* Logical shifted register instructions (with a shift).  */
510    I3502S_AND_LSR  = I3510_AND | (1 << 22),
511
512    /* AdvSIMD copy */
513    I3605_DUP      = 0x0e000400,
514    I3605_INS      = 0x4e001c00,
515    I3605_UMOV     = 0x0e003c00,
516
517    /* AdvSIMD modified immediate */
518    I3606_MOVI      = 0x0f000400,
519    I3606_MVNI      = 0x2f000400,
520    I3606_BIC       = 0x2f001400,
521    I3606_ORR       = 0x0f001400,
522
523    /* AdvSIMD scalar shift by immediate */
524    I3609_SSHR      = 0x5f000400,
525    I3609_SSRA      = 0x5f001400,
526    I3609_SHL       = 0x5f005400,
527    I3609_USHR      = 0x7f000400,
528    I3609_USRA      = 0x7f001400,
529    I3609_SLI       = 0x7f005400,
530
531    /* AdvSIMD scalar three same */
532    I3611_SQADD     = 0x5e200c00,
533    I3611_SQSUB     = 0x5e202c00,
534    I3611_CMGT      = 0x5e203400,
535    I3611_CMGE      = 0x5e203c00,
536    I3611_SSHL      = 0x5e204400,
537    I3611_ADD       = 0x5e208400,
538    I3611_CMTST     = 0x5e208c00,
539    I3611_UQADD     = 0x7e200c00,
540    I3611_UQSUB     = 0x7e202c00,
541    I3611_CMHI      = 0x7e203400,
542    I3611_CMHS      = 0x7e203c00,
543    I3611_USHL      = 0x7e204400,
544    I3611_SUB       = 0x7e208400,
545    I3611_CMEQ      = 0x7e208c00,
546
547    /* AdvSIMD scalar two-reg misc */
548    I3612_CMGT0     = 0x5e208800,
549    I3612_CMEQ0     = 0x5e209800,
550    I3612_CMLT0     = 0x5e20a800,
551    I3612_ABS       = 0x5e20b800,
552    I3612_CMGE0     = 0x7e208800,
553    I3612_CMLE0     = 0x7e209800,
554    I3612_NEG       = 0x7e20b800,
555
556    /* AdvSIMD shift by immediate */
557    I3614_SSHR      = 0x0f000400,
558    I3614_SSRA      = 0x0f001400,
559    I3614_SHL       = 0x0f005400,
560    I3614_SLI       = 0x2f005400,
561    I3614_USHR      = 0x2f000400,
562    I3614_USRA      = 0x2f001400,
563
564    /* AdvSIMD three same.  */
565    I3616_ADD       = 0x0e208400,
566    I3616_AND       = 0x0e201c00,
567    I3616_BIC       = 0x0e601c00,
568    I3616_BIF       = 0x2ee01c00,
569    I3616_BIT       = 0x2ea01c00,
570    I3616_BSL       = 0x2e601c00,
571    I3616_EOR       = 0x2e201c00,
572    I3616_MUL       = 0x0e209c00,
573    I3616_ORR       = 0x0ea01c00,
574    I3616_ORN       = 0x0ee01c00,
575    I3616_SUB       = 0x2e208400,
576    I3616_CMGT      = 0x0e203400,
577    I3616_CMGE      = 0x0e203c00,
578    I3616_CMTST     = 0x0e208c00,
579    I3616_CMHI      = 0x2e203400,
580    I3616_CMHS      = 0x2e203c00,
581    I3616_CMEQ      = 0x2e208c00,
582    I3616_SMAX      = 0x0e206400,
583    I3616_SMIN      = 0x0e206c00,
584    I3616_SSHL      = 0x0e204400,
585    I3616_SQADD     = 0x0e200c00,
586    I3616_SQSUB     = 0x0e202c00,
587    I3616_UMAX      = 0x2e206400,
588    I3616_UMIN      = 0x2e206c00,
589    I3616_UQADD     = 0x2e200c00,
590    I3616_UQSUB     = 0x2e202c00,
591    I3616_USHL      = 0x2e204400,
592
593    /* AdvSIMD two-reg misc.  */
594    I3617_CMGT0     = 0x0e208800,
595    I3617_CMEQ0     = 0x0e209800,
596    I3617_CMLT0     = 0x0e20a800,
597    I3617_CMGE0     = 0x2e208800,
598    I3617_CMLE0     = 0x2e209800,
599    I3617_NOT       = 0x2e205800,
600    I3617_ABS       = 0x0e20b800,
601    I3617_NEG       = 0x2e20b800,
602
603    /* System instructions.  */
604    NOP             = 0xd503201f,
605    DMB_ISH         = 0xd50338bf,
606    DMB_LD          = 0x00000100,
607    DMB_ST          = 0x00000200,
608} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
/*
 * Emit an opcode with "type-checking" of the format.
 *
 * FMT is the format number and OP the instruction name without its
 * format prefix.  Assuming glue() pastes its arguments, the call expands
 * to tcg_out_insn_<FMT>(S, I<FMT>_<OP>, ...), so an opcode that is paired
 * with the wrong format names an enumerator that does not exist and
 * fails to compile.
 */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
619
620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
621                              TCGReg rt, TCGReg rn, unsigned size)
622{
623    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
624}
625
626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
627                              int imm19, TCGReg rt)
628{
629    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
633                              TCGReg rt, int imm19)
634{
635    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
636}
637
638static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
639                              TCGCond c, int imm19)
640{
641    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
642}
643
644static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
645{
646    tcg_out32(s, insn | (imm26 & 0x03ffffff));
647}
648
649static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
650{
651    tcg_out32(s, insn | rn << 5);
652}
653
654static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
655                              TCGReg r1, TCGReg r2, TCGReg rn,
656                              tcg_target_long ofs, bool pre, bool w)
657{
658    insn |= 1u << 31; /* ext */
659    insn |= pre << 24;
660    insn |= w << 23;
661
662    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
663    insn |= (ofs & (0x7f << 3)) << (15 - 3);
664
665    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
666}
667
668static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
669                              TCGReg rd, TCGReg rn, uint64_t aimm)
670{
671    if (aimm > 0xfff) {
672        tcg_debug_assert((aimm & 0xfff) == 0);
673        aimm >>= 12;
674        tcg_debug_assert(aimm <= 0xfff);
675        aimm |= 1 << 12;  /* apply LSL 12 */
676    }
677    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
678}
679
680/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
681   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
682   that feed the DecodeBitMasks pseudo function.  */
683static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
684                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
685{
686    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
687              | rn << 5 | rd);
688}
689
690#define tcg_out_insn_3404  tcg_out_insn_3402
691
692static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
693                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
694{
695    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
696              | rn << 5 | rd);
697}
698
699/* This function is used for the Move (wide immediate) instruction group.
700   Note that SHIFT is a full shift count, not the 2 bit HW field. */
701static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
702                              TCGReg rd, uint16_t half, unsigned shift)
703{
704    tcg_debug_assert((shift & ~0x30) == 0);
705    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
706}
707
708static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
709                              TCGReg rd, int64_t disp)
710{
711    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
712}
713
714/* This function is for both 3.5.2 (Add/Subtract shifted register), for
715   the rare occasion when we actually want to supply a shift amount.  */
716static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
717                                      TCGType ext, TCGReg rd, TCGReg rn,
718                                      TCGReg rm, int imm6)
719{
720    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
721}
722
723/* This function is for 3.5.2 (Add/subtract shifted register),
724   and 3.5.10 (Logical shifted register), for the vast majorty of cases
725   when we don't want to apply a shift.  Thus it can also be used for
726   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
727static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
728                              TCGReg rd, TCGReg rn, TCGReg rm)
729{
730    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
731}
732
733#define tcg_out_insn_3503  tcg_out_insn_3502
734#define tcg_out_insn_3508  tcg_out_insn_3502
735#define tcg_out_insn_3510  tcg_out_insn_3502
736
737static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
738                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
739{
740    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
741              | tcg_cond_to_aarch64[c] << 12);
742}
743
744static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn)
746{
747    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
748}
749
750static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
751                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
752{
753    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
754}
755
756static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
757                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
758{
759    /* Note that bit 11 set means general register input.  Therefore
760       we can handle both register sets with one function.  */
761    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
762              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
763}
764
765static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
766                              TCGReg rd, bool op, int cmode, uint8_t imm8)
767{
768    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
769              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
770}
771
772static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
773                              TCGReg rd, TCGReg rn, unsigned immhb)
774{
775    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
776}
777
778static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
779                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
780{
781    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
782              | (rn & 0x1f) << 5 | (rd & 0x1f));
783}
784
785static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
786                              unsigned size, TCGReg rd, TCGReg rn)
787{
788    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
791static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
792                              TCGReg rd, TCGReg rn, unsigned immhb)
793{
794    tcg_out32(s, insn | q << 30 | immhb << 16
795              | (rn & 0x1f) << 5 | (rd & 0x1f));
796}
797
798static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
799                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
800{
801    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
802              | (rn & 0x1f) << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
806                              unsigned size, TCGReg rd, TCGReg rn)
807{
808    tcg_out32(s, insn | q << 30 | (size << 22)
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
812static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
813                              TCGReg rd, TCGReg base, TCGType ext,
814                              TCGReg regoff)
815{
816    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
817    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
818              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg rn, intptr_t offset)
823{
824    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
825}
826
827static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
828                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
829{
830    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
831    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
832              | rn << 5 | (rd & 0x1f));
833}
834
835/* Register to register move using ORR (shifted register with no shift). */
836static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
837{
838    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
839}
840
841/* Register to register move using ADDI (move to/from SP).  */
842static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
843{
844    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
845}
846
847/* This function is used for the Logical (immediate) instruction group.
848   The value of LIMM must satisfy IS_LIMM.  See the comment above about
849   only supporting simplified logical immediates.  */
850static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
851                             TCGReg rd, TCGReg rn, uint64_t limm)
852{
853    unsigned h, l, r, c;
854
855    tcg_debug_assert(is_limm(limm));
856
857    h = clz64(limm);
858    l = ctz64(limm);
859    if (l == 0) {
860        r = 0;                  /* form 0....01....1 */
861        c = ctz64(~limm) - 1;
862        if (h == 0) {
863            r = clz64(~limm);   /* form 1..10..01..1 */
864            c += r;
865        }
866    } else {
867        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
868        c = r - h - 1;
869    }
870    if (ext == TCG_TYPE_I32) {
871        r &= 31;
872        c &= 31;
873    }
874
875    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
876}
877
878static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
879                             TCGReg rd, int64_t v64)
880{
881    bool q = type == TCG_TYPE_V128;
882    int cmode, imm8, i;
883
884    /* Test all bytes equal first.  */
885    if (vece == MO_8) {
886        imm8 = (uint8_t)v64;
887        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
888        return;
889    }
890
891    /*
892     * Test all bytes 0x00 or 0xff second.  This can match cases that
893     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
894     */
895    for (i = imm8 = 0; i < 8; i++) {
896        uint8_t byte = v64 >> (i * 8);
897        if (byte == 0xff) {
898            imm8 |= 1 << i;
899        } else if (byte != 0) {
900            goto fail_bytes;
901        }
902    }
903    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
904    return;
905 fail_bytes:
906
907    /*
908     * Tests for various replications.  For each element width, if we
909     * cannot find an expansion there's no point checking a larger
910     * width because we already know by replication it cannot match.
911     */
912    if (vece == MO_16) {
913        uint16_t v16 = v64;
914
915        if (is_shimm16(v16, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
917            return;
918        }
919        if (is_shimm16(~v16, &cmode, &imm8)) {
920            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
921            return;
922        }
923
924        /*
925         * Otherwise, all remaining constants can be loaded in two insns:
926         * rd = v16 & 0xff, rd |= v16 & 0xff00.
927         */
928        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
929        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
930        return;
931    } else if (vece == MO_32) {
932        uint32_t v32 = v64;
933        uint32_t n32 = ~v32;
934
935        if (is_shimm32(v32, &cmode, &imm8) ||
936            is_soimm32(v32, &cmode, &imm8) ||
937            is_fimm32(v32, &cmode, &imm8)) {
938            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
939            return;
940        }
941        if (is_shimm32(n32, &cmode, &imm8) ||
942            is_soimm32(n32, &cmode, &imm8)) {
943            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
944            return;
945        }
946
947        /*
948         * Restrict the set of constants to those we can load with
949         * two instructions.  Others we load from the pool.
950         */
951        i = is_shimm32_pair(v32, &cmode, &imm8);
952        if (i) {
953            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
954            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
955            return;
956        }
957        i = is_shimm32_pair(n32, &cmode, &imm8);
958        if (i) {
959            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
960            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
961            return;
962        }
963    } else if (is_fimm64(v64, &cmode, &imm8)) {
964        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
965        return;
966    }
967
968    /*
969     * As a last resort, load from the constant pool.  Sadly there
970     * is no LD1R (literal), so store the full 16-byte vector.
971     */
972    if (type == TCG_TYPE_V128) {
973        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
974        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
975    } else {
976        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
977        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
978    }
979}
980
981static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
982                            TCGReg rd, TCGReg rs)
983{
984    int is_q = type - TCG_TYPE_V64;
985    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
986    return true;
987}
988
989static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
990                             TCGReg r, TCGReg base, intptr_t offset)
991{
992    TCGReg temp = TCG_REG_TMP;
993
994    if (offset < -0xffffff || offset > 0xffffff) {
995        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
996        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
997        base = temp;
998    } else {
999        AArch64Insn add_insn = I3401_ADDI;
1000
1001        if (offset < 0) {
1002            add_insn = I3401_SUBI;
1003            offset = -offset;
1004        }
1005        if (offset & 0xfff000) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1007            base = temp;
1008        }
1009        if (offset & 0xfff) {
1010            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1011            base = temp;
1012        }
1013    }
1014    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1015    return true;
1016}
1017
1018static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1019                         tcg_target_long value)
1020{
1021    tcg_target_long svalue = value;
1022    tcg_target_long ivalue = ~value;
1023    tcg_target_long t0, t1, t2;
1024    int s0, s1;
1025    AArch64Insn opc;
1026
1027    switch (type) {
1028    case TCG_TYPE_I32:
1029    case TCG_TYPE_I64:
1030        tcg_debug_assert(rd < 32);
1031        break;
1032    default:
1033        g_assert_not_reached();
1034    }
1035
1036    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1037       values within [2**31, 2**32-1], we can create smaller sequences by
1038       interpreting this as a negative 32-bit number, while ensuring that
1039       the high 32 bits are cleared by setting SF=0.  */
1040    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1041        svalue = (int32_t)value;
1042        value = (uint32_t)value;
1043        ivalue = (uint32_t)ivalue;
1044        type = TCG_TYPE_I32;
1045    }
1046
1047    /* Speed things up by handling the common case of small positive
1048       and negative values specially.  */
1049    if ((value & ~0xffffull) == 0) {
1050        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1051        return;
1052    } else if ((ivalue & ~0xffffull) == 0) {
1053        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1054        return;
1055    }
1056
1057    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1058       use the sign-extended value.  That lets us match rotated values such
1059       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1060    if (is_limm(svalue)) {
1061        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1062        return;
1063    }
1064
1065    /* Look for host pointer values within 4G of the PC.  This happens
1066       often when loading pointers to QEMU's own data structures.  */
1067    if (type == TCG_TYPE_I64) {
1068        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1069        tcg_target_long disp = value - src_rx;
1070        if (disp == sextract64(disp, 0, 21)) {
1071            tcg_out_insn(s, 3406, ADR, rd, disp);
1072            return;
1073        }
1074        disp = (value >> 12) - (src_rx >> 12);
1075        if (disp == sextract64(disp, 0, 21)) {
1076            tcg_out_insn(s, 3406, ADRP, rd, disp);
1077            if (value & 0xfff) {
1078                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1079            }
1080            return;
1081        }
1082    }
1083
1084    /* Would it take fewer insns to begin with MOVN?  */
1085    if (ctpop64(value) >= 32) {
1086        t0 = ivalue;
1087        opc = I3405_MOVN;
1088    } else {
1089        t0 = value;
1090        opc = I3405_MOVZ;
1091    }
1092    s0 = ctz64(t0) & (63 & -16);
1093    t1 = t0 & ~(0xffffull << s0);
1094    s1 = ctz64(t1) & (63 & -16);
1095    t2 = t1 & ~(0xffffull << s1);
1096    if (t2 == 0) {
1097        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1098        if (t1 != 0) {
1099            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1100        }
1101        return;
1102    }
1103
1104    /* For more than 2 insns, dump it into the constant pool.  */
1105    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1106    tcg_out_insn(s, 3305, LDR, 0, rd);
1107}
1108
1109static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1110{
1111    return false;
1112}
1113
1114static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1115                             tcg_target_long imm)
1116{
1117    /* This function is only used for passing structs by reference. */
1118    g_assert_not_reached();
1119}
1120
1121/* Define something more legible for general use.  */
1122#define tcg_out_ldst_r  tcg_out_insn_3310
1123
1124static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1125                         TCGReg rn, intptr_t offset, int lgsize)
1126{
1127    /* If the offset is naturally aligned and in range, then we can
1128       use the scaled uimm12 encoding */
1129    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1130        uintptr_t scaled_uimm = offset >> lgsize;
1131        if (scaled_uimm <= 0xfff) {
1132            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1133            return;
1134        }
1135    }
1136
1137    /* Small signed offsets can use the unscaled encoding.  */
1138    if (offset >= -256 && offset < 256) {
1139        tcg_out_insn_3312(s, insn, rd, rn, offset);
1140        return;
1141    }
1142
1143    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1144    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1145    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1146}
1147
1148static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1149{
1150    if (ret == arg) {
1151        return true;
1152    }
1153    switch (type) {
1154    case TCG_TYPE_I32:
1155    case TCG_TYPE_I64:
1156        if (ret < 32 && arg < 32) {
1157            tcg_out_movr(s, type, ret, arg);
1158            break;
1159        } else if (ret < 32) {
1160            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1161            break;
1162        } else if (arg < 32) {
1163            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1164            break;
1165        }
1166        /* FALLTHRU */
1167
1168    case TCG_TYPE_V64:
1169        tcg_debug_assert(ret >= 32 && arg >= 32);
1170        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1171        break;
1172    case TCG_TYPE_V128:
1173        tcg_debug_assert(ret >= 32 && arg >= 32);
1174        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1175        break;
1176
1177    default:
1178        g_assert_not_reached();
1179    }
1180    return true;
1181}
1182
1183static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1184                       TCGReg base, intptr_t ofs)
1185{
1186    AArch64Insn insn;
1187    int lgsz;
1188
1189    switch (type) {
1190    case TCG_TYPE_I32:
1191        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1192        lgsz = 2;
1193        break;
1194    case TCG_TYPE_I64:
1195        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1196        lgsz = 3;
1197        break;
1198    case TCG_TYPE_V64:
1199        insn = I3312_LDRVD;
1200        lgsz = 3;
1201        break;
1202    case TCG_TYPE_V128:
1203        insn = I3312_LDRVQ;
1204        lgsz = 4;
1205        break;
1206    default:
1207        g_assert_not_reached();
1208    }
1209    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1210}
1211
1212static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1213                       TCGReg base, intptr_t ofs)
1214{
1215    AArch64Insn insn;
1216    int lgsz;
1217
1218    switch (type) {
1219    case TCG_TYPE_I32:
1220        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1221        lgsz = 2;
1222        break;
1223    case TCG_TYPE_I64:
1224        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1225        lgsz = 3;
1226        break;
1227    case TCG_TYPE_V64:
1228        insn = I3312_STRVD;
1229        lgsz = 3;
1230        break;
1231    case TCG_TYPE_V128:
1232        insn = I3312_STRVQ;
1233        lgsz = 4;
1234        break;
1235    default:
1236        g_assert_not_reached();
1237    }
1238    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1239}
1240
1241static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1242                               TCGReg base, intptr_t ofs)
1243{
1244    if (type <= TCG_TYPE_I64 && val == 0) {
1245        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1246        return true;
1247    }
1248    return false;
1249}
1250
1251static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1252                               TCGReg rn, unsigned int a, unsigned int b)
1253{
1254    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1255}
1256
1257static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1258                                TCGReg rn, unsigned int a, unsigned int b)
1259{
1260    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1261}
1262
1263static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1264                                TCGReg rn, unsigned int a, unsigned int b)
1265{
1266    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1267}
1268
1269static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1270                                TCGReg rn, TCGReg rm, unsigned int a)
1271{
1272    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1273}
1274
1275static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1276                               TCGReg rd, TCGReg rn, unsigned int m)
1277{
1278    int bits = ext ? 64 : 32;
1279    int max = bits - 1;
1280    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1281}
1282
1283static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1284                               TCGReg rd, TCGReg rn, unsigned int m)
1285{
1286    int max = ext ? 63 : 31;
1287    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1288}
1289
1290static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1291                               TCGReg rd, TCGReg rn, unsigned int m)
1292{
1293    int max = ext ? 63 : 31;
1294    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1295}
1296
1297static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1298                                TCGReg rd, TCGReg rn, unsigned int m)
1299{
1300    int max = ext ? 63 : 31;
1301    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1302}
1303
1304static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1305                                TCGReg rd, TCGReg rn, unsigned int m)
1306{
1307    int max = ext ? 63 : 31;
1308    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1309}
1310
1311static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1312                               TCGReg rn, unsigned lsb, unsigned width)
1313{
1314    unsigned size = ext ? 64 : 32;
1315    unsigned a = (size - lsb) & (size - 1);
1316    unsigned b = width - 1;
1317    tcg_out_bfm(s, ext, rd, rn, a, b);
1318}
1319
1320static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1321                        tcg_target_long b, bool const_b)
1322{
1323    if (const_b) {
1324        /* Using CMP or CMN aliases.  */
1325        if (b >= 0) {
1326            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1327        } else {
1328            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1329        }
1330    } else {
1331        /* Using CMP alias SUBS wzr, Wn, Wm */
1332        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1333    }
1334}
1335
1336static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1337{
1338    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1339    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1340    tcg_out_insn(s, 3206, B, offset);
1341}
1342
1343static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1344{
1345    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1346    if (offset == sextract64(offset, 0, 26)) {
1347        tcg_out_insn(s, 3206, B, offset);
1348    } else {
1349        /* Choose X9 as a call-clobbered non-LR temporary. */
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1351        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1352    }
1353}
1354
1355static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1356{
1357    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1358    if (offset == sextract64(offset, 0, 26)) {
1359        tcg_out_insn(s, 3206, BL, offset);
1360    } else {
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1363    }
1364}
1365
1366static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1367                         const TCGHelperInfo *info)
1368{
1369    tcg_out_call_int(s, target);
1370}
1371
1372static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1373{
1374    if (!l->has_value) {
1375        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1376        tcg_out_insn(s, 3206, B, 0);
1377    } else {
1378        tcg_out_goto(s, l->u.value_ptr);
1379    }
1380}
1381
1382static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1383                           TCGArg b, bool b_const, TCGLabel *l)
1384{
1385    intptr_t offset;
1386    bool need_cmp;
1387
1388    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1389        need_cmp = false;
1390    } else {
1391        need_cmp = true;
1392        tcg_out_cmp(s, ext, a, b, b_const);
1393    }
1394
1395    if (!l->has_value) {
1396        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1397        offset = tcg_in32(s) >> 5;
1398    } else {
1399        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1400        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1401    }
1402
1403    if (need_cmp) {
1404        tcg_out_insn(s, 3202, B_C, c, offset);
1405    } else if (c == TCG_COND_EQ) {
1406        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1407    } else {
1408        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1409    }
1410}
1411
1412static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1413                               TCGReg rd, TCGReg rn)
1414{
1415    /* REV, REV16, REV32 */
1416    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1417}
1418
1419static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1420                               TCGReg rd, TCGReg rn)
1421{
1422    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1423    int bits = (8 << s_bits) - 1;
1424    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1425}
1426
1427static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1428{
1429    tcg_out_sxt(s, type, MO_8, rd, rn);
1430}
1431
1432static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1433{
1434    tcg_out_sxt(s, type, MO_16, rd, rn);
1435}
1436
1437static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1438{
1439    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1440}
1441
1442static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1443{
1444    tcg_out_ext32s(s, rd, rn);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_uxt(s, MO_8, rd, rn);
1458}
1459
1460static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1461{
1462    tcg_out_uxt(s, MO_16, rd, rn);
1463}
1464
1465static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1466{
1467    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1468}
1469
1470static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1471{
1472    tcg_out_ext32u(s, rd, rn);
1473}
1474
1475static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1476{
1477    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1478}
1479
1480static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1481                            TCGReg rn, int64_t aimm)
1482{
1483    if (aimm >= 0) {
1484        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1485    } else {
1486        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1487    }
1488}
1489
1490static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1491                            TCGReg rh, TCGReg al, TCGReg ah,
1492                            tcg_target_long bl, tcg_target_long bh,
1493                            bool const_bl, bool const_bh, bool sub)
1494{
1495    TCGReg orig_rl = rl;
1496    AArch64Insn insn;
1497
1498    if (rl == ah || (!const_bh && rl == bh)) {
1499        rl = TCG_REG_TMP;
1500    }
1501
1502    if (const_bl) {
1503        if (bl < 0) {
1504            bl = -bl;
1505            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1506        } else {
1507            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1508        }
1509
1510        if (unlikely(al == TCG_REG_XZR)) {
1511            /* ??? We want to allow al to be zero for the benefit of
1512               negation via subtraction.  However, that leaves open the
1513               possibility of adding 0+const in the low part, and the
1514               immediate add instructions encode XSP not XZR.  Don't try
1515               anything more elaborate here than loading another zero.  */
1516            al = TCG_REG_TMP;
1517            tcg_out_movi(s, ext, al, 0);
1518        }
1519        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1520    } else {
1521        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1522    }
1523
1524    insn = I3503_ADC;
1525    if (const_bh) {
1526        /* Note that the only two constants we support are 0 and -1, and
1527           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1528        if ((bh != 0) ^ sub) {
1529            insn = I3503_SBC;
1530        }
1531        bh = TCG_REG_XZR;
1532    } else if (sub) {
1533        insn = I3503_SBC;
1534    }
1535    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1536
1537    tcg_out_mov(s, ext, orig_rl, rl);
1538}
1539
1540static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1541{
1542    static const uint32_t sync[] = {
1543        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1544        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1545        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1546        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1547        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1548    };
1549    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1550}
1551
1552static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1553                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1554{
1555    TCGReg a1 = a0;
1556    if (is_ctz) {
1557        a1 = TCG_REG_TMP;
1558        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1559    }
1560    if (const_b && b == (ext ? 64 : 32)) {
1561        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1562    } else {
1563        AArch64Insn sel = I3506_CSEL;
1564
1565        tcg_out_cmp(s, ext, a0, 0, 1);
1566        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1567
1568        if (const_b) {
1569            if (b == -1) {
1570                b = TCG_REG_XZR;
1571                sel = I3506_CSINV;
1572            } else if (b == 0) {
1573                b = TCG_REG_XZR;
1574            } else {
1575                tcg_out_movi(s, ext, d, b);
1576                b = d;
1577            }
1578        }
1579        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1580    }
1581}
1582
1583typedef struct {
1584    TCGReg base;
1585    TCGReg index;
1586    TCGType index_ext;
1587} HostAddress;
1588
1589#ifdef CONFIG_SOFTMMU
1590static const TCGLdstHelperParam ldst_helper_param = {
1591    .ntmp = 1, .tmp = { TCG_REG_TMP }
1592};
1593
1594static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1595{
1596    MemOp opc = get_memop(lb->oi);
1597
1598    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1599        return false;
1600    }
1601
1602    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1603    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1604    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1605    tcg_out_goto(s, lb->raddr);
1606    return true;
1607}
1608
1609static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1610{
1611    MemOp opc = get_memop(lb->oi);
1612
1613    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1614        return false;
1615    }
1616
1617    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1618    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1619    tcg_out_goto(s, lb->raddr);
1620    return true;
1621}
1622#else
1623static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1624{
1625    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1626    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1627    tcg_out_insn(s, 3406, ADR, rd, offset);
1628}
1629
1630static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1631{
1632    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1633        return false;
1634    }
1635
1636    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1637    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1638
1639    /* "Tail call" to the helper, with the return address back inline. */
1640    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1641    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1642                                        : helper_unaligned_st));
1643    return true;
1644}
1645
1646static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1647{
1648    return tcg_out_fail_alignment(s, l);
1649}
1650
1651static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1652{
1653    return tcg_out_fail_alignment(s, l);
1654}
1655#endif /* CONFIG_SOFTMMU */
1656
1657/*
1658 * For softmmu, perform the TLB load and compare.
1659 * For useronly, perform any required alignment tests.
1660 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1661 * is required and fill in @h with the host address for the fast path.
1662 */
1663static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1664                                           TCGReg addr_reg, MemOpIdx oi,
1665                                           bool is_ld)
1666{
1667    TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1668    TCGLabelQemuLdst *ldst = NULL;
1669    MemOp opc = get_memop(oi);
1670    unsigned a_bits = get_alignment_bits(opc);
1671    unsigned a_mask = (1u << a_bits) - 1;
1672
1673#ifdef CONFIG_SOFTMMU
1674    unsigned s_bits = opc & MO_SIZE;
1675    unsigned s_mask = (1u << s_bits) - 1;
1676    unsigned mem_index = get_mmuidx(oi);
1677    TCGReg x3;
1678    TCGType mask_type;
1679    uint64_t compare_mask;
1680
1681    ldst = new_ldst_label(s);
1682    ldst->is_ld = is_ld;
1683    ldst->oi = oi;
1684    ldst->addrlo_reg = addr_reg;
1685
1686    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1687                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1688
1689    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1690    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1691    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1692    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1693    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1694    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1695                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1696
1697    /* Extract the TLB index from the address into X0.  */
1698    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1699                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1700                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1701
1702    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1703    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1704
1705    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1706    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
1707               is_ld ? offsetof(CPUTLBEntry, addr_read)
1708                     : offsetof(CPUTLBEntry, addr_write));
1709    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1710               offsetof(CPUTLBEntry, addend));
1711
1712    /*
1713     * For aligned accesses, we check the first byte and include the alignment
1714     * bits within the address.  For unaligned access, we check that we don't
1715     * cross pages using the address of the last byte of the access.
1716     */
1717    if (a_bits >= s_bits) {
1718        x3 = addr_reg;
1719    } else {
1720        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1721                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1722        x3 = TCG_REG_X3;
1723    }
1724    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1725
1726    /* Store the page mask part of the address into X3.  */
1727    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1728                     TCG_REG_X3, x3, compare_mask);
1729
1730    /* Perform the address comparison. */
1731    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1732
1733    /* If not equal, we jump to the slow path. */
1734    ldst->label_ptr[0] = s->code_ptr;
1735    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1736
1737    *h = (HostAddress){
1738        .base = TCG_REG_X1,
1739        .index = addr_reg,
1740        .index_ext = addr_type
1741    };
1742#else
1743    if (a_mask) {
1744        ldst = new_ldst_label(s);
1745
1746        ldst->is_ld = is_ld;
1747        ldst->oi = oi;
1748        ldst->addrlo_reg = addr_reg;
1749
1750        /* tst addr, #mask */
1751        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1752
1753        /* b.ne slow_path */
1754        ldst->label_ptr[0] = s->code_ptr;
1755        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1756    }
1757
1758    if (USE_GUEST_BASE) {
1759        *h = (HostAddress){
1760            .base = TCG_REG_GUEST_BASE,
1761            .index = addr_reg,
1762            .index_ext = addr_type
1763        };
1764    } else {
1765        *h = (HostAddress){
1766            .base = addr_reg,
1767            .index = TCG_REG_XZR,
1768            .index_ext = TCG_TYPE_I64
1769        };
1770    }
1771#endif
1772
1773    return ldst;
1774}
1775
1776static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1777                                   TCGReg data_r, HostAddress h)
1778{
1779    switch (memop & MO_SSIZE) {
1780    case MO_UB:
1781        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    case MO_SB:
1784        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1785                       data_r, h.base, h.index_ext, h.index);
1786        break;
1787    case MO_UW:
1788        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1789        break;
1790    case MO_SW:
1791        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1792                       data_r, h.base, h.index_ext, h.index);
1793        break;
1794    case MO_UL:
1795        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1796        break;
1797    case MO_SL:
1798        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1799        break;
1800    case MO_UQ:
1801        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1802        break;
1803    default:
1804        g_assert_not_reached();
1805    }
1806}
1807
1808static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1809                                   TCGReg data_r, HostAddress h)
1810{
1811    switch (memop & MO_SIZE) {
1812    case MO_8:
1813        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1814        break;
1815    case MO_16:
1816        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1817        break;
1818    case MO_32:
1819        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1820        break;
1821    case MO_64:
1822        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1823        break;
1824    default:
1825        g_assert_not_reached();
1826    }
1827}
1828
1829static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1830                            MemOpIdx oi, TCGType data_type)
1831{
1832    TCGLabelQemuLdst *ldst;
1833    HostAddress h;
1834
1835    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1836    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1837
1838    if (ldst) {
1839        ldst->type = data_type;
1840        ldst->datalo_reg = data_reg;
1841        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1842    }
1843}
1844
1845static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1846                            MemOpIdx oi, TCGType data_type)
1847{
1848    TCGLabelQemuLdst *ldst;
1849    HostAddress h;
1850
1851    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1852    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1853
1854    if (ldst) {
1855        ldst->type = data_type;
1856        ldst->datalo_reg = data_reg;
1857        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1858    }
1859}
1860
1861static const tcg_insn_unit *tb_ret_addr;
1862
1863static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1864{
1865    /* Reuse the zeroing that exists for goto_ptr.  */
1866    if (a0 == 0) {
1867        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1868    } else {
1869        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1870        tcg_out_goto_long(s, tb_ret_addr);
1871    }
1872}
1873
1874static void tcg_out_goto_tb(TCGContext *s, int which)
1875{
1876    /*
1877     * Direct branch, or indirect address load, will be patched
1878     * by tb_target_set_jmp_target.  Assert indirect load offset
1879     * in range early, regardless of direct branch distance.
1880     */
1881    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1882    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1883
1884    set_jmp_insn_offset(s, which);
1885    tcg_out32(s, I3206_B);
1886    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1887    set_jmp_reset_offset(s, which);
1888}
1889
1890void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1891                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1892{
1893    uintptr_t d_addr = tb->jmp_target_addr[n];
1894    ptrdiff_t d_offset = d_addr - jmp_rx;
1895    tcg_insn_unit insn;
1896
1897    /* Either directly branch, or indirect branch load. */
1898    if (d_offset == sextract64(d_offset, 0, 28)) {
1899        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1900    } else {
1901        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1902        ptrdiff_t i_offset = i_addr - jmp_rx;
1903
1904        /* Note that we asserted this in range in tcg_out_goto_tb. */
1905        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1906    }
1907    qatomic_set((uint32_t *)jmp_rw, insn);
1908    flush_idcache_range(jmp_rx, jmp_rw, 4);
1909}
1910
1911static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1912                       const TCGArg args[TCG_MAX_OP_ARGS],
1913                       const int const_args[TCG_MAX_OP_ARGS])
1914{
1915    /* 99% of the time, we can signal the use of extension registers
1916       by looking to see if the opcode handles 64-bit data.  */
1917    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1918
1919    /* Hoist the loads of the most common arguments.  */
1920    TCGArg a0 = args[0];
1921    TCGArg a1 = args[1];
1922    TCGArg a2 = args[2];
1923    int c2 = const_args[2];
1924
1925    /* Some operands are defined with "rZ" constraint, a register or
1926       the zero register.  These need not actually test args[I] == 0.  */
1927#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1928
1929    switch (opc) {
1930    case INDEX_op_goto_ptr:
1931        tcg_out_insn(s, 3207, BR, a0);
1932        break;
1933
1934    case INDEX_op_br:
1935        tcg_out_goto_label(s, arg_label(a0));
1936        break;
1937
1938    case INDEX_op_ld8u_i32:
1939    case INDEX_op_ld8u_i64:
1940        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1941        break;
1942    case INDEX_op_ld8s_i32:
1943        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1944        break;
1945    case INDEX_op_ld8s_i64:
1946        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1947        break;
1948    case INDEX_op_ld16u_i32:
1949    case INDEX_op_ld16u_i64:
1950        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1951        break;
1952    case INDEX_op_ld16s_i32:
1953        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1954        break;
1955    case INDEX_op_ld16s_i64:
1956        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1957        break;
1958    case INDEX_op_ld_i32:
1959    case INDEX_op_ld32u_i64:
1960        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1961        break;
1962    case INDEX_op_ld32s_i64:
1963        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1964        break;
1965    case INDEX_op_ld_i64:
1966        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1967        break;
1968
1969    case INDEX_op_st8_i32:
1970    case INDEX_op_st8_i64:
1971        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1972        break;
1973    case INDEX_op_st16_i32:
1974    case INDEX_op_st16_i64:
1975        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1976        break;
1977    case INDEX_op_st_i32:
1978    case INDEX_op_st32_i64:
1979        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1980        break;
1981    case INDEX_op_st_i64:
1982        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1983        break;
1984
1985    case INDEX_op_add_i32:
1986        a2 = (int32_t)a2;
1987        /* FALLTHRU */
1988    case INDEX_op_add_i64:
1989        if (c2) {
1990            tcg_out_addsubi(s, ext, a0, a1, a2);
1991        } else {
1992            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1993        }
1994        break;
1995
1996    case INDEX_op_sub_i32:
1997        a2 = (int32_t)a2;
1998        /* FALLTHRU */
1999    case INDEX_op_sub_i64:
2000        if (c2) {
2001            tcg_out_addsubi(s, ext, a0, a1, -a2);
2002        } else {
2003            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2004        }
2005        break;
2006
2007    case INDEX_op_neg_i64:
2008    case INDEX_op_neg_i32:
2009        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2010        break;
2011
2012    case INDEX_op_and_i32:
2013        a2 = (int32_t)a2;
2014        /* FALLTHRU */
2015    case INDEX_op_and_i64:
2016        if (c2) {
2017            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2018        } else {
2019            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2020        }
2021        break;
2022
2023    case INDEX_op_andc_i32:
2024        a2 = (int32_t)a2;
2025        /* FALLTHRU */
2026    case INDEX_op_andc_i64:
2027        if (c2) {
2028            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2029        } else {
2030            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2031        }
2032        break;
2033
2034    case INDEX_op_or_i32:
2035        a2 = (int32_t)a2;
2036        /* FALLTHRU */
2037    case INDEX_op_or_i64:
2038        if (c2) {
2039            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2040        } else {
2041            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2042        }
2043        break;
2044
2045    case INDEX_op_orc_i32:
2046        a2 = (int32_t)a2;
2047        /* FALLTHRU */
2048    case INDEX_op_orc_i64:
2049        if (c2) {
2050            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2051        } else {
2052            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_xor_i32:
2057        a2 = (int32_t)a2;
2058        /* FALLTHRU */
2059    case INDEX_op_xor_i64:
2060        if (c2) {
2061            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2062        } else {
2063            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2064        }
2065        break;
2066
2067    case INDEX_op_eqv_i32:
2068        a2 = (int32_t)a2;
2069        /* FALLTHRU */
2070    case INDEX_op_eqv_i64:
2071        if (c2) {
2072            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2073        } else {
2074            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2075        }
2076        break;
2077
2078    case INDEX_op_not_i64:
2079    case INDEX_op_not_i32:
2080        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2081        break;
2082
2083    case INDEX_op_mul_i64:
2084    case INDEX_op_mul_i32:
2085        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2086        break;
2087
2088    case INDEX_op_div_i64:
2089    case INDEX_op_div_i32:
2090        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2091        break;
2092    case INDEX_op_divu_i64:
2093    case INDEX_op_divu_i32:
2094        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2095        break;
2096
2097    case INDEX_op_rem_i64:
2098    case INDEX_op_rem_i32:
2099        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2100        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2101        break;
2102    case INDEX_op_remu_i64:
2103    case INDEX_op_remu_i32:
2104        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2105        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2106        break;
2107
2108    case INDEX_op_shl_i64:
2109    case INDEX_op_shl_i32:
2110        if (c2) {
2111            tcg_out_shl(s, ext, a0, a1, a2);
2112        } else {
2113            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2114        }
2115        break;
2116
2117    case INDEX_op_shr_i64:
2118    case INDEX_op_shr_i32:
2119        if (c2) {
2120            tcg_out_shr(s, ext, a0, a1, a2);
2121        } else {
2122            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2123        }
2124        break;
2125
2126    case INDEX_op_sar_i64:
2127    case INDEX_op_sar_i32:
2128        if (c2) {
2129            tcg_out_sar(s, ext, a0, a1, a2);
2130        } else {
2131            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2132        }
2133        break;
2134
2135    case INDEX_op_rotr_i64:
2136    case INDEX_op_rotr_i32:
2137        if (c2) {
2138            tcg_out_rotr(s, ext, a0, a1, a2);
2139        } else {
2140            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2141        }
2142        break;
2143
2144    case INDEX_op_rotl_i64:
2145    case INDEX_op_rotl_i32:
2146        if (c2) {
2147            tcg_out_rotl(s, ext, a0, a1, a2);
2148        } else {
2149            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2150            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2151        }
2152        break;
2153
2154    case INDEX_op_clz_i64:
2155    case INDEX_op_clz_i32:
2156        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2157        break;
2158    case INDEX_op_ctz_i64:
2159    case INDEX_op_ctz_i32:
2160        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2161        break;
2162
2163    case INDEX_op_brcond_i32:
2164        a1 = (int32_t)a1;
2165        /* FALLTHRU */
2166    case INDEX_op_brcond_i64:
2167        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2168        break;
2169
2170    case INDEX_op_setcond_i32:
2171        a2 = (int32_t)a2;
2172        /* FALLTHRU */
2173    case INDEX_op_setcond_i64:
2174        tcg_out_cmp(s, ext, a1, a2, c2);
2175        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2176        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2177                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2178        break;
2179
2180    case INDEX_op_movcond_i32:
2181        a2 = (int32_t)a2;
2182        /* FALLTHRU */
2183    case INDEX_op_movcond_i64:
2184        tcg_out_cmp(s, ext, a1, a2, c2);
2185        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2186        break;
2187
2188    case INDEX_op_qemu_ld_i32:
2189    case INDEX_op_qemu_ld_i64:
2190        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2191        break;
2192    case INDEX_op_qemu_st_i32:
2193    case INDEX_op_qemu_st_i64:
2194        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2195        break;
2196
2197    case INDEX_op_bswap64_i64:
2198        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2199        break;
2200    case INDEX_op_bswap32_i64:
2201        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2202        if (a2 & TCG_BSWAP_OS) {
2203            tcg_out_ext32s(s, a0, a0);
2204        }
2205        break;
2206    case INDEX_op_bswap32_i32:
2207        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2208        break;
2209    case INDEX_op_bswap16_i64:
2210    case INDEX_op_bswap16_i32:
2211        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2212        if (a2 & TCG_BSWAP_OS) {
2213            /* Output must be sign-extended. */
2214            tcg_out_ext16s(s, ext, a0, a0);
2215        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2216            /* Output must be zero-extended, but input isn't. */
2217            tcg_out_ext16u(s, a0, a0);
2218        }
2219        break;
2220
2221    case INDEX_op_deposit_i64:
2222    case INDEX_op_deposit_i32:
2223        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2224        break;
2225
2226    case INDEX_op_extract_i64:
2227    case INDEX_op_extract_i32:
2228        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2229        break;
2230
2231    case INDEX_op_sextract_i64:
2232    case INDEX_op_sextract_i32:
2233        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2234        break;
2235
2236    case INDEX_op_extract2_i64:
2237    case INDEX_op_extract2_i32:
2238        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2239        break;
2240
2241    case INDEX_op_add2_i32:
2242        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2243                        (int32_t)args[4], args[5], const_args[4],
2244                        const_args[5], false);
2245        break;
2246    case INDEX_op_add2_i64:
2247        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2248                        args[5], const_args[4], const_args[5], false);
2249        break;
2250    case INDEX_op_sub2_i32:
2251        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2252                        (int32_t)args[4], args[5], const_args[4],
2253                        const_args[5], true);
2254        break;
2255    case INDEX_op_sub2_i64:
2256        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2257                        args[5], const_args[4], const_args[5], true);
2258        break;
2259
2260    case INDEX_op_muluh_i64:
2261        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2262        break;
2263    case INDEX_op_mulsh_i64:
2264        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2265        break;
2266
2267    case INDEX_op_mb:
2268        tcg_out_mb(s, a0);
2269        break;
2270
2271    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2272    case INDEX_op_mov_i64:
2273    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2274    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2275    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2276    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2277    case INDEX_op_ext8s_i64:
2278    case INDEX_op_ext8u_i32:
2279    case INDEX_op_ext8u_i64:
2280    case INDEX_op_ext16s_i64:
2281    case INDEX_op_ext16s_i32:
2282    case INDEX_op_ext16u_i64:
2283    case INDEX_op_ext16u_i32:
2284    case INDEX_op_ext32s_i64:
2285    case INDEX_op_ext32u_i64:
2286    case INDEX_op_ext_i32_i64:
2287    case INDEX_op_extu_i32_i64:
2288    case INDEX_op_extrl_i64_i32:
2289    default:
2290        g_assert_not_reached();
2291    }
2292
2293#undef REG0
2294}
2295
2296static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2297                           unsigned vecl, unsigned vece,
2298                           const TCGArg args[TCG_MAX_OP_ARGS],
2299                           const int const_args[TCG_MAX_OP_ARGS])
2300{
2301    static const AArch64Insn cmp_vec_insn[16] = {
2302        [TCG_COND_EQ] = I3616_CMEQ,
2303        [TCG_COND_GT] = I3616_CMGT,
2304        [TCG_COND_GE] = I3616_CMGE,
2305        [TCG_COND_GTU] = I3616_CMHI,
2306        [TCG_COND_GEU] = I3616_CMHS,
2307    };
2308    static const AArch64Insn cmp_scalar_insn[16] = {
2309        [TCG_COND_EQ] = I3611_CMEQ,
2310        [TCG_COND_GT] = I3611_CMGT,
2311        [TCG_COND_GE] = I3611_CMGE,
2312        [TCG_COND_GTU] = I3611_CMHI,
2313        [TCG_COND_GEU] = I3611_CMHS,
2314    };
2315    static const AArch64Insn cmp0_vec_insn[16] = {
2316        [TCG_COND_EQ] = I3617_CMEQ0,
2317        [TCG_COND_GT] = I3617_CMGT0,
2318        [TCG_COND_GE] = I3617_CMGE0,
2319        [TCG_COND_LT] = I3617_CMLT0,
2320        [TCG_COND_LE] = I3617_CMLE0,
2321    };
2322    static const AArch64Insn cmp0_scalar_insn[16] = {
2323        [TCG_COND_EQ] = I3612_CMEQ0,
2324        [TCG_COND_GT] = I3612_CMGT0,
2325        [TCG_COND_GE] = I3612_CMGE0,
2326        [TCG_COND_LT] = I3612_CMLT0,
2327        [TCG_COND_LE] = I3612_CMLE0,
2328    };
2329
2330    TCGType type = vecl + TCG_TYPE_V64;
2331    unsigned is_q = vecl;
2332    bool is_scalar = !is_q && vece == MO_64;
2333    TCGArg a0, a1, a2, a3;
2334    int cmode, imm8;
2335
2336    a0 = args[0];
2337    a1 = args[1];
2338    a2 = args[2];
2339
2340    switch (opc) {
2341    case INDEX_op_ld_vec:
2342        tcg_out_ld(s, type, a0, a1, a2);
2343        break;
2344    case INDEX_op_st_vec:
2345        tcg_out_st(s, type, a0, a1, a2);
2346        break;
2347    case INDEX_op_dupm_vec:
2348        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2349        break;
2350    case INDEX_op_add_vec:
2351        if (is_scalar) {
2352            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2353        } else {
2354            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2355        }
2356        break;
2357    case INDEX_op_sub_vec:
2358        if (is_scalar) {
2359            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2360        } else {
2361            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2362        }
2363        break;
2364    case INDEX_op_mul_vec:
2365        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2366        break;
2367    case INDEX_op_neg_vec:
2368        if (is_scalar) {
2369            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2370        } else {
2371            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2372        }
2373        break;
2374    case INDEX_op_abs_vec:
2375        if (is_scalar) {
2376            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2377        } else {
2378            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2379        }
2380        break;
2381    case INDEX_op_and_vec:
2382        if (const_args[2]) {
2383            is_shimm1632(~a2, &cmode, &imm8);
2384            if (a0 == a1) {
2385                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2386                return;
2387            }
2388            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2389            a2 = a0;
2390        }
2391        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2392        break;
2393    case INDEX_op_or_vec:
2394        if (const_args[2]) {
2395            is_shimm1632(a2, &cmode, &imm8);
2396            if (a0 == a1) {
2397                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2398                return;
2399            }
2400            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2401            a2 = a0;
2402        }
2403        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2404        break;
2405    case INDEX_op_andc_vec:
2406        if (const_args[2]) {
2407            is_shimm1632(a2, &cmode, &imm8);
2408            if (a0 == a1) {
2409                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2410                return;
2411            }
2412            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2413            a2 = a0;
2414        }
2415        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2416        break;
2417    case INDEX_op_orc_vec:
2418        if (const_args[2]) {
2419            is_shimm1632(~a2, &cmode, &imm8);
2420            if (a0 == a1) {
2421                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2422                return;
2423            }
2424            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2425            a2 = a0;
2426        }
2427        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2428        break;
2429    case INDEX_op_xor_vec:
2430        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2431        break;
2432    case INDEX_op_ssadd_vec:
2433        if (is_scalar) {
2434            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2435        } else {
2436            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2437        }
2438        break;
2439    case INDEX_op_sssub_vec:
2440        if (is_scalar) {
2441            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2442        } else {
2443            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2444        }
2445        break;
2446    case INDEX_op_usadd_vec:
2447        if (is_scalar) {
2448            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2449        } else {
2450            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2451        }
2452        break;
2453    case INDEX_op_ussub_vec:
2454        if (is_scalar) {
2455            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2456        } else {
2457            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2458        }
2459        break;
2460    case INDEX_op_smax_vec:
2461        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2462        break;
2463    case INDEX_op_smin_vec:
2464        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2465        break;
2466    case INDEX_op_umax_vec:
2467        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2468        break;
2469    case INDEX_op_umin_vec:
2470        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2471        break;
2472    case INDEX_op_not_vec:
2473        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2474        break;
2475    case INDEX_op_shli_vec:
2476        if (is_scalar) {
2477            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2478        } else {
2479            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2480        }
2481        break;
2482    case INDEX_op_shri_vec:
2483        if (is_scalar) {
2484            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2485        } else {
2486            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2487        }
2488        break;
2489    case INDEX_op_sari_vec:
2490        if (is_scalar) {
2491            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2492        } else {
2493            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2494        }
2495        break;
2496    case INDEX_op_aa64_sli_vec:
2497        if (is_scalar) {
2498            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2499        } else {
2500            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2501        }
2502        break;
2503    case INDEX_op_shlv_vec:
2504        if (is_scalar) {
2505            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2506        } else {
2507            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2508        }
2509        break;
2510    case INDEX_op_aa64_sshl_vec:
2511        if (is_scalar) {
2512            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2513        } else {
2514            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2515        }
2516        break;
2517    case INDEX_op_cmp_vec:
2518        {
2519            TCGCond cond = args[3];
2520            AArch64Insn insn;
2521
2522            if (cond == TCG_COND_NE) {
2523                if (const_args[2]) {
2524                    if (is_scalar) {
2525                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2526                    } else {
2527                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2528                    }
2529                } else {
2530                    if (is_scalar) {
2531                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2532                    } else {
2533                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2534                    }
2535                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2536                }
2537            } else {
2538                if (const_args[2]) {
2539                    if (is_scalar) {
2540                        insn = cmp0_scalar_insn[cond];
2541                        if (insn) {
2542                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2543                            break;
2544                        }
2545                    } else {
2546                        insn = cmp0_vec_insn[cond];
2547                        if (insn) {
2548                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2549                            break;
2550                        }
2551                    }
2552                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2553                    a2 = TCG_VEC_TMP;
2554                }
2555                if (is_scalar) {
2556                    insn = cmp_scalar_insn[cond];
2557                    if (insn == 0) {
2558                        TCGArg t;
2559                        t = a1, a1 = a2, a2 = t;
2560                        cond = tcg_swap_cond(cond);
2561                        insn = cmp_scalar_insn[cond];
2562                        tcg_debug_assert(insn != 0);
2563                    }
2564                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2565                } else {
2566                    insn = cmp_vec_insn[cond];
2567                    if (insn == 0) {
2568                        TCGArg t;
2569                        t = a1, a1 = a2, a2 = t;
2570                        cond = tcg_swap_cond(cond);
2571                        insn = cmp_vec_insn[cond];
2572                        tcg_debug_assert(insn != 0);
2573                    }
2574                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2575                }
2576            }
2577        }
2578        break;
2579
2580    case INDEX_op_bitsel_vec:
2581        a3 = args[3];
2582        if (a0 == a3) {
2583            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2584        } else if (a0 == a2) {
2585            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2586        } else {
2587            if (a0 != a1) {
2588                tcg_out_mov(s, type, a0, a1);
2589            }
2590            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2591        }
2592        break;
2593
2594    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2595    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2596    default:
2597        g_assert_not_reached();
2598    }
2599}
2600
2601int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2602{
2603    switch (opc) {
2604    case INDEX_op_add_vec:
2605    case INDEX_op_sub_vec:
2606    case INDEX_op_and_vec:
2607    case INDEX_op_or_vec:
2608    case INDEX_op_xor_vec:
2609    case INDEX_op_andc_vec:
2610    case INDEX_op_orc_vec:
2611    case INDEX_op_neg_vec:
2612    case INDEX_op_abs_vec:
2613    case INDEX_op_not_vec:
2614    case INDEX_op_cmp_vec:
2615    case INDEX_op_shli_vec:
2616    case INDEX_op_shri_vec:
2617    case INDEX_op_sari_vec:
2618    case INDEX_op_ssadd_vec:
2619    case INDEX_op_sssub_vec:
2620    case INDEX_op_usadd_vec:
2621    case INDEX_op_ussub_vec:
2622    case INDEX_op_shlv_vec:
2623    case INDEX_op_bitsel_vec:
2624        return 1;
2625    case INDEX_op_rotli_vec:
2626    case INDEX_op_shrv_vec:
2627    case INDEX_op_sarv_vec:
2628    case INDEX_op_rotlv_vec:
2629    case INDEX_op_rotrv_vec:
2630        return -1;
2631    case INDEX_op_mul_vec:
2632    case INDEX_op_smax_vec:
2633    case INDEX_op_smin_vec:
2634    case INDEX_op_umax_vec:
2635    case INDEX_op_umin_vec:
2636        return vece < MO_64;
2637
2638    default:
2639        return 0;
2640    }
2641}
2642
2643void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2644                       TCGArg a0, ...)
2645{
2646    va_list va;
2647    TCGv_vec v0, v1, v2, t1, t2, c1;
2648    TCGArg a2;
2649
2650    va_start(va, a0);
2651    v0 = temp_tcgv_vec(arg_temp(a0));
2652    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2653    a2 = va_arg(va, TCGArg);
2654    va_end(va);
2655
2656    switch (opc) {
2657    case INDEX_op_rotli_vec:
2658        t1 = tcg_temp_new_vec(type);
2659        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2660        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2661                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2662        tcg_temp_free_vec(t1);
2663        break;
2664
2665    case INDEX_op_shrv_vec:
2666    case INDEX_op_sarv_vec:
2667        /* Right shifts are negative left shifts for AArch64.  */
2668        v2 = temp_tcgv_vec(arg_temp(a2));
2669        t1 = tcg_temp_new_vec(type);
2670        tcg_gen_neg_vec(vece, t1, v2);
2671        opc = (opc == INDEX_op_shrv_vec
2672               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2673        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2674                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2675        tcg_temp_free_vec(t1);
2676        break;
2677
2678    case INDEX_op_rotlv_vec:
2679        v2 = temp_tcgv_vec(arg_temp(a2));
2680        t1 = tcg_temp_new_vec(type);
2681        c1 = tcg_constant_vec(type, vece, 8 << vece);
2682        tcg_gen_sub_vec(vece, t1, v2, c1);
2683        /* Right shifts are negative left shifts for AArch64.  */
2684        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2685                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2686        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2687                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2688        tcg_gen_or_vec(vece, v0, v0, t1);
2689        tcg_temp_free_vec(t1);
2690        break;
2691
2692    case INDEX_op_rotrv_vec:
2693        v2 = temp_tcgv_vec(arg_temp(a2));
2694        t1 = tcg_temp_new_vec(type);
2695        t2 = tcg_temp_new_vec(type);
2696        c1 = tcg_constant_vec(type, vece, 8 << vece);
2697        tcg_gen_neg_vec(vece, t1, v2);
2698        tcg_gen_sub_vec(vece, t2, c1, v2);
2699        /* Right shifts are negative left shifts for AArch64.  */
2700        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2701                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2702        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2703                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2704        tcg_gen_or_vec(vece, v0, t1, t2);
2705        tcg_temp_free_vec(t1);
2706        tcg_temp_free_vec(t2);
2707        break;
2708
2709    default:
2710        g_assert_not_reached();
2711    }
2712}
2713
2714static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2715{
2716    switch (op) {
2717    case INDEX_op_goto_ptr:
2718        return C_O0_I1(r);
2719
2720    case INDEX_op_ld8u_i32:
2721    case INDEX_op_ld8s_i32:
2722    case INDEX_op_ld16u_i32:
2723    case INDEX_op_ld16s_i32:
2724    case INDEX_op_ld_i32:
2725    case INDEX_op_ld8u_i64:
2726    case INDEX_op_ld8s_i64:
2727    case INDEX_op_ld16u_i64:
2728    case INDEX_op_ld16s_i64:
2729    case INDEX_op_ld32u_i64:
2730    case INDEX_op_ld32s_i64:
2731    case INDEX_op_ld_i64:
2732    case INDEX_op_neg_i32:
2733    case INDEX_op_neg_i64:
2734    case INDEX_op_not_i32:
2735    case INDEX_op_not_i64:
2736    case INDEX_op_bswap16_i32:
2737    case INDEX_op_bswap32_i32:
2738    case INDEX_op_bswap16_i64:
2739    case INDEX_op_bswap32_i64:
2740    case INDEX_op_bswap64_i64:
2741    case INDEX_op_ext8s_i32:
2742    case INDEX_op_ext16s_i32:
2743    case INDEX_op_ext8u_i32:
2744    case INDEX_op_ext16u_i32:
2745    case INDEX_op_ext8s_i64:
2746    case INDEX_op_ext16s_i64:
2747    case INDEX_op_ext32s_i64:
2748    case INDEX_op_ext8u_i64:
2749    case INDEX_op_ext16u_i64:
2750    case INDEX_op_ext32u_i64:
2751    case INDEX_op_ext_i32_i64:
2752    case INDEX_op_extu_i32_i64:
2753    case INDEX_op_extract_i32:
2754    case INDEX_op_extract_i64:
2755    case INDEX_op_sextract_i32:
2756    case INDEX_op_sextract_i64:
2757        return C_O1_I1(r, r);
2758
2759    case INDEX_op_st8_i32:
2760    case INDEX_op_st16_i32:
2761    case INDEX_op_st_i32:
2762    case INDEX_op_st8_i64:
2763    case INDEX_op_st16_i64:
2764    case INDEX_op_st32_i64:
2765    case INDEX_op_st_i64:
2766        return C_O0_I2(rZ, r);
2767
2768    case INDEX_op_add_i32:
2769    case INDEX_op_add_i64:
2770    case INDEX_op_sub_i32:
2771    case INDEX_op_sub_i64:
2772    case INDEX_op_setcond_i32:
2773    case INDEX_op_setcond_i64:
2774        return C_O1_I2(r, r, rA);
2775
2776    case INDEX_op_mul_i32:
2777    case INDEX_op_mul_i64:
2778    case INDEX_op_div_i32:
2779    case INDEX_op_div_i64:
2780    case INDEX_op_divu_i32:
2781    case INDEX_op_divu_i64:
2782    case INDEX_op_rem_i32:
2783    case INDEX_op_rem_i64:
2784    case INDEX_op_remu_i32:
2785    case INDEX_op_remu_i64:
2786    case INDEX_op_muluh_i64:
2787    case INDEX_op_mulsh_i64:
2788        return C_O1_I2(r, r, r);
2789
2790    case INDEX_op_and_i32:
2791    case INDEX_op_and_i64:
2792    case INDEX_op_or_i32:
2793    case INDEX_op_or_i64:
2794    case INDEX_op_xor_i32:
2795    case INDEX_op_xor_i64:
2796    case INDEX_op_andc_i32:
2797    case INDEX_op_andc_i64:
2798    case INDEX_op_orc_i32:
2799    case INDEX_op_orc_i64:
2800    case INDEX_op_eqv_i32:
2801    case INDEX_op_eqv_i64:
2802        return C_O1_I2(r, r, rL);
2803
2804    case INDEX_op_shl_i32:
2805    case INDEX_op_shr_i32:
2806    case INDEX_op_sar_i32:
2807    case INDEX_op_rotl_i32:
2808    case INDEX_op_rotr_i32:
2809    case INDEX_op_shl_i64:
2810    case INDEX_op_shr_i64:
2811    case INDEX_op_sar_i64:
2812    case INDEX_op_rotl_i64:
2813    case INDEX_op_rotr_i64:
2814        return C_O1_I2(r, r, ri);
2815
2816    case INDEX_op_clz_i32:
2817    case INDEX_op_ctz_i32:
2818    case INDEX_op_clz_i64:
2819    case INDEX_op_ctz_i64:
2820        return C_O1_I2(r, r, rAL);
2821
2822    case INDEX_op_brcond_i32:
2823    case INDEX_op_brcond_i64:
2824        return C_O0_I2(r, rA);
2825
2826    case INDEX_op_movcond_i32:
2827    case INDEX_op_movcond_i64:
2828        return C_O1_I4(r, r, rA, rZ, rZ);
2829
2830    case INDEX_op_qemu_ld_i32:
2831    case INDEX_op_qemu_ld_i64:
2832        return C_O1_I1(r, l);
2833    case INDEX_op_qemu_st_i32:
2834    case INDEX_op_qemu_st_i64:
2835        return C_O0_I2(lZ, l);
2836
2837    case INDEX_op_deposit_i32:
2838    case INDEX_op_deposit_i64:
2839        return C_O1_I2(r, 0, rZ);
2840
2841    case INDEX_op_extract2_i32:
2842    case INDEX_op_extract2_i64:
2843        return C_O1_I2(r, rZ, rZ);
2844
2845    case INDEX_op_add2_i32:
2846    case INDEX_op_add2_i64:
2847    case INDEX_op_sub2_i32:
2848    case INDEX_op_sub2_i64:
2849        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2850
2851    case INDEX_op_add_vec:
2852    case INDEX_op_sub_vec:
2853    case INDEX_op_mul_vec:
2854    case INDEX_op_xor_vec:
2855    case INDEX_op_ssadd_vec:
2856    case INDEX_op_sssub_vec:
2857    case INDEX_op_usadd_vec:
2858    case INDEX_op_ussub_vec:
2859    case INDEX_op_smax_vec:
2860    case INDEX_op_smin_vec:
2861    case INDEX_op_umax_vec:
2862    case INDEX_op_umin_vec:
2863    case INDEX_op_shlv_vec:
2864    case INDEX_op_shrv_vec:
2865    case INDEX_op_sarv_vec:
2866    case INDEX_op_aa64_sshl_vec:
2867        return C_O1_I2(w, w, w);
2868    case INDEX_op_not_vec:
2869    case INDEX_op_neg_vec:
2870    case INDEX_op_abs_vec:
2871    case INDEX_op_shli_vec:
2872    case INDEX_op_shri_vec:
2873    case INDEX_op_sari_vec:
2874        return C_O1_I1(w, w);
2875    case INDEX_op_ld_vec:
2876    case INDEX_op_dupm_vec:
2877        return C_O1_I1(w, r);
2878    case INDEX_op_st_vec:
2879        return C_O0_I2(w, r);
2880    case INDEX_op_dup_vec:
2881        return C_O1_I1(w, wr);
2882    case INDEX_op_or_vec:
2883    case INDEX_op_andc_vec:
2884        return C_O1_I2(w, w, wO);
2885    case INDEX_op_and_vec:
2886    case INDEX_op_orc_vec:
2887        return C_O1_I2(w, w, wN);
2888    case INDEX_op_cmp_vec:
2889        return C_O1_I2(w, w, wZ);
2890    case INDEX_op_bitsel_vec:
2891        return C_O1_I3(w, w, w, w);
2892    case INDEX_op_aa64_sli_vec:
2893        return C_O1_I2(w, 0, w);
2894
2895    default:
2896        g_assert_not_reached();
2897    }
2898}
2899
2900static void tcg_target_init(TCGContext *s)
2901{
2902    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2903    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2904    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2905    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2906
2907    tcg_target_call_clobber_regs = -1ull;
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2915    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2916    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2917    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2918    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2919    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2920    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2921    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2922    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2923    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2924    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2925    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2926    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2927
2928    s->reserved_regs = 0;
2929    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2930    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2931    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2932    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2933    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2934}
2935
2936/*
 * Size in bytes of the register save area, which holds the pairs
 * (X19, X20) .. (X27, X28) and (X29(fp), X30(lr)): twelve registers of
 * eight bytes each.
 */
2937#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2938
/*
 * Size of the whole stack frame: the register save area, the static
 * call-argument area and the CPU temp buffer, rounded up to a multiple
 * of TCG_TARGET_STACK_ALIGN.  The add-and-mask rounding assumes that
 * alignment is a power of two.
 */
2939#define FRAME_SIZE \
2940    ((PUSH_SIZE \
2941      + TCG_STATIC_CALL_ARGS_SIZE \
2942      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2943      + TCG_TARGET_STACK_ALIGN - 1) \
2944     & ~(TCG_TARGET_STACK_ALIGN - 1))
2945
2946/*
 * debug_frame encodes FRAME_SIZE as a 2 byte uleb128 value, which can
 * carry at most 14 bits.
 */
2947QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2948
2949/*
 * The prologue and epilogue move SP by FRAME_SIZE - PUSH_SIZE with a
 * single SUBI/ADDI insn, so that amount is capped at 0xfff here.
 */
2950QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2951
2952static void tcg_target_qemu_prologue(TCGContext *s)
2953{
2954    TCGReg r;
2955
2956    /* Push (FP, LR) and allocate space for all saved registers.  */
2957    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2958                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2959
2960    /* Set up frame pointer for canonical unwinding.  */
2961    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2962
2963    /* Store callee-preserved regs x19..x28.  */
2964    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2965        int ofs = (r - TCG_REG_X19 + 2) * 8;
2966        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2967    }
2968
2969    /* Make stack space for TCG locals.  */
2970    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2971                 FRAME_SIZE - PUSH_SIZE);
2972
2973    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2974    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2975                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2976
2977#if !defined(CONFIG_SOFTMMU)
2978    if (USE_GUEST_BASE) {
2979        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2980        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2981    }
2982#endif
2983
2984    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2985    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2986
2987    /*
2988     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2989     * and fall through to the rest of the epilogue.
2990     */
2991    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2992    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2993
2994    /* TB epilogue */
2995    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2996
2997    /* Remove TCG locals stack space.  */
2998    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2999                 FRAME_SIZE - PUSH_SIZE);
3000
3001    /* Restore registers x19..x28.  */
3002    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3003        int ofs = (r - TCG_REG_X19 + 2) * 8;
3004        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3005    }
3006
3007    /* Pop (FP, LR), restore SP to previous frame.  */
3008    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3009                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3010    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3011}
3012
3013static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3014{
3015    int i;
3016    for (i = 0; i < count; ++i) {
3017        p[i] = NOP;
3018    }
3019}
3020
3021typedef struct {
3022    DebugFrameHeader h;
3023    uint8_t fde_def_cfa[4];
3024    uint8_t fde_reg_ofs[24];
3025} DebugFrame;
3026
/*
 * ELF machine identifier for this host.
 * NOTE(review): the consumer is not in this chunk; presumably the
 * shared JIT debug-info code reads it -- confirm.
 */
3027#define ELF_HOST_MACHINE EM_AARCH64
3028
3029static const DebugFrame debug_frame = {
3030    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3031    .h.cie.id = -1,
3032    .h.cie.version = 1,
3033    .h.cie.code_align = 1,
3034    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3035    .h.cie.return_column = TCG_REG_LR,
3036
3037    /* Total FDE size does not include the "len" member.  */
3038    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3039
3040    .fde_def_cfa = {
3041        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3042        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3043        (FRAME_SIZE >> 7)
3044    },
3045    .fde_reg_ofs = {
3046        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3047        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3048        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3049        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3050        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3051        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3052        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3053        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3054        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3055        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3056        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3057        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3058    }
3059};
3060
3061void tcg_register_jit(const void *buf, size_t buf_size)
3062{
3063    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3064}
3065