/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}
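
/*
 * For illustration: the branch offsets are in 4-byte instruction units,
 * so reloc_pc26 can reach +-2^25 insns = +-128 MiB from the branch, and
 * reloc_pc19 (conditional branches) +-2^18 insns = +-1 MiB.  E.g. a
 * target 1024 bytes ahead gives offset = 256, which survives the
 * sextract64(offset, 0, 26) round-trip test and is deposited into
 * bits [25:0] of the insn.
 */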

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 for the helper args; better to avoid using them. */
        tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
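
/*
 * Worked example (illustrative): val = 0x0ff0 has the form 0..01..10..0.
 * val & -val isolates the lowest set bit (0x10); adding it collapses the
 * run of ones, giving 0x1000, which passes the power-of-two test.  By
 * contrast, val = 0x101 becomes 0x102 after the addition, which still
 * has two bits set, so it is correctly rejected.
 */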

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
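
/*
 * Illustrative check: 1.0f is 0x3f800000.  Bits [18:0] are zero and bits
 * [30:25] are 0x1f, so it matches; the resulting imm8 is 0x70, which is
 * indeed the FMOV (vector, immediate) encoding of 1.0.
 */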

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
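
/*
 * For example (illustrative), v32 = 0x00ff00ff: masking out byte 2
 * (i == 4) leaves 0x000000ff, which is_shimm32 accepts with cmode 0x0,
 * so the pair MOVI 0x000000ff + ORR (cmode 0x4, imm8 0xff) rebuilds
 * the constant in two insns.
 */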

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
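
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); naming an opcode
 * from the wrong group fails to compile because the glued I<FMT>_<OP>
 * constant does not exist.
 */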

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
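
/*
 * Worked example (illustrative): limm = 0x0ff0 gives h = 52 and l = 4,
 * so r = 64 - 4 = 60 and c = 60 - 52 - 1 = 7.  DecodeBitMasks then
 * produces ones(7 + 1) = 0xff rotated right by 60, i.e. 0x0ff0 again.
 */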

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
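
/*
 * E.g. (illustrative) v64 = 0x00ff00ff00ff00ff survives the bytes test
 * with imm8 = 0x55 (one bit per 0xff byte), so a single MOVI with
 * op = 1, cmode = 0xe materializes it without touching the pool.
 */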

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
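
/*
 * Worked example (illustrative): value = 0xffffffff0000abcd has 42 set
 * bits, so we start from MOVN.  t0 = ~value = 0xffff5432 gives s0 = 0
 * and s1 = 16 with t2 == 0, so two insns suffice:
 *     movn rd, #0x5432          -> rd = 0xffffffffffffabcd
 *     movk rd, #0x0000, lsl #16 -> rd = 0xffffffff0000abcd
 */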

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
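
/*
 * For instance (illustrative), an LDRX with offset 0x1008 is 8-byte
 * aligned and 0x1008 >> 3 = 0x201 <= 0xfff, so the scaled uimm12 form
 * applies; offset -16 falls back to the unscaled 9-bit signed form; and
 * offset 0x123456 is loaded into TCG_REG_TMP for the register-offset form.
 */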

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
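
/*
 * The barrier option lives in CRm: DMB_ISH alone encodes CRm = 8, and
 * OR-ing in DMB_LD / DMB_ST selects the load-only / store-only variants,
 * e.g. (illustrative):
 *     DMB_ISH | DMB_LD | DMB_ST = 0xd5033bbf   dmb ish
 *     DMB_ISH | DMB_LD          = 0xd50339bf   dmb ishld
 *     DMB_ISH | DMB_ST          = 0xd5033abf   dmb ishst
 */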
1507
1508static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1509                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1510{
1511    TCGReg a1 = a0;
1512    if (is_ctz) {
1513        a1 = TCG_REG_TMP;
1514        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1515    }
1516    if (const_b && b == (ext ? 64 : 32)) {
1517        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1518    } else {
1519        AArch64Insn sel = I3506_CSEL;
1520
1521        tcg_out_cmp(s, ext, a0, 0, 1);
1522        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1523
1524        if (const_b) {
1525            if (b == -1) {
1526                b = TCG_REG_XZR;
1527                sel = I3506_CSINV;
1528            } else if (b == 0) {
1529                b = TCG_REG_XZR;
1530            } else {
1531                tcg_out_movi(s, ext, d, b);
1532                b = d;
1533            }
1534        }
1535        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1536    }
1537}
1538
1539#ifdef CONFIG_SOFTMMU
1540#include "../tcg-ldst.c.inc"
1541
1542/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1543 *                                     TCGMemOpIdx oi, uintptr_t ra)
1544 */
1545static void * const qemu_ld_helpers[16] = {
1546    [MO_UB]   = helper_ret_ldub_mmu,
1547    [MO_LEUW] = helper_le_lduw_mmu,
1548    [MO_LEUL] = helper_le_ldul_mmu,
1549    [MO_LEQ]  = helper_le_ldq_mmu,
1550    [MO_BEUW] = helper_be_lduw_mmu,
1551    [MO_BEUL] = helper_be_ldul_mmu,
1552    [MO_BEQ]  = helper_be_ldq_mmu,
1553};
1554
1555/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1556 *                                     uintxx_t val, TCGMemOpIdx oi,
1557 *                                     uintptr_t ra)
1558 */
1559static void * const qemu_st_helpers[16] = {
1560    [MO_UB]   = helper_ret_stb_mmu,
1561    [MO_LEUW] = helper_le_stw_mmu,
1562    [MO_LEUL] = helper_le_stl_mmu,
1563    [MO_LEQ]  = helper_le_stq_mmu,
1564    [MO_BEUW] = helper_be_stw_mmu,
1565    [MO_BEUL] = helper_be_stl_mmu,
1566    [MO_BEQ]  = helper_be_stq_mmu,
1567};
1568
1569static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1570{
1571    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1572    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1573    tcg_out_insn(s, 3406, ADR, rd, offset);
1574}
1575
1576static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1577{
1578    TCGMemOpIdx oi = lb->oi;
1579    MemOp opc = get_memop(oi);
1580    MemOp size = opc & MO_SIZE;
1581
1582    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1583        return false;
1584    }
1585
1586    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1587    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1588    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1589    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1590    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1591    if (opc & MO_SIGN) {
1592        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1593    } else {
1594        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1595    }
1596
1597    tcg_out_goto(s, lb->raddr);
1598    return true;
1599}
1600
1601static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1602{
1603    TCGMemOpIdx oi = lb->oi;
1604    MemOp opc = get_memop(oi);
1605    MemOp size = opc & MO_SIZE;
1606
1607    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1608        return false;
1609    }
1610
1611    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1612    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1613    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1614    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1615    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1616    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1617    tcg_out_goto(s, lb->raddr);
1618    return true;
1619}
1620
1621static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1622                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1623                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1624{
1625    TCGLabelQemuLdst *label = new_ldst_label(s);
1626
1627    label->is_ld = is_ld;
1628    label->oi = oi;
1629    label->type = ext;
1630    label->datalo_reg = data_reg;
1631    label->addrlo_reg = addr_reg;
1632    label->raddr = tcg_splitwx_to_rx(raddr);
1633    label->label_ptr[0] = label_ptr;
1634}
1635
1636/* We expect to use a 7-bit scaled negative offset from ENV.  */
1637QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1638QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1639
1640/* These offsets are built into the LDP below.  */
1641QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1642QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1643
1644/* Load and compare a TLB entry, emitting the conditional jump to the
1645   slow path on mismatch; the branch is patched later, when the slow
1646   path is finalized.  The generated code returns the host addend in
1647   X1 and clobbers X0, X2, X3 and TMP. */
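/* A rough sketch, not literal disassembly, of the sequence emitted
 * below for a 64-bit guest; "xaddr" stands for the guest address
 * register and the offsets are symbolic.  The ADD is only emitted
 * for under-aligned accesses:
 *
 *     ldp   x0, x1, [x19, #tlb_ofs]      // f.mask, f.table
 *     and   x0, x0, xaddr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     add   x1, x1, x0                   // x1 = &tlb[index]
 *     ldr   x0, [x1, #addr_read/write]   // comparator
 *     ldr   x1, [x1, #addend]            // host addend
 *     add   x3, xaddr, #(s_mask - a_mask)
 *     and   x3, x3, #(TARGET_PAGE_MASK | a_mask)
 *     cmp   x0, x3
 *     b.ne  slow_path                    // patched via reloc_pc19
 */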
1648static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1649                             tcg_insn_unit **label_ptr, int mem_index,
1650                             bool is_read)
1651{
1652    unsigned a_bits = get_alignment_bits(opc);
1653    unsigned s_bits = opc & MO_SIZE;
1654    unsigned a_mask = (1u << a_bits) - 1;
1655    unsigned s_mask = (1u << s_bits) - 1;
1656    TCGReg x3;
1657    TCGType mask_type;
1658    uint64_t compare_mask;
1659
1660    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1661                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1662
1663    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1664    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1665                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1666
1667    /* Extract the TLB index from the address into X0.  */
1668    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1669                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1670                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1671
1672    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1673    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1674
1675    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1676    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1677               ? offsetof(CPUTLBEntry, addr_read)
1678               : offsetof(CPUTLBEntry, addr_write));
1679    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1680               offsetof(CPUTLBEntry, addend));
1681
1682    /* For sufficiently aligned accesses, check the first byte and fold
1683       the alignment bits into the address.  For under-aligned accesses,
1684       check for a page crossing via the address of the last byte.  */
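    /* For example, a 4-byte load with no alignment requirement has
       a_mask == 0 and s_mask == 3, so the comparison below uses
       addr + 3: if that last byte falls on the next page, the check
       fails and we take the slow path.  */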
1685    if (a_bits >= s_bits) {
1686        x3 = addr_reg;
1687    } else {
1688        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1689                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1690        x3 = TCG_REG_X3;
1691    }
1692    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1693
1694    /* Store the page mask part of the address into X3.  */
1695    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1696                     TCG_REG_X3, x3, compare_mask);
1697
1698    /* Perform the address comparison. */
1699    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1700
1701    /* If not equal, we jump to the slow path. */
1702    *label_ptr = s->code_ptr;
1703    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1704}
1705
1706#endif /* CONFIG_SOFTMMU */
1707
1708static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1709                                   TCGReg data_r, TCGReg addr_r,
1710                                   TCGType otype, TCGReg off_r)
1711{
1712    const MemOp bswap = memop & MO_BSWAP;
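    /* MO_BSWAP set means the access must be byte-swapped relative to
       host order: load in host order, swap, and only then sign-extend
       for the signed cases.  */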
1713
1714    switch (memop & MO_SSIZE) {
1715    case MO_UB:
1716        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1717        break;
1718    case MO_SB:
1719        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1720                       data_r, addr_r, otype, off_r);
1721        break;
1722    case MO_UW:
1723        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1724        if (bswap) {
1725            tcg_out_rev16(s, data_r, data_r);
1726        }
1727        break;
1728    case MO_SW:
1729        if (bswap) {
1730            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1731            tcg_out_rev16(s, data_r, data_r);
1732            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1733        } else {
1734            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1735                           data_r, addr_r, otype, off_r);
1736        }
1737        break;
1738    case MO_UL:
1739        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1740        if (bswap) {
1741            tcg_out_rev32(s, data_r, data_r);
1742        }
1743        break;
1744    case MO_SL:
1745        if (bswap) {
1746            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1747            tcg_out_rev32(s, data_r, data_r);
1748            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1749        } else {
1750            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1751        }
1752        break;
1753    case MO_Q:
1754        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1755        if (bswap) {
1756            tcg_out_rev64(s, data_r, data_r);
1757        }
1758        break;
1759    default:
1760        tcg_abort();
1761    }
1762}
1763
1764static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1765                                   TCGReg data_r, TCGReg addr_r,
1766                                   TCGType otype, TCGReg off_r)
1767{
1768    const MemOp bswap = memop & MO_BSWAP;
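    /* Swap into TMP so the (possibly still live) data register is left
       intact; XZR needs no swapping.  */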
1769
1770    switch (memop & MO_SIZE) {
1771    case MO_8:
1772        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1773        break;
1774    case MO_16:
1775        if (bswap && data_r != TCG_REG_XZR) {
1776            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1777            data_r = TCG_REG_TMP;
1778        }
1779        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1780        break;
1781    case MO_32:
1782        if (bswap && data_r != TCG_REG_XZR) {
1783            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1784            data_r = TCG_REG_TMP;
1785        }
1786        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1787        break;
1788    case MO_64:
1789        if (bswap && data_r != TCG_REG_XZR) {
1790            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1791            data_r = TCG_REG_TMP;
1792        }
1793        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1794        break;
1795    default:
1796        tcg_abort();
1797    }
1798}
1799
1800static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1801                            TCGMemOpIdx oi, TCGType ext)
1802{
1803    MemOp memop = get_memop(oi);
1804    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1805#ifdef CONFIG_SOFTMMU
1806    unsigned mem_index = get_mmuidx(oi);
1807    tcg_insn_unit *label_ptr;
1808
1809    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
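    /* TLB hit: X1 holds the host addend, so address host memory as
       X1 + guest address.  */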
1810    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1811                           TCG_REG_X1, otype, addr_reg);
1812    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1813                        s->code_ptr, label_ptr);
1814#else /* !CONFIG_SOFTMMU */
1815    if (USE_GUEST_BASE) {
1816        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1817                               TCG_REG_GUEST_BASE, otype, addr_reg);
1818    } else {
1819        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1820                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1821    }
1822#endif /* CONFIG_SOFTMMU */
1823}
1824
1825static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1826                            TCGMemOpIdx oi)
1827{
1828    MemOp memop = get_memop(oi);
1829    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1830#ifdef CONFIG_SOFTMMU
1831    unsigned mem_index = get_mmuidx(oi);
1832    tcg_insn_unit *label_ptr;
1833
1834    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1835    tcg_out_qemu_st_direct(s, memop, data_reg,
1836                           TCG_REG_X1, otype, addr_reg);
1837    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1838                        data_reg, addr_reg, s->code_ptr, label_ptr);
1839#else /* !CONFIG_SOFTMMU */
1840    if (USE_GUEST_BASE) {
1841        tcg_out_qemu_st_direct(s, memop, data_reg,
1842                               TCG_REG_GUEST_BASE, otype, addr_reg);
1843    } else {
1844        tcg_out_qemu_st_direct(s, memop, data_reg,
1845                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1846    }
1847#endif /* CONFIG_SOFTMMU */
1848}
1849
1850static const tcg_insn_unit *tb_ret_addr;
1851
1852static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1853                       const TCGArg args[TCG_MAX_OP_ARGS],
1854                       const int const_args[TCG_MAX_OP_ARGS])
1855{
1856    /* 99% of the time, we can signal the use of extension registers
1857       by checking whether the opcode handles 64-bit data.  */
1858    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1859
1860    /* Hoist the loads of the most common arguments.  */
1861    TCGArg a0 = args[0];
1862    TCGArg a1 = args[1];
1863    TCGArg a2 = args[2];
1864    int c2 = const_args[2];
1865
1866    /* Some operands are defined with the "rZ" constraint: a register or
1867       zero.  A constant is guaranteed zero, so REG0 skips testing args[I].  */
1868#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1869
1870    switch (opc) {
1871    case INDEX_op_exit_tb:
1872        /* Reuse the zeroing that exists for goto_ptr.  */
1873        if (a0 == 0) {
1874            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1875        } else {
1876            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1877            tcg_out_goto_long(s, tb_ret_addr);
1878        }
1879        break;
1880
1881    case INDEX_op_goto_tb:
1882        if (s->tb_jmp_insn_offset != NULL) {
1883            /* TCG_TARGET_HAS_direct_jump */
1884            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1885               write can be used to patch the target address. */
1886            if ((uintptr_t)s->code_ptr & 7) {
1887                tcg_out32(s, NOP);
1888            }
1889            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1890            /* The actual branch destination will be patched in later
1891               by tb_target_set_jmp_target. */
1892            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1893            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1894        } else {
1895            /* !TCG_TARGET_HAS_direct_jump */
1896            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
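            /* Load the jump target from the tb_jmp_target_addr table
               with a PC-relative literal LDR, then branch through TMP.  */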
1897            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1898            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1899        }
1900        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1901        set_jmp_reset_offset(s, a0);
1902        break;
1903
1904    case INDEX_op_goto_ptr:
1905        tcg_out_insn(s, 3207, BR, a0);
1906        break;
1907
1908    case INDEX_op_br:
1909        tcg_out_goto_label(s, arg_label(a0));
1910        break;
1911
1912    case INDEX_op_ld8u_i32:
1913    case INDEX_op_ld8u_i64:
1914        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1915        break;
1916    case INDEX_op_ld8s_i32:
1917        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1918        break;
1919    case INDEX_op_ld8s_i64:
1920        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1921        break;
1922    case INDEX_op_ld16u_i32:
1923    case INDEX_op_ld16u_i64:
1924        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1925        break;
1926    case INDEX_op_ld16s_i32:
1927        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1928        break;
1929    case INDEX_op_ld16s_i64:
1930        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1931        break;
1932    case INDEX_op_ld_i32:
1933    case INDEX_op_ld32u_i64:
1934        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1935        break;
1936    case INDEX_op_ld32s_i64:
1937        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1938        break;
1939    case INDEX_op_ld_i64:
1940        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1941        break;
1942
1943    case INDEX_op_st8_i32:
1944    case INDEX_op_st8_i64:
1945        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1946        break;
1947    case INDEX_op_st16_i32:
1948    case INDEX_op_st16_i64:
1949        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1950        break;
1951    case INDEX_op_st_i32:
1952    case INDEX_op_st32_i64:
1953        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1954        break;
1955    case INDEX_op_st_i64:
1956        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1957        break;
1958
1959    case INDEX_op_add_i32:
1960        a2 = (int32_t)a2;
1961        /* FALLTHRU */
1962    case INDEX_op_add_i64:
1963        if (c2) {
1964            tcg_out_addsubi(s, ext, a0, a1, a2);
1965        } else {
1966            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1967        }
1968        break;
1969
1970    case INDEX_op_sub_i32:
1971        a2 = (int32_t)a2;
1972        /* FALLTHRU */
1973    case INDEX_op_sub_i64:
1974        if (c2) {
1975            tcg_out_addsubi(s, ext, a0, a1, -a2);
1976        } else {
1977            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1978        }
1979        break;
1980
1981    case INDEX_op_neg_i64:
1982    case INDEX_op_neg_i32:
1983        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1984        break;
1985
1986    case INDEX_op_and_i32:
1987        a2 = (int32_t)a2;
1988        /* FALLTHRU */
1989    case INDEX_op_and_i64:
1990        if (c2) {
1991            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1992        } else {
1993            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1994        }
1995        break;
1996
1997    case INDEX_op_andc_i32:
1998        a2 = (int32_t)a2;
1999        /* FALLTHRU */
2000    case INDEX_op_andc_i64:
2001        if (c2) {
2002            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2003        } else {
2004            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2005        }
2006        break;
2007
2008    case INDEX_op_or_i32:
2009        a2 = (int32_t)a2;
2010        /* FALLTHRU */
2011    case INDEX_op_or_i64:
2012        if (c2) {
2013            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2014        } else {
2015            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2016        }
2017        break;
2018
2019    case INDEX_op_orc_i32:
2020        a2 = (int32_t)a2;
2021        /* FALLTHRU */
2022    case INDEX_op_orc_i64:
2023        if (c2) {
2024            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2025        } else {
2026            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2027        }
2028        break;
2029
2030    case INDEX_op_xor_i32:
2031        a2 = (int32_t)a2;
2032        /* FALLTHRU */
2033    case INDEX_op_xor_i64:
2034        if (c2) {
2035            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2036        } else {
2037            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2038        }
2039        break;
2040
2041    case INDEX_op_eqv_i32:
2042        a2 = (int32_t)a2;
2043        /* FALLTHRU */
2044    case INDEX_op_eqv_i64:
2045        if (c2) {
2046            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2047        } else {
2048            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2049        }
2050        break;
2051
2052    case INDEX_op_not_i64:
2053    case INDEX_op_not_i32:
2054        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2055        break;
2056
2057    case INDEX_op_mul_i64:
2058    case INDEX_op_mul_i32:
2059        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2060        break;
2061
2062    case INDEX_op_div_i64:
2063    case INDEX_op_div_i32:
2064        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2065        break;
2066    case INDEX_op_divu_i64:
2067    case INDEX_op_divu_i32:
2068        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2069        break;
2070
2071    case INDEX_op_rem_i64:
2072    case INDEX_op_rem_i32:
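        /* a0 = a1 % a2, computed as a1 - (a1 / a2) * a2: SDIV into TMP,
           then MSUB (Rd = Ra - Rn * Rm).  */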
2073        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2074        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2075        break;
2076    case INDEX_op_remu_i64:
2077    case INDEX_op_remu_i32:
2078        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2079        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2080        break;
2081
2082    case INDEX_op_shl_i64:
2083    case INDEX_op_shl_i32:
2084        if (c2) {
2085            tcg_out_shl(s, ext, a0, a1, a2);
2086        } else {
2087            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2088        }
2089        break;
2090
2091    case INDEX_op_shr_i64:
2092    case INDEX_op_shr_i32:
2093        if (c2) {
2094            tcg_out_shr(s, ext, a0, a1, a2);
2095        } else {
2096            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2097        }
2098        break;
2099
2100    case INDEX_op_sar_i64:
2101    case INDEX_op_sar_i32:
2102        if (c2) {
2103            tcg_out_sar(s, ext, a0, a1, a2);
2104        } else {
2105            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2106        }
2107        break;
2108
2109    case INDEX_op_rotr_i64:
2110    case INDEX_op_rotr_i32:
2111        if (c2) {
2112            tcg_out_rotr(s, ext, a0, a1, a2);
2113        } else {
2114            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2115        }
2116        break;
2117
2118    case INDEX_op_rotl_i64:
2119    case INDEX_op_rotl_i32:
2120        if (c2) {
2121            tcg_out_rotl(s, ext, a0, a1, a2);
2122        } else {
2123            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2124            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2125        }
2126        break;
2127
2128    case INDEX_op_clz_i64:
2129    case INDEX_op_clz_i32:
2130        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2131        break;
2132    case INDEX_op_ctz_i64:
2133    case INDEX_op_ctz_i32:
2134        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2135        break;
2136
2137    case INDEX_op_brcond_i32:
2138        a1 = (int32_t)a1;
2139        /* FALLTHRU */
2140    case INDEX_op_brcond_i64:
2141        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2142        break;
2143
2144    case INDEX_op_setcond_i32:
2145        a2 = (int32_t)a2;
2146        /* FALLTHRU */
2147    case INDEX_op_setcond_i64:
2148        tcg_out_cmp(s, ext, a1, a2, c2);
2149        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2150        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2151                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2152        break;
2153
2154    case INDEX_op_movcond_i32:
2155        a2 = (int32_t)a2;
2156        /* FALLTHRU */
2157    case INDEX_op_movcond_i64:
2158        tcg_out_cmp(s, ext, a1, a2, c2);
2159        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2160        break;
2161
2162    case INDEX_op_qemu_ld_i32:
2163    case INDEX_op_qemu_ld_i64:
2164        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2165        break;
2166    case INDEX_op_qemu_st_i32:
2167    case INDEX_op_qemu_st_i64:
2168        tcg_out_qemu_st(s, REG0(0), a1, a2);
2169        break;
2170
2171    case INDEX_op_bswap64_i64:
2172        tcg_out_rev64(s, a0, a1);
2173        break;
2174    case INDEX_op_bswap32_i64:
2175    case INDEX_op_bswap32_i32:
2176        tcg_out_rev32(s, a0, a1);
2177        break;
2178    case INDEX_op_bswap16_i64:
2179    case INDEX_op_bswap16_i32:
2180        tcg_out_rev16(s, a0, a1);
2181        break;
2182
2183    case INDEX_op_ext8s_i64:
2184    case INDEX_op_ext8s_i32:
2185        tcg_out_sxt(s, ext, MO_8, a0, a1);
2186        break;
2187    case INDEX_op_ext16s_i64:
2188    case INDEX_op_ext16s_i32:
2189        tcg_out_sxt(s, ext, MO_16, a0, a1);
2190        break;
2191    case INDEX_op_ext_i32_i64:
2192    case INDEX_op_ext32s_i64:
2193        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2194        break;
2195    case INDEX_op_ext8u_i64:
2196    case INDEX_op_ext8u_i32:
2197        tcg_out_uxt(s, MO_8, a0, a1);
2198        break;
2199    case INDEX_op_ext16u_i64:
2200    case INDEX_op_ext16u_i32:
2201        tcg_out_uxt(s, MO_16, a0, a1);
2202        break;
2203    case INDEX_op_extu_i32_i64:
2204    case INDEX_op_ext32u_i64:
2205        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2206        break;
2207
2208    case INDEX_op_deposit_i64:
2209    case INDEX_op_deposit_i32:
2210        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2211        break;
2212
2213    case INDEX_op_extract_i64:
2214    case INDEX_op_extract_i32:
2215        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2216        break;
2217
2218    case INDEX_op_sextract_i64:
2219    case INDEX_op_sextract_i32:
2220        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2221        break;
2222
2223    case INDEX_op_extract2_i64:
2224    case INDEX_op_extract2_i32:
2225        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2226        break;
2227
2228    case INDEX_op_add2_i32:
2229        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2230                        (int32_t)args[4], args[5], const_args[4],
2231                        const_args[5], false);
2232        break;
2233    case INDEX_op_add2_i64:
2234        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2235                        args[5], const_args[4], const_args[5], false);
2236        break;
2237    case INDEX_op_sub2_i32:
2238        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2239                        (int32_t)args[4], args[5], const_args[4],
2240                        const_args[5], true);
2241        break;
2242    case INDEX_op_sub2_i64:
2243        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2244                        args[5], const_args[4], const_args[5], true);
2245        break;
2246
2247    case INDEX_op_muluh_i64:
2248        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2249        break;
2250    case INDEX_op_mulsh_i64:
2251        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2252        break;
2253
2254    case INDEX_op_mb:
2255        tcg_out_mb(s, a0);
2256        break;
2257
2258    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2259    case INDEX_op_mov_i64:
2260    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2261    default:
2262        g_assert_not_reached();
2263    }
2264
2265#undef REG0
2266}
2267
2268static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2269                           unsigned vecl, unsigned vece,
2270                           const TCGArg *args, const int *const_args)
2271{
2272    static const AArch64Insn cmp_insn[16] = {
2273        [TCG_COND_EQ] = I3616_CMEQ,
2274        [TCG_COND_GT] = I3616_CMGT,
2275        [TCG_COND_GE] = I3616_CMGE,
2276        [TCG_COND_GTU] = I3616_CMHI,
2277        [TCG_COND_GEU] = I3616_CMHS,
2278    };
2279    static const AArch64Insn cmp0_insn[16] = {
2280        [TCG_COND_EQ] = I3617_CMEQ0,
2281        [TCG_COND_GT] = I3617_CMGT0,
2282        [TCG_COND_GE] = I3617_CMGE0,
2283        [TCG_COND_LT] = I3617_CMLT0,
2284        [TCG_COND_LE] = I3617_CMLE0,
2285    };
2286
2287    TCGType type = vecl + TCG_TYPE_V64;
2288    unsigned is_q = vecl;
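    /* vecl is 0 for a 64-bit vector and 1 for a 128-bit one, so it
       doubles as the AdvSIMD Q bit.  */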
2289    TCGArg a0, a1, a2, a3;
2290    int cmode, imm8;
2291
2292    a0 = args[0];
2293    a1 = args[1];
2294    a2 = args[2];
2295
2296    switch (opc) {
2297    case INDEX_op_ld_vec:
2298        tcg_out_ld(s, type, a0, a1, a2);
2299        break;
2300    case INDEX_op_st_vec:
2301        tcg_out_st(s, type, a0, a1, a2);
2302        break;
2303    case INDEX_op_dupm_vec:
2304        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2305        break;
2306    case INDEX_op_add_vec:
2307        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2308        break;
2309    case INDEX_op_sub_vec:
2310        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2311        break;
2312    case INDEX_op_mul_vec:
2313        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2314        break;
2315    case INDEX_op_neg_vec:
2316        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2317        break;
2318    case INDEX_op_abs_vec:
2319        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2320        break;
2321    case INDEX_op_and_vec:
2322        if (const_args[2]) {
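            /* There is no AdvSIMD AND (immediate): use BIC with the
               inverted immediate, or materialize it with MVNI and fall
               through to the register form.  */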
2323            is_shimm1632(~a2, &cmode, &imm8);
2324            if (a0 == a1) {
2325                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2326                return;
2327            }
2328            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2329            a2 = a0;
2330        }
2331        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2332        break;
2333    case INDEX_op_or_vec:
2334        if (const_args[2]) {
2335            is_shimm1632(a2, &cmode, &imm8);
2336            if (a0 == a1) {
2337                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2338                return;
2339            }
2340            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2341            a2 = a0;
2342        }
2343        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2344        break;
2345    case INDEX_op_andc_vec:
2346        if (const_args[2]) {
2347            is_shimm1632(a2, &cmode, &imm8);
2348            if (a0 == a1) {
2349                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2350                return;
2351            }
2352            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2353            a2 = a0;
2354        }
2355        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2356        break;
2357    case INDEX_op_orc_vec:
2358        if (const_args[2]) {
2359            is_shimm1632(~a2, &cmode, &imm8);
2360            if (a0 == a1) {
2361                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2362                return;
2363            }
2364            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2365            a2 = a0;
2366        }
2367        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2368        break;
2369    case INDEX_op_xor_vec:
2370        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2371        break;
2372    case INDEX_op_ssadd_vec:
2373        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2374        break;
2375    case INDEX_op_sssub_vec:
2376        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2377        break;
2378    case INDEX_op_usadd_vec:
2379        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2380        break;
2381    case INDEX_op_ussub_vec:
2382        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2383        break;
2384    case INDEX_op_smax_vec:
2385        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2386        break;
2387    case INDEX_op_smin_vec:
2388        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2389        break;
2390    case INDEX_op_umax_vec:
2391        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2392        break;
2393    case INDEX_op_umin_vec:
2394        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2395        break;
2396    case INDEX_op_not_vec:
2397        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2398        break;
2399    case INDEX_op_shli_vec:
2400        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2401        break;
2402    case INDEX_op_shri_vec:
2403        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2404        break;
2405    case INDEX_op_sari_vec:
2406        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2407        break;
2408    case INDEX_op_aa64_sli_vec:
2409        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2410        break;
2411    case INDEX_op_shlv_vec:
2412        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2413        break;
2414    case INDEX_op_aa64_sshl_vec:
2415        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2416        break;
2417    case INDEX_op_cmp_vec:
2418        {
2419            TCGCond cond = args[3];
2420            AArch64Insn insn;
2421
2422            if (cond == TCG_COND_NE) {
2423                if (const_args[2]) {
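                    /* a2 is constrained to zero here: CMTST Rd, Rn, Rn
                       sets a lane to all-ones iff (Rn & Rn) != 0,
                       i.e. iff Rn != 0.  */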
2424                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2425                } else {
2426                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2427                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2428                }
2429            } else {
2430                if (const_args[2]) {
2431                    insn = cmp0_insn[cond];
2432                    if (insn) {
2433                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2434                        break;
2435                    }
2436                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2437                    a2 = TCG_VEC_TMP;
2438                }
2439                insn = cmp_insn[cond];
2440                if (insn == 0) {
2441                    TCGArg t;
2442                    t = a1, a1 = a2, a2 = t;
2443                    cond = tcg_swap_cond(cond);
2444                    insn = cmp_insn[cond];
2445                    tcg_debug_assert(insn != 0);
2446                }
2447                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2448            }
2449        }
2450        break;
2451
2452    case INDEX_op_bitsel_vec:
2453        a3 = args[3];
2454        if (a0 == a3) {
2455            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2456        } else if (a0 == a2) {
2457            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2458        } else {
2459            if (a0 != a1) {
2460                tcg_out_mov(s, type, a0, a1);
2461            }
2462            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2463        }
2464        break;
2465
2466    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2467    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2468    default:
2469        g_assert_not_reached();
2470    }
2471}
2472
2473int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2474{
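    /* Return 1 if the op is supported directly, -1 if it can be
       expanded by tcg_expand_vec_op below, and 0 if unsupported.  */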
2475    switch (opc) {
2476    case INDEX_op_add_vec:
2477    case INDEX_op_sub_vec:
2478    case INDEX_op_and_vec:
2479    case INDEX_op_or_vec:
2480    case INDEX_op_xor_vec:
2481    case INDEX_op_andc_vec:
2482    case INDEX_op_orc_vec:
2483    case INDEX_op_neg_vec:
2484    case INDEX_op_abs_vec:
2485    case INDEX_op_not_vec:
2486    case INDEX_op_cmp_vec:
2487    case INDEX_op_shli_vec:
2488    case INDEX_op_shri_vec:
2489    case INDEX_op_sari_vec:
2490    case INDEX_op_ssadd_vec:
2491    case INDEX_op_sssub_vec:
2492    case INDEX_op_usadd_vec:
2493    case INDEX_op_ussub_vec:
2494    case INDEX_op_shlv_vec:
2495    case INDEX_op_bitsel_vec:
2496        return 1;
2497    case INDEX_op_rotli_vec:
2498    case INDEX_op_shrv_vec:
2499    case INDEX_op_sarv_vec:
2500    case INDEX_op_rotlv_vec:
2501    case INDEX_op_rotrv_vec:
2502        return -1;
2503    case INDEX_op_mul_vec:
2504    case INDEX_op_smax_vec:
2505    case INDEX_op_smin_vec:
2506    case INDEX_op_umax_vec:
2507    case INDEX_op_umin_vec:
2508        return vece < MO_64;
2509
2510    default:
2511        return 0;
2512    }
2513}
2514
2515void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2516                       TCGArg a0, ...)
2517{
2518    va_list va;
2519    TCGv_vec v0, v1, v2, t1, t2, c1;
2520    TCGArg a2;
2521
2522    va_start(va, a0);
2523    v0 = temp_tcgv_vec(arg_temp(a0));
2524    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2525    a2 = va_arg(va, TCGArg);
2526    v2 = temp_tcgv_vec(arg_temp(a2));
2527
2528    switch (opc) {
2529    case INDEX_op_rotli_vec:
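        /* rotli(v1, a2) == (v1 << a2) | (v1 >> (esize - a2)): USHR
           computes the right-shifted half, then SLI (shift left and
           insert) merges in the left-shifted half of v1.  */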
2530        t1 = tcg_temp_new_vec(type);
2531        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2532        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2533                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2534        tcg_temp_free_vec(t1);
2535        break;
2536
2537    case INDEX_op_shrv_vec:
2538    case INDEX_op_sarv_vec:
2539        /* Right shifts are negative left shifts for AArch64.  */
2540        t1 = tcg_temp_new_vec(type);
2541        tcg_gen_neg_vec(vece, t1, v2);
2542        opc = (opc == INDEX_op_shrv_vec
2543               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2544        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2545                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2546        tcg_temp_free_vec(t1);
2547        break;
2548
2549    case INDEX_op_rotlv_vec:
2550        t1 = tcg_temp_new_vec(type);
2551        c1 = tcg_constant_vec(type, vece, 8 << vece);
2552        tcg_gen_sub_vec(vece, t1, v2, c1);
2553        /* Right shifts are negative left shifts for AArch64.  */
2554        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2555                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2556        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2557                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2558        tcg_gen_or_vec(vece, v0, v0, t1);
2559        tcg_temp_free_vec(t1);
2560        break;
2561
2562    case INDEX_op_rotrv_vec:
2563        t1 = tcg_temp_new_vec(type);
2564        t2 = tcg_temp_new_vec(type);
2565        c1 = tcg_constant_vec(type, vece, 8 << vece);
2566        tcg_gen_neg_vec(vece, t1, v2);
2567        tcg_gen_sub_vec(vece, t2, c1, v2);
2568        /* Right shifts are negative left shifts for AArch64.  */
2569        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2570                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2571        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2572                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2573        tcg_gen_or_vec(vece, v0, t1, t2);
2574        tcg_temp_free_vec(t1);
2575        tcg_temp_free_vec(t2);
2576        break;
2577
2578    default:
2579        g_assert_not_reached();
2580    }
2581
2582    va_end(va);
2583}
2584
2585static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2586{
2587    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2588    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2589    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2590    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2591    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2592    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2593    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2594    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2595    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2596    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2597    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2598    static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2599    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2600    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2601    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2602    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2603    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2604    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2605    static const TCGTargetOpDef r_r_rAL
2606        = { .args_ct_str = { "r", "r", "rAL" } };
2607    static const TCGTargetOpDef dep
2608        = { .args_ct_str = { "r", "0", "rZ" } };
2609    static const TCGTargetOpDef ext2
2610        = { .args_ct_str = { "r", "rZ", "rZ" } };
2611    static const TCGTargetOpDef movc
2612        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2613    static const TCGTargetOpDef add2
2614        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2615    static const TCGTargetOpDef w_w_w_w
2616        = { .args_ct_str = { "w", "w", "w", "w" } };
2617
2618    switch (op) {
2619    case INDEX_op_goto_ptr:
2620        return &r;
2621
2622    case INDEX_op_ld8u_i32:
2623    case INDEX_op_ld8s_i32:
2624    case INDEX_op_ld16u_i32:
2625    case INDEX_op_ld16s_i32:
2626    case INDEX_op_ld_i32:
2627    case INDEX_op_ld8u_i64:
2628    case INDEX_op_ld8s_i64:
2629    case INDEX_op_ld16u_i64:
2630    case INDEX_op_ld16s_i64:
2631    case INDEX_op_ld32u_i64:
2632    case INDEX_op_ld32s_i64:
2633    case INDEX_op_ld_i64:
2634    case INDEX_op_neg_i32:
2635    case INDEX_op_neg_i64:
2636    case INDEX_op_not_i32:
2637    case INDEX_op_not_i64:
2638    case INDEX_op_bswap16_i32:
2639    case INDEX_op_bswap32_i32:
2640    case INDEX_op_bswap16_i64:
2641    case INDEX_op_bswap32_i64:
2642    case INDEX_op_bswap64_i64:
2643    case INDEX_op_ext8s_i32:
2644    case INDEX_op_ext16s_i32:
2645    case INDEX_op_ext8u_i32:
2646    case INDEX_op_ext16u_i32:
2647    case INDEX_op_ext8s_i64:
2648    case INDEX_op_ext16s_i64:
2649    case INDEX_op_ext32s_i64:
2650    case INDEX_op_ext8u_i64:
2651    case INDEX_op_ext16u_i64:
2652    case INDEX_op_ext32u_i64:
2653    case INDEX_op_ext_i32_i64:
2654    case INDEX_op_extu_i32_i64:
2655    case INDEX_op_extract_i32:
2656    case INDEX_op_extract_i64:
2657    case INDEX_op_sextract_i32:
2658    case INDEX_op_sextract_i64:
2659        return &r_r;
2660
2661    case INDEX_op_st8_i32:
2662    case INDEX_op_st16_i32:
2663    case INDEX_op_st_i32:
2664    case INDEX_op_st8_i64:
2665    case INDEX_op_st16_i64:
2666    case INDEX_op_st32_i64:
2667    case INDEX_op_st_i64:
2668        return &rZ_r;
2669
2670    case INDEX_op_add_i32:
2671    case INDEX_op_add_i64:
2672    case INDEX_op_sub_i32:
2673    case INDEX_op_sub_i64:
2674    case INDEX_op_setcond_i32:
2675    case INDEX_op_setcond_i64:
2676        return &r_r_rA;
2677
2678    case INDEX_op_mul_i32:
2679    case INDEX_op_mul_i64:
2680    case INDEX_op_div_i32:
2681    case INDEX_op_div_i64:
2682    case INDEX_op_divu_i32:
2683    case INDEX_op_divu_i64:
2684    case INDEX_op_rem_i32:
2685    case INDEX_op_rem_i64:
2686    case INDEX_op_remu_i32:
2687    case INDEX_op_remu_i64:
2688    case INDEX_op_muluh_i64:
2689    case INDEX_op_mulsh_i64:
2690        return &r_r_r;
2691
2692    case INDEX_op_and_i32:
2693    case INDEX_op_and_i64:
2694    case INDEX_op_or_i32:
2695    case INDEX_op_or_i64:
2696    case INDEX_op_xor_i32:
2697    case INDEX_op_xor_i64:
2698    case INDEX_op_andc_i32:
2699    case INDEX_op_andc_i64:
2700    case INDEX_op_orc_i32:
2701    case INDEX_op_orc_i64:
2702    case INDEX_op_eqv_i32:
2703    case INDEX_op_eqv_i64:
2704        return &r_r_rL;
2705
2706    case INDEX_op_shl_i32:
2707    case INDEX_op_shr_i32:
2708    case INDEX_op_sar_i32:
2709    case INDEX_op_rotl_i32:
2710    case INDEX_op_rotr_i32:
2711    case INDEX_op_shl_i64:
2712    case INDEX_op_shr_i64:
2713    case INDEX_op_sar_i64:
2714    case INDEX_op_rotl_i64:
2715    case INDEX_op_rotr_i64:
2716        return &r_r_ri;
2717
2718    case INDEX_op_clz_i32:
2719    case INDEX_op_ctz_i32:
2720    case INDEX_op_clz_i64:
2721    case INDEX_op_ctz_i64:
2722        return &r_r_rAL;
2723
2724    case INDEX_op_brcond_i32:
2725    case INDEX_op_brcond_i64:
2726        return &r_rA;
2727
2728    case INDEX_op_movcond_i32:
2729    case INDEX_op_movcond_i64:
2730        return &movc;
2731
2732    case INDEX_op_qemu_ld_i32:
2733    case INDEX_op_qemu_ld_i64:
2734        return &r_l;
2735    case INDEX_op_qemu_st_i32:
2736    case INDEX_op_qemu_st_i64:
2737        return &lZ_l;
2738
2739    case INDEX_op_deposit_i32:
2740    case INDEX_op_deposit_i64:
2741        return &dep;
2742
2743    case INDEX_op_extract2_i32:
2744    case INDEX_op_extract2_i64:
2745        return &ext2;
2746
2747    case INDEX_op_add2_i32:
2748    case INDEX_op_add2_i64:
2749    case INDEX_op_sub2_i32:
2750    case INDEX_op_sub2_i64:
2751        return &add2;
2752
2753    case INDEX_op_add_vec:
2754    case INDEX_op_sub_vec:
2755    case INDEX_op_mul_vec:
2756    case INDEX_op_xor_vec:
2757    case INDEX_op_ssadd_vec:
2758    case INDEX_op_sssub_vec:
2759    case INDEX_op_usadd_vec:
2760    case INDEX_op_ussub_vec:
2761    case INDEX_op_smax_vec:
2762    case INDEX_op_smin_vec:
2763    case INDEX_op_umax_vec:
2764    case INDEX_op_umin_vec:
2765    case INDEX_op_shlv_vec:
2766    case INDEX_op_shrv_vec:
2767    case INDEX_op_sarv_vec:
2768    case INDEX_op_aa64_sshl_vec:
2769        return &w_w_w;
2770    case INDEX_op_not_vec:
2771    case INDEX_op_neg_vec:
2772    case INDEX_op_abs_vec:
2773    case INDEX_op_shli_vec:
2774    case INDEX_op_shri_vec:
2775    case INDEX_op_sari_vec:
2776        return &w_w;
2777    case INDEX_op_ld_vec:
2778    case INDEX_op_st_vec:
2779    case INDEX_op_dupm_vec:
2780        return &w_r;
2781    case INDEX_op_dup_vec:
2782        return &w_wr;
2783    case INDEX_op_or_vec:
2784    case INDEX_op_andc_vec:
2785        return &w_w_wO;
2786    case INDEX_op_and_vec:
2787    case INDEX_op_orc_vec:
2788        return &w_w_wN;
2789    case INDEX_op_cmp_vec:
2790        return &w_w_wZ;
2791    case INDEX_op_bitsel_vec:
2792        return &w_w_w_w;
2793    case INDEX_op_aa64_sli_vec:
2794        return &w_0_w;
2795
2796    default:
2797        return NULL;
2798    }
2799}
2800
2801static void tcg_target_init(TCGContext *s)
2802{
2803    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2804    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2805    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2806    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2807
2808    tcg_target_call_clobber_regs = -1ull;
2809    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2810    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2811    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2812    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2813    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2814    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2815    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2816    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2817    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2818    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2819    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2820    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2821    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2822    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2823    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2824    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2825    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2826    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2827    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2828
2829    s->reserved_regs = 0;
2830    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2831    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2832    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2833    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2834    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2835}
2836
2837/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2838#define PUSH_SIZE  ((30 - 19 + 1) * 8)
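/* That is 12 registers of 8 bytes each: 96 bytes.  */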
2839
2840#define FRAME_SIZE \
2841    ((PUSH_SIZE \
2842      + TCG_STATIC_CALL_ARGS_SIZE \
2843      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2844      + TCG_TARGET_STACK_ALIGN - 1) \
2845     & ~(TCG_TARGET_STACK_ALIGN - 1))
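/* The "(x + align - 1) & ~(align - 1)" idiom rounds the frame size up
   to the next multiple of TCG_TARGET_STACK_ALIGN.  */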
2846
2847/* We expect a 2-byte uleb128 encoded value: at 7 payload bits per
   byte, that limits FRAME_SIZE to 14 bits.  */
2848QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2849
2850/* We're expecting to use a single ADDI insn.  */
2851QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2852
2853static void tcg_target_qemu_prologue(TCGContext *s)
2854{
2855    TCGReg r;
2856
2857    /* Push (FP, LR) and allocate space for all saved registers.  */
2858    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2859                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2860
2861    /* Set up frame pointer for canonical unwinding.  */
2862    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2863
2864    /* Store callee-preserved regs x19..x28.  */
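    /* (FP, LR) occupy the pair slot at SP+0, so x19/x20 start at
       offset 16.  */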
2865    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2866        int ofs = (r - TCG_REG_X19 + 2) * 8;
2867        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2868    }
2869
2870    /* Make stack space for TCG locals.  */
2871    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2872                 FRAME_SIZE - PUSH_SIZE);
2873
2874    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2875    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2876                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2877
2878#if !defined(CONFIG_SOFTMMU)
2879    if (USE_GUEST_BASE) {
2880        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2881        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2882    }
2883#endif
2884
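    /* The prologue is entered with env in the first argument register
       and the TB pointer in the second: stash env in AREG0 and jump
       into the translated code.  */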
2885    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2886    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2887
2888    /*
2889     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2890     * and fall through to the rest of the epilogue.
2891     */
2892    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2893    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2894
2895    /* TB epilogue */
2896    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2897
2898    /* Remove TCG locals stack space.  */
2899    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2900                 FRAME_SIZE - PUSH_SIZE);
2901
2902    /* Restore registers x19..x28.  */
2903    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2904        int ofs = (r - TCG_REG_X19 + 2) * 8;
2905        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2906    }
2907
2908    /* Pop (FP, LR), restore SP to previous frame.  */
2909    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2910                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2911    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2912}
2913
2914static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2915{
2916    int i;
2917    for (i = 0; i < count; ++i) {
2918        p[i] = NOP;
2919    }
2920}
2921
2922typedef struct {
2923    DebugFrameHeader h;
2924    uint8_t fde_def_cfa[4];
2925    uint8_t fde_reg_ofs[24];
2926} DebugFrame;
2927
2928#define ELF_HOST_MACHINE EM_AARCH64
2929
2930static const DebugFrame debug_frame = {
2931    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2932    .h.cie.id = -1,
2933    .h.cie.version = 1,
2934    .h.cie.code_align = 1,
2935    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2936    .h.cie.return_column = TCG_REG_LR,
2937
2938    /* Total FDE size does not include the "len" member.  */
2939    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2940
2941    .fde_def_cfa = {
2942        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2943        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2944        (FRAME_SIZE >> 7)
2945    },
2946    .fde_reg_ofs = {
2947        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2948        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2949        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2950        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2951        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2952        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2953        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2954        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2955        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2956        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2957        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2958        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2959    }
2960};
2961
2962void tcg_register_jit(const void *buf, size_t buf_size)
2963{
2964    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2965}
2966