xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision e6b5a071)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable names, one per TCG register: the 32 general registers
 * first, followed by the 32 AdvSIMD vector registers.  Only built
 * when TCG debugging is enabled.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* v29 is an ordinary vector register; only x29 is the frame pointer. */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
34
/*
 * Register preference list.  General registers come first: x20-x28,
 * then the temporaries x8-x17, then the argument registers x0-x7.
 * The vector registers follow, with v8-v15 left out.  Registers that
 * are absent from the list are explained by the comments inline.
 *
 * NOTE(review): v31 is listed here although TCG_VEC_TMP is defined as
 * v31 in this file -- presumably it is kept away from the allocator
 * through a reserved-register set configured elsewhere; confirm.
 */
static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};
60
/* Registers carrying integer call arguments, in argument order: x0-x7.  */
static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
/* Register carrying the integer call result: x0.  */
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};
68
/* Scratch registers used by this backend: x30 for general-purpose
   temporaries and v31 for vector temporaries.  */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
71
72#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
77#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
78#define TCG_REG_GUEST_BASE TCG_REG_X28
79#endif
80
81static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
82{
83    ptrdiff_t offset = target - code_ptr;
84    if (offset == sextract64(offset, 0, 26)) {
85        /* read instruction, mask away previous PC_REL26 parameter contents,
86           set the proper offset, then write back the instruction. */
87        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
88        return true;
89    }
90    return false;
91}
92
93static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
94{
95    ptrdiff_t offset = target - code_ptr;
96    if (offset == sextract64(offset, 0, 19)) {
97        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
98        return true;
99    }
100    return false;
101}
102
103static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
104                               intptr_t value, intptr_t addend)
105{
106    tcg_debug_assert(addend == 0);
107    switch (type) {
108    case R_AARCH64_JUMP26:
109    case R_AARCH64_CALL26:
110        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
111    case R_AARCH64_CONDBR19:
112        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
113    default:
114        g_assert_not_reached();
115    }
116}
117
/* Target-specific constant-constraint flags; each one is selected by a
   constraint letter in target_parse_constraint().  */
#define TCG_CT_CONST_AIMM 0x100   /* 'A': arithmetic immediate */
#define TCG_CT_CONST_LIMM 0x200   /* 'L': logical immediate */
#define TCG_CT_CONST_ZERO 0x400   /* 'Z': zero */
#define TCG_CT_CONST_MONE 0x800   /* 'M': minus one */
#define TCG_CT_CONST_ORRI 0x1000  /* 'O': vector orr/bic immediate */
#define TCG_CT_CONST_ANDI 0x2000  /* 'N': vector orr/bic immediate, inverted */
124
125/* parse target specific constraints */
126static const char *target_parse_constraint(TCGArgConstraint *ct,
127                                           const char *ct_str, TCGType type)
128{
129    switch (*ct_str++) {
130    case 'r': /* general registers */
131        ct->regs |= 0xffffffffu;
132        break;
133    case 'w': /* advsimd registers */
134        ct->regs |= 0xffffffff00000000ull;
135        break;
136    case 'l': /* qemu_ld / qemu_st address, data_reg */
137        ct->regs = 0xffffffffu;
138#ifdef CONFIG_SOFTMMU
139        /* x0 and x1 will be overwritten when reading the tlb entry,
140           and x2, and x3 for helper args, better to avoid using them. */
141        tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
142        tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
143        tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
144        tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
145#endif
146        break;
147    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
148        ct->ct |= TCG_CT_CONST_AIMM;
149        break;
150    case 'L': /* Valid for logical immediate.  */
151        ct->ct |= TCG_CT_CONST_LIMM;
152        break;
153    case 'M': /* minus one */
154        ct->ct |= TCG_CT_CONST_MONE;
155        break;
156    case 'O': /* vector orr/bic immediate */
157        ct->ct |= TCG_CT_CONST_ORRI;
158        break;
159    case 'N': /* vector orr/bic immediate, inverted */
160        ct->ct |= TCG_CT_CONST_ANDI;
161        break;
162    case 'Z': /* zero */
163        ct->ct |= TCG_CT_CONST_ZERO;
164        break;
165    default:
166        return NULL;
167    }
168    return ct_str;
169}
170
/* Return true if VAL fits an add/sub immediate: a 12-bit value, either
   unshifted (bits [11:0]) or shifted left by 12 (bits [23:12]).  */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;
    const uint64_t high12 = low12 << 12;

    if ((val & ~low12) == 0) {
        return true;
    }
    return (val & ~high12) == 0;
}
176
/*
 * Return true if VAL is accepted as a logical immediate.
 *
 * Only a simplified subset is recognised (replication across the field
 * is ignored): a single contiguous run of ones, i.e. the forms
 *     0....01....1
 *     0..01..10..0
 * and their bitwise inverses.  All-zeros and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    /* Normalise to the variant whose most significant bit is clear.  */
    uint64_t pat = (val >> 63) ? ~val : val;
    uint64_t lowest_bit;
    uint64_t carried;

    if (pat == 0) {
        return false;
    }

    /* Adding the lowest set bit turns one contiguous run of ones into a
       single set bit; anything else leaves more than one bit behind.  */
    lowest_bit = pat & -pat;
    carried = pat + lowest_bit;
    return (carried & (carried - 1)) == 0;
}
197
/*
 * Return true if V16 is a valid 16-bit shifted immediate, i.e. a single
 * byte in either the low or the high half.  On success *CMODE receives
 * 0x8 (low byte) or 0xa (high byte) and *IMM8 the byte value; on failure
 * neither output is written.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        /* Only the low byte is populated (this also covers zero).  */
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0x00ff) == 0) {
        /* Only the high byte is populated.  */
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
212
/*
 * Return true if V32 is a valid 32-bit shifted immediate, i.e. at most
 * one of its four bytes is non-zero.  On success *CMODE receives 0x0,
 * 0x2, 0x4 or 0x6 for byte position 0..3 and *IMM8 the byte value; on
 * failure neither output is written.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    unsigned shift;

    /* Try each byte lane, lowest first, so that zero reports cmode 0.  */
    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = shift / 4;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
235
/*
 * Return true if V32 is a valid 32-bit "shifting ones" immediate: one
 * byte with ones shifted in below it and zeros above.  The form
 * 0x0000XXff yields cmode 0xc with imm8 = XX; the form 0x00XXffff yields
 * cmode 0xd with imm8 = XX.  The first form is tested first.  On failure
 * neither output is written.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 >> 16) == 0 && (v32 & 0xff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    if ((v32 >> 24) == 0 && (v32 & 0xffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
250
/*
 * Return true if V32 is a valid float32 immediate.
 *
 * Bits [18:0] must be zero and bits [30:25] must read 0b100000 or
 * 0b011111.  On success *CMODE is 0xf and *IMM8 is built from bit 31
 * (into bit 7), bit 25 (into bit 6) and bits [24:19] (into bits [5:0]).
 * On failure neither output is written.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_hi;

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    exp_hi = extract32(v32, 25, 6);
    if (exp_hi != 0x20 && exp_hi != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
265
/*
 * Return true if V64 is a valid float64 immediate.
 *
 * Bits [47:0] must be zero and bits [62:54] must read 0x100 or 0x0ff.
 * On success *CMODE is 0xf and *IMM8 is built from bit 63 (into bit 7),
 * bit 54 (into bit 6) and bits [53:48] (into bits [5:0]).  On failure
 * neither output is written.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_hi;

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    exp_hi = extract64(v64, 54, 9);
    if (exp_hi != 0x100 && exp_hi != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
280
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 *
 * One byte of v32 (byte 3, then 2, then 1) is masked out; if what is
 * left is a shifted or shifting-ones immediate, its MOVI parameters are
 * stored in (cmode, imm8) and the ORR cmode for the masked byte (6, 4
 * or 2) is returned.  The ORR imm8 is that byte of v32, which the caller
 * extracts itself.  Returns 0, with the outputs unwritten, on failure.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode = 6;

    while (orr_cmode > 0) {
        /* Drop the byte that the ORR would supply.  */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8) ||
            is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        orr_cmode -= 2;
    }
    return 0;
}
300
/* Return true if V32 is a valid 16-bit or 32-bit shifted immediate.
   When both 16-bit halves are equal the value is judged as a replicated
   16-bit element, otherwise as a 32-bit element.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
310
311static int tcg_target_const_match(tcg_target_long val, TCGType type,
312                                  const TCGArgConstraint *arg_ct)
313{
314    int ct = arg_ct->ct;
315
316    if (ct & TCG_CT_CONST) {
317        return 1;
318    }
319    if (type == TCG_TYPE_I32) {
320        val = (int32_t)val;
321    }
322    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
323        return 1;
324    }
325    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
326        return 1;
327    }
328    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
329        return 1;
330    }
331    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
332        return 1;
333    }
334
335    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
336    case 0:
337        break;
338    case TCG_CT_CONST_ANDI:
339        val = ~val;
340        /* fallthru */
341    case TCG_CT_CONST_ORRI:
342        if (val == deposit64(val, 32, 32, val)) {
343            int cmode, imm8;
344            return is_shimm1632(val, &cmode, &imm8);
345        }
346        break;
347    default:
348        /* Both bits should not be set for the same insn.  */
349        g_assert_not_reached();
350    }
351
352    return 0;
353}
354
/* AArch64 condition codes, as the 4-bit values placed in instruction
   encodings.  HS and LO are aliases of CS and CC respectively.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};
375
/* Translation table from a TCG comparison condition (the index) to the
   AArch64 condition code that implements it.  */
static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};
389
/* Load/store kind; the I3312_* opcode constants place this value at
   bit 22 of the instruction.  */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
396
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.

   Each enumerator is the base opcode word of an instruction; the
   tcg_out_insn_<format>() emitters OR the operand fields into it.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Not instructions: bits OR-ed into an I3312_* opcode by
       tcg_out_insn_3310() and tcg_out_insn_3313() respectively.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).
       Named 3502S so that it pairs with tcg_out_insn_3502S().  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.
       NOTE(review): DMB_LD and DMB_ST look like option bits meant to be
       OR-ed into DMB_ISH; their use is outside this part of the file.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
606
607static inline uint32_t tcg_in32(TCGContext *s)
608{
609    uint32_t v = *(uint32_t *)s->code_ptr;
610    return v;
611}
612
/* Emit an opcode with "type-checking" of the format.
   FMT is the format number and OP the opcode suffix.  The macro builds
   a call to tcg_out_insn_<FMT>() whose insn argument is the enumerator
   I<FMT>_<OP>, so an opcode can only be paired with the emitter of its
   own format.  For example, tcg_out_insn(s, 3207, BR, rn) becomes
   tcg_out_insn_3207(s, I3207_BR, rn).  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
616
617static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
618                              TCGReg rt, TCGReg rn, unsigned size)
619{
620    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
621}
622
623static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
624                              int imm19, TCGReg rt)
625{
626    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
627}
628
629static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
630                              TCGReg rt, int imm19)
631{
632    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
633}
634
635static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
636                              TCGCond c, int imm19)
637{
638    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
639}
640
641static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
642{
643    tcg_out32(s, insn | (imm26 & 0x03ffffff));
644}
645
646static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
647{
648    tcg_out32(s, insn | rn << 5);
649}
650
651static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
652                              TCGReg r1, TCGReg r2, TCGReg rn,
653                              tcg_target_long ofs, bool pre, bool w)
654{
655    insn |= 1u << 31; /* ext */
656    insn |= pre << 24;
657    insn |= w << 23;
658
659    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
660    insn |= (ofs & (0x7f << 3)) << (15 - 3);
661
662    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
663}
664
665static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
666                              TCGReg rd, TCGReg rn, uint64_t aimm)
667{
668    if (aimm > 0xfff) {
669        tcg_debug_assert((aimm & 0xfff) == 0);
670        aimm >>= 12;
671        tcg_debug_assert(aimm <= 0xfff);
672        aimm |= 1 << 12;  /* apply LSL 12 */
673    }
674    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
675}
676
677/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
678   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
679   that feed the DecodeBitMasks pseudo function.  */
680static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
681                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
682{
683    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
684              | rn << 5 | rd);
685}
686
687#define tcg_out_insn_3404  tcg_out_insn_3402
688
689static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
690                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
691{
692    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
693              | rn << 5 | rd);
694}
695
696/* This function is used for the Move (wide immediate) instruction group.
697   Note that SHIFT is a full shift count, not the 2 bit HW field. */
698static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
699                              TCGReg rd, uint16_t half, unsigned shift)
700{
701    tcg_debug_assert((shift & ~0x30) == 0);
702    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
703}
704
705static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
706                              TCGReg rd, int64_t disp)
707{
708    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
709}
710
711/* This function is for both 3.5.2 (Add/Subtract shifted register), for
712   the rare occasion when we actually want to supply a shift amount.  */
713static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
714                                      TCGType ext, TCGReg rd, TCGReg rn,
715                                      TCGReg rm, int imm6)
716{
717    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
718}
719
720/* This function is for 3.5.2 (Add/subtract shifted register),
721   and 3.5.10 (Logical shifted register), for the vast majorty of cases
722   when we don't want to apply a shift.  Thus it can also be used for
723   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
724static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
725                              TCGReg rd, TCGReg rn, TCGReg rm)
726{
727    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
728}
729
730#define tcg_out_insn_3503  tcg_out_insn_3502
731#define tcg_out_insn_3508  tcg_out_insn_3502
732#define tcg_out_insn_3510  tcg_out_insn_3502
733
734static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
735                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
736{
737    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
738              | tcg_cond_to_aarch64[c] << 12);
739}
740
741static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
742                              TCGReg rd, TCGReg rn)
743{
744    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
745}
746
747static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
748                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
749{
750    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
751}
752
753static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
754                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
755{
756    /* Note that bit 11 set means general register input.  Therefore
757       we can handle both register sets with one function.  */
758    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
759              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
760}
761
762static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
763                              TCGReg rd, bool op, int cmode, uint8_t imm8)
764{
765    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
766              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
767}
768
769static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
770                              TCGReg rd, TCGReg rn, unsigned immhb)
771{
772    tcg_out32(s, insn | q << 30 | immhb << 16
773              | (rn & 0x1f) << 5 | (rd & 0x1f));
774}
775
776static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
777                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
778{
779    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
780              | (rn & 0x1f) << 5 | (rd & 0x1f));
781}
782
783static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
784                              unsigned size, TCGReg rd, TCGReg rn)
785{
786    tcg_out32(s, insn | q << 30 | (size << 22)
787              | (rn & 0x1f) << 5 | (rd & 0x1f));
788}
789
790static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
791                              TCGReg rd, TCGReg base, TCGType ext,
792                              TCGReg regoff)
793{
794    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
795    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
796              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
797}
798
799static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
800                              TCGReg rd, TCGReg rn, intptr_t offset)
801{
802    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
806                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
807{
808    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
809    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
810              | rn << 5 | (rd & 0x1f));
811}
812
/* Register to register move using ORR (shifted register with no shift).
   Emitted as ORR rd, xzr, rm; EXT selects the operation size.  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}
818
/* Register to register move using ADDI (move to/from SP).
   Emitted as ADD rd, rn, #0; EXT selects the operation size.  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
824
/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.

   LIMM is a single run of ones, possibly wrapping around from the top
   bit to the bottom bit.  It is converted into R, passed as the IMMR
   field, and C, passed as the IMMS field; in every branch below C works
   out to the number of one bits minus one.  EXT is passed both as the
   size bit and as the N field.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    /* h: zero bits above the run of ones; l: zero bits below it.  */
    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;   /* length of the low run of ones, minus 1 */
        if (h == 0) {
            /* The run wraps: add the ones sitting at the top.  */
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;          /* 64 - l - h ones, minus 1 */
    }
    if (ext == TCG_TYPE_I32) {
        /* Keep only the low 5 bits of each field for 32-bit operations.  */
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
855
856static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
857                             TCGReg rd, tcg_target_long v64)
858{
859    bool q = type == TCG_TYPE_V128;
860    int cmode, imm8, i;
861
862    /* Test all bytes equal first.  */
863    if (v64 == dup_const(MO_8, v64)) {
864        imm8 = (uint8_t)v64;
865        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
866        return;
867    }
868
869    /*
870     * Test all bytes 0x00 or 0xff second.  This can match cases that
871     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
872     */
873    for (i = imm8 = 0; i < 8; i++) {
874        uint8_t byte = v64 >> (i * 8);
875        if (byte == 0xff) {
876            imm8 |= 1 << i;
877        } else if (byte != 0) {
878            goto fail_bytes;
879        }
880    }
881    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
882    return;
883 fail_bytes:
884
885    /*
886     * Tests for various replications.  For each element width, if we
887     * cannot find an expansion there's no point checking a larger
888     * width because we already know by replication it cannot match.
889     */
890    if (v64 == dup_const(MO_16, v64)) {
891        uint16_t v16 = v64;
892
893        if (is_shimm16(v16, &cmode, &imm8)) {
894            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
895            return;
896        }
897        if (is_shimm16(~v16, &cmode, &imm8)) {
898            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
899            return;
900        }
901
902        /*
903         * Otherwise, all remaining constants can be loaded in two insns:
904         * rd = v16 & 0xff, rd |= v16 & 0xff00.
905         */
906        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
907        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
908        return;
909    } else if (v64 == dup_const(MO_32, v64)) {
910        uint32_t v32 = v64;
911        uint32_t n32 = ~v32;
912
913        if (is_shimm32(v32, &cmode, &imm8) ||
914            is_soimm32(v32, &cmode, &imm8) ||
915            is_fimm32(v32, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
917            return;
918        }
919        if (is_shimm32(n32, &cmode, &imm8) ||
920            is_soimm32(n32, &cmode, &imm8)) {
921            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
922            return;
923        }
924
925        /*
926         * Restrict the set of constants to those we can load with
927         * two instructions.  Others we load from the pool.
928         */
929        i = is_shimm32_pair(v32, &cmode, &imm8);
930        if (i) {
931            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
932            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
933            return;
934        }
935        i = is_shimm32_pair(n32, &cmode, &imm8);
936        if (i) {
937            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
938            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
939            return;
940        }
941    } else if (is_fimm64(v64, &cmode, &imm8)) {
942        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
943        return;
944    }
945
946    /*
947     * As a last resort, load from the constant pool.  Sadly there
948     * is no LD1R (literal), so store the full 16-byte vector.
949     */
950    if (type == TCG_TYPE_V128) {
951        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
952        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
953    } else {
954        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
955        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
956    }
957}
958
959static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
960                            TCGReg rd, TCGReg rs)
961{
962    int is_q = type - TCG_TYPE_V64;
963    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
964    return true;
965}
966
967static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
968                             TCGReg r, TCGReg base, intptr_t offset)
969{
970    TCGReg temp = TCG_REG_TMP;
971
972    if (offset < -0xffffff || offset > 0xffffff) {
973        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
974        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
975        base = temp;
976    } else {
977        AArch64Insn add_insn = I3401_ADDI;
978
979        if (offset < 0) {
980            add_insn = I3401_SUBI;
981            offset = -offset;
982        }
983        if (offset & 0xfff000) {
984            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
985            base = temp;
986        }
987        if (offset & 0xfff) {
988            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
989            base = temp;
990        }
991    }
992    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
993    return true;
994}
995
996static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
997                         tcg_target_long value)
998{
999    tcg_target_long svalue = value;
1000    tcg_target_long ivalue = ~value;
1001    tcg_target_long t0, t1, t2;
1002    int s0, s1;
1003    AArch64Insn opc;
1004
1005    switch (type) {
1006    case TCG_TYPE_I32:
1007    case TCG_TYPE_I64:
1008        tcg_debug_assert(rd < 32);
1009        break;
1010
1011    case TCG_TYPE_V64:
1012    case TCG_TYPE_V128:
1013        tcg_debug_assert(rd >= 32);
1014        tcg_out_dupi_vec(s, type, rd, value);
1015        return;
1016
1017    default:
1018        g_assert_not_reached();
1019    }
1020
1021    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1022       values within [2**31, 2**32-1], we can create smaller sequences by
1023       interpreting this as a negative 32-bit number, while ensuring that
1024       the high 32 bits are cleared by setting SF=0.  */
1025    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1026        svalue = (int32_t)value;
1027        value = (uint32_t)value;
1028        ivalue = (uint32_t)ivalue;
1029        type = TCG_TYPE_I32;
1030    }
1031
1032    /* Speed things up by handling the common case of small positive
1033       and negative values specially.  */
1034    if ((value & ~0xffffull) == 0) {
1035        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1036        return;
1037    } else if ((ivalue & ~0xffffull) == 0) {
1038        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1039        return;
1040    }
1041
1042    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1043       use the sign-extended value.  That lets us match rotated values such
1044       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1045    if (is_limm(svalue)) {
1046        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1047        return;
1048    }
1049
1050    /* Look for host pointer values within 4G of the PC.  This happens
1051       often when loading pointers to QEMU's own data structures.  */
1052    if (type == TCG_TYPE_I64) {
1053        tcg_target_long disp = value - (intptr_t)s->code_ptr;
1054        if (disp == sextract64(disp, 0, 21)) {
1055            tcg_out_insn(s, 3406, ADR, rd, disp);
1056            return;
1057        }
1058        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
1059        if (disp == sextract64(disp, 0, 21)) {
1060            tcg_out_insn(s, 3406, ADRP, rd, disp);
1061            if (value & 0xfff) {
1062                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1063            }
1064            return;
1065        }
1066    }
1067
1068    /* Would it take fewer insns to begin with MOVN?  */
1069    if (ctpop64(value) >= 32) {
1070        t0 = ivalue;
1071        opc = I3405_MOVN;
1072    } else {
1073        t0 = value;
1074        opc = I3405_MOVZ;
1075    }
1076    s0 = ctz64(t0) & (63 & -16);
1077    t1 = t0 & ~(0xffffUL << s0);
1078    s1 = ctz64(t1) & (63 & -16);
1079    t2 = t1 & ~(0xffffUL << s1);
1080    if (t2 == 0) {
1081        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1082        if (t1 != 0) {
1083            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1084        }
1085        return;
1086    }
1087
1088    /* For more than 2 insns, dump it into the constant pool.  */
1089    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1090    tcg_out_insn(s, 3305, LDR, 0, rd);
1091}
1092
/* Define something more legible for general use: tcg_out_ldst_r is the
   load/store emitter that takes a base register plus an offset register
   (instruction format 3310).  */
#define tcg_out_ldst_r  tcg_out_insn_3310
1095
1096static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1097                         TCGReg rn, intptr_t offset, int lgsize)
1098{
1099    /* If the offset is naturally aligned and in range, then we can
1100       use the scaled uimm12 encoding */
1101    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1102        uintptr_t scaled_uimm = offset >> lgsize;
1103        if (scaled_uimm <= 0xfff) {
1104            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1105            return;
1106        }
1107    }
1108
1109    /* Small signed offsets can use the unscaled encoding.  */
1110    if (offset >= -256 && offset < 256) {
1111        tcg_out_insn_3312(s, insn, rd, rn, offset);
1112        return;
1113    }
1114
1115    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1116    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1117    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1118}
1119
1120static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1121{
1122    if (ret == arg) {
1123        return true;
1124    }
1125    switch (type) {
1126    case TCG_TYPE_I32:
1127    case TCG_TYPE_I64:
1128        if (ret < 32 && arg < 32) {
1129            tcg_out_movr(s, type, ret, arg);
1130            break;
1131        } else if (ret < 32) {
1132            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1133            break;
1134        } else if (arg < 32) {
1135            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1136            break;
1137        }
1138        /* FALLTHRU */
1139
1140    case TCG_TYPE_V64:
1141        tcg_debug_assert(ret >= 32 && arg >= 32);
1142        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1143        break;
1144    case TCG_TYPE_V128:
1145        tcg_debug_assert(ret >= 32 && arg >= 32);
1146        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1147        break;
1148
1149    default:
1150        g_assert_not_reached();
1151    }
1152    return true;
1153}
1154
1155static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1156                       TCGReg base, intptr_t ofs)
1157{
1158    AArch64Insn insn;
1159    int lgsz;
1160
1161    switch (type) {
1162    case TCG_TYPE_I32:
1163        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1164        lgsz = 2;
1165        break;
1166    case TCG_TYPE_I64:
1167        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1168        lgsz = 3;
1169        break;
1170    case TCG_TYPE_V64:
1171        insn = I3312_LDRVD;
1172        lgsz = 3;
1173        break;
1174    case TCG_TYPE_V128:
1175        insn = I3312_LDRVQ;
1176        lgsz = 4;
1177        break;
1178    default:
1179        g_assert_not_reached();
1180    }
1181    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1182}
1183
1184static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1185                       TCGReg base, intptr_t ofs)
1186{
1187    AArch64Insn insn;
1188    int lgsz;
1189
1190    switch (type) {
1191    case TCG_TYPE_I32:
1192        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1193        lgsz = 2;
1194        break;
1195    case TCG_TYPE_I64:
1196        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1197        lgsz = 3;
1198        break;
1199    case TCG_TYPE_V64:
1200        insn = I3312_STRVD;
1201        lgsz = 3;
1202        break;
1203    case TCG_TYPE_V128:
1204        insn = I3312_STRVQ;
1205        lgsz = 4;
1206        break;
1207    default:
1208        g_assert_not_reached();
1209    }
1210    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1211}
1212
1213static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1214                               TCGReg base, intptr_t ofs)
1215{
1216    if (type <= TCG_TYPE_I64 && val == 0) {
1217        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1218        return true;
1219    }
1220    return false;
1221}
1222
1223static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1224                               TCGReg rn, unsigned int a, unsigned int b)
1225{
1226    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1227}
1228
1229static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1230                                TCGReg rn, unsigned int a, unsigned int b)
1231{
1232    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1233}
1234
1235static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1236                                TCGReg rn, unsigned int a, unsigned int b)
1237{
1238    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1239}
1240
1241static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1242                                TCGReg rn, TCGReg rm, unsigned int a)
1243{
1244    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1245}
1246
1247static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1248                               TCGReg rd, TCGReg rn, unsigned int m)
1249{
1250    int bits = ext ? 64 : 32;
1251    int max = bits - 1;
1252    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1253}
1254
1255static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1256                               TCGReg rd, TCGReg rn, unsigned int m)
1257{
1258    int max = ext ? 63 : 31;
1259    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1260}
1261
1262static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1263                               TCGReg rd, TCGReg rn, unsigned int m)
1264{
1265    int max = ext ? 63 : 31;
1266    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1267}
1268
1269static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1270                                TCGReg rd, TCGReg rn, unsigned int m)
1271{
1272    int max = ext ? 63 : 31;
1273    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1274}
1275
1276static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1277                                TCGReg rd, TCGReg rn, unsigned int m)
1278{
1279    int bits = ext ? 64 : 32;
1280    int max = bits - 1;
1281    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1282}
1283
1284static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1285                               TCGReg rn, unsigned lsb, unsigned width)
1286{
1287    unsigned size = ext ? 64 : 32;
1288    unsigned a = (size - lsb) & (size - 1);
1289    unsigned b = width - 1;
1290    tcg_out_bfm(s, ext, rd, rn, a, b);
1291}
1292
1293static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1294                        tcg_target_long b, bool const_b)
1295{
1296    if (const_b) {
1297        /* Using CMP or CMN aliases.  */
1298        if (b >= 0) {
1299            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1300        } else {
1301            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1302        }
1303    } else {
1304        /* Using CMP alias SUBS wzr, Wn, Wm */
1305        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1306    }
1307}
1308
1309static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
1310{
1311    ptrdiff_t offset = target - s->code_ptr;
1312    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1313    tcg_out_insn(s, 3206, B, offset);
1314}
1315
1316static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
1317{
1318    ptrdiff_t offset = target - s->code_ptr;
1319    if (offset == sextract64(offset, 0, 26)) {
1320        tcg_out_insn(s, 3206, BL, offset);
1321    } else {
1322        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1323        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1324    }
1325}
1326
1327static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1328{
1329    tcg_out_insn(s, 3207, BLR, reg);
1330}
1331
1332static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
1333{
1334    ptrdiff_t offset = target - s->code_ptr;
1335    if (offset == sextract64(offset, 0, 26)) {
1336        tcg_out_insn(s, 3206, BL, offset);
1337    } else {
1338        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1339        tcg_out_callr(s, TCG_REG_TMP);
1340    }
1341}
1342
1343void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
1344                              uintptr_t addr)
1345{
1346    tcg_insn_unit i1, i2;
1347    TCGType rt = TCG_TYPE_I64;
1348    TCGReg  rd = TCG_REG_TMP;
1349    uint64_t pair;
1350
1351    ptrdiff_t offset = addr - jmp_addr;
1352
1353    if (offset == sextract64(offset, 0, 26)) {
1354        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1355        i2 = NOP;
1356    } else {
1357        offset = (addr >> 12) - (jmp_addr >> 12);
1358
1359        /* patch ADRP */
1360        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1361        /* patch ADDI */
1362        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1363    }
1364    pair = (uint64_t)i2 << 32 | i1;
1365    qatomic_set((uint64_t *)jmp_addr, pair);
1366    flush_icache_range(jmp_addr, jmp_addr + 8);
1367}
1368
1369static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1370{
1371    if (!l->has_value) {
1372        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1373        tcg_out_insn(s, 3206, B, 0);
1374    } else {
1375        tcg_out_goto(s, l->u.value_ptr);
1376    }
1377}
1378
1379static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1380                           TCGArg b, bool b_const, TCGLabel *l)
1381{
1382    intptr_t offset;
1383    bool need_cmp;
1384
1385    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1386        need_cmp = false;
1387    } else {
1388        need_cmp = true;
1389        tcg_out_cmp(s, ext, a, b, b_const);
1390    }
1391
1392    if (!l->has_value) {
1393        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1394        offset = tcg_in32(s) >> 5;
1395    } else {
1396        offset = l->u.value_ptr - s->code_ptr;
1397        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1398    }
1399
1400    if (need_cmp) {
1401        tcg_out_insn(s, 3202, B_C, c, offset);
1402    } else if (c == TCG_COND_EQ) {
1403        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1404    } else {
1405        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1406    }
1407}
1408
1409static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1410{
1411    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1412}
1413
1414static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1415{
1416    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1417}
1418
1419static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1420{
1421    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1422}
1423
1424static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1425                               TCGReg rd, TCGReg rn)
1426{
1427    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1428    int bits = (8 << s_bits) - 1;
1429    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1430}
1431
1432static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1433                               TCGReg rd, TCGReg rn)
1434{
1435    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1436    int bits = (8 << s_bits) - 1;
1437    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1438}
1439
1440static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1441                            TCGReg rn, int64_t aimm)
1442{
1443    if (aimm >= 0) {
1444        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1445    } else {
1446        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1447    }
1448}
1449
1450static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1451                                   TCGReg rh, TCGReg al, TCGReg ah,
1452                                   tcg_target_long bl, tcg_target_long bh,
1453                                   bool const_bl, bool const_bh, bool sub)
1454{
1455    TCGReg orig_rl = rl;
1456    AArch64Insn insn;
1457
1458    if (rl == ah || (!const_bh && rl == bh)) {
1459        rl = TCG_REG_TMP;
1460    }
1461
1462    if (const_bl) {
1463        insn = I3401_ADDSI;
1464        if ((bl < 0) ^ sub) {
1465            insn = I3401_SUBSI;
1466            bl = -bl;
1467        }
1468        if (unlikely(al == TCG_REG_XZR)) {
1469            /* ??? We want to allow al to be zero for the benefit of
1470               negation via subtraction.  However, that leaves open the
1471               possibility of adding 0+const in the low part, and the
1472               immediate add instructions encode XSP not XZR.  Don't try
1473               anything more elaborate here than loading another zero.  */
1474            al = TCG_REG_TMP;
1475            tcg_out_movi(s, ext, al, 0);
1476        }
1477        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1478    } else {
1479        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1480    }
1481
1482    insn = I3503_ADC;
1483    if (const_bh) {
1484        /* Note that the only two constants we support are 0 and -1, and
1485           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1486        if ((bh != 0) ^ sub) {
1487            insn = I3503_SBC;
1488        }
1489        bh = TCG_REG_XZR;
1490    } else if (sub) {
1491        insn = I3503_SBC;
1492    }
1493    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1494
1495    tcg_out_mov(s, ext, orig_rl, rl);
1496}
1497
1498static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1499{
1500    static const uint32_t sync[] = {
1501        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1502        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1503        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1504        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1505        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1506    };
1507    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1508}
1509
1510static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1511                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1512{
1513    TCGReg a1 = a0;
1514    if (is_ctz) {
1515        a1 = TCG_REG_TMP;
1516        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1517    }
1518    if (const_b && b == (ext ? 64 : 32)) {
1519        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1520    } else {
1521        AArch64Insn sel = I3506_CSEL;
1522
1523        tcg_out_cmp(s, ext, a0, 0, 1);
1524        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1525
1526        if (const_b) {
1527            if (b == -1) {
1528                b = TCG_REG_XZR;
1529                sel = I3506_CSINV;
1530            } else if (b == 0) {
1531                b = TCG_REG_XZR;
1532            } else {
1533                tcg_out_movi(s, ext, d, b);
1534                b = d;
1535            }
1536        }
1537        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1538    }
1539}
1540
1541#ifdef CONFIG_SOFTMMU
1542#include "../tcg-ldst.c.inc"
1543
1544/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1545 *                                     TCGMemOpIdx oi, uintptr_t ra)
1546 */
1547static void * const qemu_ld_helpers[16] = {
1548    [MO_UB]   = helper_ret_ldub_mmu,
1549    [MO_LEUW] = helper_le_lduw_mmu,
1550    [MO_LEUL] = helper_le_ldul_mmu,
1551    [MO_LEQ]  = helper_le_ldq_mmu,
1552    [MO_BEUW] = helper_be_lduw_mmu,
1553    [MO_BEUL] = helper_be_ldul_mmu,
1554    [MO_BEQ]  = helper_be_ldq_mmu,
1555};
1556
1557/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1558 *                                     uintxx_t val, TCGMemOpIdx oi,
1559 *                                     uintptr_t ra)
1560 */
1561static void * const qemu_st_helpers[16] = {
1562    [MO_UB]   = helper_ret_stb_mmu,
1563    [MO_LEUW] = helper_le_stw_mmu,
1564    [MO_LEUL] = helper_le_stl_mmu,
1565    [MO_LEQ]  = helper_le_stq_mmu,
1566    [MO_BEUW] = helper_be_stw_mmu,
1567    [MO_BEUL] = helper_be_stl_mmu,
1568    [MO_BEQ]  = helper_be_stq_mmu,
1569};
1570
1571static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1572{
1573    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1574    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1575    tcg_out_insn(s, 3406, ADR, rd, offset);
1576}
1577
1578static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1579{
1580    TCGMemOpIdx oi = lb->oi;
1581    MemOp opc = get_memop(oi);
1582    MemOp size = opc & MO_SIZE;
1583
1584    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1585        return false;
1586    }
1587
1588    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1589    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1590    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1591    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1592    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1593    if (opc & MO_SIGN) {
1594        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1595    } else {
1596        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1597    }
1598
1599    tcg_out_goto(s, lb->raddr);
1600    return true;
1601}
1602
1603static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1604{
1605    TCGMemOpIdx oi = lb->oi;
1606    MemOp opc = get_memop(oi);
1607    MemOp size = opc & MO_SIZE;
1608
1609    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1610        return false;
1611    }
1612
1613    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1614    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1615    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1616    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1617    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1618    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1619    tcg_out_goto(s, lb->raddr);
1620    return true;
1621}
1622
1623static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1624                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1625                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1626{
1627    TCGLabelQemuLdst *label = new_ldst_label(s);
1628
1629    label->is_ld = is_ld;
1630    label->oi = oi;
1631    label->type = ext;
1632    label->datalo_reg = data_reg;
1633    label->addrlo_reg = addr_reg;
1634    label->raddr = raddr;
1635    label->label_ptr[0] = label_ptr;
1636}
1637
/* We expect to use a 7-bit scaled negative offset from ENV: the fast TLB
   descriptor must lie within [-512, 0] bytes of the env pointer.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below: tcg_out_tlb_read loads
   mask and table as one adjacent pair of 64-bit values.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1645
1646/* Load and compare a TLB entry, emitting the conditional jump to the
1647   slow path for the failure case, which will be patched later when finalizing
1648   the slow path. Generated code returns the host addend in X1,
1649   clobbers X0,X2,X3,TMP. */
1650static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1651                             tcg_insn_unit **label_ptr, int mem_index,
1652                             bool is_read)
1653{
1654    unsigned a_bits = get_alignment_bits(opc);
1655    unsigned s_bits = opc & MO_SIZE;
1656    unsigned a_mask = (1u << a_bits) - 1;
1657    unsigned s_mask = (1u << s_bits) - 1;
1658    TCGReg x3;
1659    TCGType mask_type;
1660    uint64_t compare_mask;
1661
1662    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1663                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1664
1665    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1666    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1667                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1668
1669    /* Extract the TLB index from the address into X0.  */
1670    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1671                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1672                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1673
1674    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1675    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1676
1677    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1678    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1679               ? offsetof(CPUTLBEntry, addr_read)
1680               : offsetof(CPUTLBEntry, addr_write));
1681    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1682               offsetof(CPUTLBEntry, addend));
1683
1684    /* For aligned accesses, we check the first byte and include the alignment
1685       bits within the address.  For unaligned access, we check that we don't
1686       cross pages using the address of the last byte of the access.  */
1687    if (a_bits >= s_bits) {
1688        x3 = addr_reg;
1689    } else {
1690        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1691                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1692        x3 = TCG_REG_X3;
1693    }
1694    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1695
1696    /* Store the page mask part of the address into X3.  */
1697    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1698                     TCG_REG_X3, x3, compare_mask);
1699
1700    /* Perform the address comparison. */
1701    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1702
1703    /* If not equal, we jump to the slow path. */
1704    *label_ptr = s->code_ptr;
1705    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1706}
1707
1708#endif /* CONFIG_SOFTMMU */
1709
1710static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1711                                   TCGReg data_r, TCGReg addr_r,
1712                                   TCGType otype, TCGReg off_r)
1713{
1714    const MemOp bswap = memop & MO_BSWAP;
1715
1716    switch (memop & MO_SSIZE) {
1717    case MO_UB:
1718        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1719        break;
1720    case MO_SB:
1721        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1722                       data_r, addr_r, otype, off_r);
1723        break;
1724    case MO_UW:
1725        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1726        if (bswap) {
1727            tcg_out_rev16(s, data_r, data_r);
1728        }
1729        break;
1730    case MO_SW:
1731        if (bswap) {
1732            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1733            tcg_out_rev16(s, data_r, data_r);
1734            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1735        } else {
1736            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1737                           data_r, addr_r, otype, off_r);
1738        }
1739        break;
1740    case MO_UL:
1741        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1742        if (bswap) {
1743            tcg_out_rev32(s, data_r, data_r);
1744        }
1745        break;
1746    case MO_SL:
1747        if (bswap) {
1748            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1749            tcg_out_rev32(s, data_r, data_r);
1750            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1751        } else {
1752            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1753        }
1754        break;
1755    case MO_Q:
1756        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1757        if (bswap) {
1758            tcg_out_rev64(s, data_r, data_r);
1759        }
1760        break;
1761    default:
1762        tcg_abort();
1763    }
1764}
1765
1766static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1767                                   TCGReg data_r, TCGReg addr_r,
1768                                   TCGType otype, TCGReg off_r)
1769{
1770    const MemOp bswap = memop & MO_BSWAP;
1771
1772    switch (memop & MO_SIZE) {
1773    case MO_8:
1774        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1775        break;
1776    case MO_16:
1777        if (bswap && data_r != TCG_REG_XZR) {
1778            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1779            data_r = TCG_REG_TMP;
1780        }
1781        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1782        break;
1783    case MO_32:
1784        if (bswap && data_r != TCG_REG_XZR) {
1785            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1786            data_r = TCG_REG_TMP;
1787        }
1788        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1789        break;
1790    case MO_64:
1791        if (bswap && data_r != TCG_REG_XZR) {
1792            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1793            data_r = TCG_REG_TMP;
1794        }
1795        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1796        break;
1797    default:
1798        tcg_abort();
1799    }
1800}
1801
1802static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1803                            TCGMemOpIdx oi, TCGType ext)
1804{
1805    MemOp memop = get_memop(oi);
1806    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1807#ifdef CONFIG_SOFTMMU
1808    unsigned mem_index = get_mmuidx(oi);
1809    tcg_insn_unit *label_ptr;
1810
1811    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1812    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1813                           TCG_REG_X1, otype, addr_reg);
1814    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1815                        s->code_ptr, label_ptr);
1816#else /* !CONFIG_SOFTMMU */
1817    if (USE_GUEST_BASE) {
1818        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1819                               TCG_REG_GUEST_BASE, otype, addr_reg);
1820    } else {
1821        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1822                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1823    }
1824#endif /* CONFIG_SOFTMMU */
1825}
1826
1827static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1828                            TCGMemOpIdx oi)
1829{
1830    MemOp memop = get_memop(oi);
1831    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1832#ifdef CONFIG_SOFTMMU
1833    unsigned mem_index = get_mmuidx(oi);
1834    tcg_insn_unit *label_ptr;
1835
1836    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1837    tcg_out_qemu_st_direct(s, memop, data_reg,
1838                           TCG_REG_X1, otype, addr_reg);
1839    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1840                        data_reg, addr_reg, s->code_ptr, label_ptr);
1841#else /* !CONFIG_SOFTMMU */
1842    if (USE_GUEST_BASE) {
1843        tcg_out_qemu_st_direct(s, memop, data_reg,
1844                               TCG_REG_GUEST_BASE, otype, addr_reg);
1845    } else {
1846        tcg_out_qemu_st_direct(s, memop, data_reg,
1847                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1848    }
1849#endif /* CONFIG_SOFTMMU */
1850}
1851
/*
 * Code address that INDEX_op_exit_tb jumps to after loading a non-zero
 * return value into X0 (see tcg_out_op).  It is not assigned in the code
 * shown here; presumably the prologue generator sets it -- confirm.
 */
1852static tcg_insn_unit *tb_ret_addr;
1853
/*
 * Emit AArch64 code for one integer (non-vector) TCG opcode.
 *
 * s          - code generation context the instructions are appended to
 * opc        - opcode to emit
 * args       - operands of the opcode; args[0..2] are cached in a0/a1/a2
 * const_args - per-operand flag, non-zero when the matching args[] entry
 *              is a constant rather than a register
 *
 * The operation width "ext" is derived from the TCG_OPF_64BIT flag of the
 * opcode definition.  For the _i32 opcodes that accept a constant operand,
 * that operand is first narrowed through int32_t and the code then falls
 * through to the shared _i64 case.  mov, movi and call opcodes, and any
 * opcode without a case below, end in g_assert_not_reached().
 */
1854static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1855                       const TCGArg args[TCG_MAX_OP_ARGS],
1856                       const int const_args[TCG_MAX_OP_ARGS])
1857{
1858    /* 99% of the time, we can signal the use of extension registers
1859       by looking to see if the opcode handles 64-bit data.  */
1860    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1861
1862    /* Hoist the loads of the most common arguments.  */
1863    TCGArg a0 = args[0];
1864    TCGArg a1 = args[1];
1865    TCGArg a2 = args[2];
1866    int c2 = const_args[2];
1867
1868    /* Some operands are defined with "rZ" constraint, a register or
1869       the zero register.  These need not actually test args[I] == 0.  */
1870#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1871
1872    switch (opc) {
1873    case INDEX_op_exit_tb:
1874        /* Reuse the zeroing that exists for goto_ptr.  */
1875        if (a0 == 0) {
1876            tcg_out_goto_long(s, s->code_gen_epilogue);
1877        } else {
1878            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1879            tcg_out_goto_long(s, tb_ret_addr);
1880        }
1881        break;
1882
1883    case INDEX_op_goto_tb:
1884        if (s->tb_jmp_insn_offset != NULL) {
1885            /* TCG_TARGET_HAS_direct_jump */
1886            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1887               write can be used to patch the target address. */
1888            if ((uintptr_t)s->code_ptr & 7) {
1889                tcg_out32(s, NOP);
1890            }
1891            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1892            /* actual branch destination will be patched by
1893               tb_target_set_jmp_target later. */
1894            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1895            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1896        } else {
1897            /* !TCG_TARGET_HAS_direct_jump */
1898            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            /* PC-relative distance to the target slot, scaled down by 4. */
1899            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1900            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1901        }
        /* Both variants leave the destination in TCG_REG_TMP. */
1902        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1903        set_jmp_reset_offset(s, a0);
1904        break;
1905
1906    case INDEX_op_goto_ptr:
1907        tcg_out_insn(s, 3207, BR, a0);
1908        break;
1909
1910    case INDEX_op_br:
1911        tcg_out_goto_label(s, arg_label(a0));
1912        break;
1913
    /*
     * Host loads and stores.  The last tcg_out_ldst argument grows with the
     * access size (0, 1, 2, 3 for 8/16/32/64 bits); presumably it is the
     * log2 of the access size in bytes -- confirm against tcg_out_ldst.
     */
1914    case INDEX_op_ld8u_i32:
1915    case INDEX_op_ld8u_i64:
1916        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1917        break;
1918    case INDEX_op_ld8s_i32:
1919        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1920        break;
1921    case INDEX_op_ld8s_i64:
1922        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1923        break;
1924    case INDEX_op_ld16u_i32:
1925    case INDEX_op_ld16u_i64:
1926        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1927        break;
1928    case INDEX_op_ld16s_i32:
1929        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1930        break;
1931    case INDEX_op_ld16s_i64:
1932        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1933        break;
1934    case INDEX_op_ld_i32:
1935    case INDEX_op_ld32u_i64:
1936        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1937        break;
1938    case INDEX_op_ld32s_i64:
1939        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1940        break;
1941    case INDEX_op_ld_i64:
1942        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1943        break;
1944
1945    case INDEX_op_st8_i32:
1946    case INDEX_op_st8_i64:
1947        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1948        break;
1949    case INDEX_op_st16_i32:
1950    case INDEX_op_st16_i64:
1951        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1952        break;
1953    case INDEX_op_st_i32:
1954    case INDEX_op_st32_i64:
1955        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1956        break;
1957    case INDEX_op_st_i64:
1958        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1959        break;
1960
1961    case INDEX_op_add_i32:
1962        a2 = (int32_t)a2;
1963        /* FALLTHRU */
1964    case INDEX_op_add_i64:
1965        if (c2) {
1966            tcg_out_addsubi(s, ext, a0, a1, a2);
1967        } else {
1968            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1969        }
1970        break;
1971
1972    case INDEX_op_sub_i32:
1973        a2 = (int32_t)a2;
1974        /* FALLTHRU */
1975    case INDEX_op_sub_i64:
1976        if (c2) {
            /* Subtracting a constant is emitted as adding its negation. */
1977            tcg_out_addsubi(s, ext, a0, a1, -a2);
1978        } else {
1979            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1980        }
1981        break;
1982
1983    case INDEX_op_neg_i64:
1984    case INDEX_op_neg_i32:
        /* Negation is a subtraction from the zero register. */
1985        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1986        break;
1987
1988    case INDEX_op_and_i32:
1989        a2 = (int32_t)a2;
1990        /* FALLTHRU */
1991    case INDEX_op_and_i64:
1992        if (c2) {
1993            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1994        } else {
1995            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1996        }
1997        break;
1998
1999    case INDEX_op_andc_i32:
2000        a2 = (int32_t)a2;
2001        /* FALLTHRU */
2002    case INDEX_op_andc_i64:
2003        if (c2) {
            /* Constant form: fold the complement into the immediate. */
2004            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2005        } else {
2006            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2007        }
2008        break;
2009
2010    case INDEX_op_or_i32:
2011        a2 = (int32_t)a2;
2012        /* FALLTHRU */
2013    case INDEX_op_or_i64:
2014        if (c2) {
2015            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2016        } else {
2017            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2018        }
2019        break;
2020
2021    case INDEX_op_orc_i32:
2022        a2 = (int32_t)a2;
2023        /* FALLTHRU */
2024    case INDEX_op_orc_i64:
2025        if (c2) {
2026            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2027        } else {
2028            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2029        }
2030        break;
2031
2032    case INDEX_op_xor_i32:
2033        a2 = (int32_t)a2;
2034        /* FALLTHRU */
2035    case INDEX_op_xor_i64:
2036        if (c2) {
2037            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2038        } else {
2039            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2040        }
2041        break;
2042
2043    case INDEX_op_eqv_i32:
2044        a2 = (int32_t)a2;
2045        /* FALLTHRU */
2046    case INDEX_op_eqv_i64:
2047        if (c2) {
2048            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2049        } else {
2050            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2051        }
2052        break;
2053
2054    case INDEX_op_not_i64:
2055    case INDEX_op_not_i32:
        /* Bitwise NOT is ORN with the zero register as first source. */
2056        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2057        break;
2058
2059    case INDEX_op_mul_i64:
2060    case INDEX_op_mul_i32:
        /* Plain multiply: MADD with the zero register as last operand. */
2061        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2062        break;
2063
2064    case INDEX_op_div_i64:
2065    case INDEX_op_div_i32:
2066        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2067        break;
2068    case INDEX_op_divu_i64:
2069    case INDEX_op_divu_i32:
2070        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2071        break;
2072
2073    case INDEX_op_rem_i64:
2074    case INDEX_op_rem_i32:
        /*
         * Two-step remainder: the quotient goes to TCG_REG_TMP, then MSUB
         * combines it with a2 and a1 (presumably a1 - TMP * a2).
         */
2075        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2076        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2077        break;
2078    case INDEX_op_remu_i64:
2079    case INDEX_op_remu_i32:
2080        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2081        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2082        break;
2083
2084    case INDEX_op_shl_i64:
2085    case INDEX_op_shl_i32:
2086        if (c2) {
2087            tcg_out_shl(s, ext, a0, a1, a2);
2088        } else {
2089            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2090        }
2091        break;
2092
2093    case INDEX_op_shr_i64:
2094    case INDEX_op_shr_i32:
2095        if (c2) {
2096            tcg_out_shr(s, ext, a0, a1, a2);
2097        } else {
2098            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2099        }
2100        break;
2101
2102    case INDEX_op_sar_i64:
2103    case INDEX_op_sar_i32:
2104        if (c2) {
2105            tcg_out_sar(s, ext, a0, a1, a2);
2106        } else {
2107            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2108        }
2109        break;
2110
2111    case INDEX_op_rotr_i64:
2112    case INDEX_op_rotr_i32:
2113        if (c2) {
2114            tcg_out_rotr(s, ext, a0, a1, a2);
2115        } else {
2116            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2117        }
2118        break;
2119
2120    case INDEX_op_rotl_i64:
2121    case INDEX_op_rotl_i32:
2122        if (c2) {
2123            tcg_out_rotl(s, ext, a0, a1, a2);
2124        } else {
            /*
             * Register count: negate it into TCG_REG_TMP (always with a
             * 32-bit SUB) and rotate right by that amount instead.
             */
2125            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2126            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2127        }
2128        break;
2129
2130    case INDEX_op_clz_i64:
2131    case INDEX_op_clz_i32:
2132        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2133        break;
2134    case INDEX_op_ctz_i64:
2135    case INDEX_op_ctz_i32:
2136        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2137        break;
2138
2139    case INDEX_op_brcond_i32:
2140        a1 = (int32_t)a1;
2141        /* FALLTHRU */
2142    case INDEX_op_brcond_i64:
        /* Operands: a0/a1 are compared, a2 is the condition, args[3] the label. */
2143        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2144        break;
2145
2146    case INDEX_op_setcond_i32:
2147        a2 = (int32_t)a2;
2148        /* FALLTHRU */
2149    case INDEX_op_setcond_i64:
2150        tcg_out_cmp(s, ext, a1, a2, c2);
2151        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2152        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2153                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2154        break;
2155
2156    case INDEX_op_movcond_i32:
2157        a2 = (int32_t)a2;
2158        /* FALLTHRU */
2159    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then select args[3] or args[4] on args[5]. */
2160        tcg_out_cmp(s, ext, a1, a2, c2);
2161        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2162        break;
2163
2164    case INDEX_op_qemu_ld_i32:
2165    case INDEX_op_qemu_ld_i64:
2166        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2167        break;
2168    case INDEX_op_qemu_st_i32:
2169    case INDEX_op_qemu_st_i64:
2170        tcg_out_qemu_st(s, REG0(0), a1, a2);
2171        break;
2172
2173    case INDEX_op_bswap64_i64:
2174        tcg_out_rev64(s, a0, a1);
2175        break;
2176    case INDEX_op_bswap32_i64:
2177    case INDEX_op_bswap32_i32:
2178        tcg_out_rev32(s, a0, a1);
2179        break;
2180    case INDEX_op_bswap16_i64:
2181    case INDEX_op_bswap16_i32:
2182        tcg_out_rev16(s, a0, a1);
2183        break;
2184
2185    case INDEX_op_ext8s_i64:
2186    case INDEX_op_ext8s_i32:
2187        tcg_out_sxt(s, ext, MO_8, a0, a1);
2188        break;
2189    case INDEX_op_ext16s_i64:
2190    case INDEX_op_ext16s_i32:
2191        tcg_out_sxt(s, ext, MO_16, a0, a1);
2192        break;
2193    case INDEX_op_ext_i32_i64:
2194    case INDEX_op_ext32s_i64:
2195        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2196        break;
2197    case INDEX_op_ext8u_i64:
2198    case INDEX_op_ext8u_i32:
2199        tcg_out_uxt(s, MO_8, a0, a1);
2200        break;
2201    case INDEX_op_ext16u_i64:
2202    case INDEX_op_ext16u_i32:
2203        tcg_out_uxt(s, MO_16, a0, a1);
2204        break;
2205    case INDEX_op_extu_i32_i64:
2206    case INDEX_op_ext32u_i64:
        /* 32-bit register move; presumably this clears the upper half. */
2207        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2208        break;
2209
2210    case INDEX_op_deposit_i64:
2211    case INDEX_op_deposit_i32:
        /* args[1] is not passed on; the "dep" constraint ties it to a0. */
2212        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2213        break;
2214
2215    case INDEX_op_extract_i64:
2216    case INDEX_op_extract_i32:
        /* Field runs from bit a2 up to bit a2 + args[3] - 1 inclusive. */
2217        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2218        break;
2219
2220    case INDEX_op_sextract_i64:
2221    case INDEX_op_sextract_i32:
2222        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2223        break;
2224
2225    case INDEX_op_extract2_i64:
2226    case INDEX_op_extract2_i32:
        /* Note the operand order: operand 2 is passed before operand 1. */
2227        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2228        break;
2229
    /*
     * Double-word add/subtract.  The final flag selects subtraction; the
     * _i32 forms narrow args[4] through int32_t first.
     */
2230    case INDEX_op_add2_i32:
2231        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2232                        (int32_t)args[4], args[5], const_args[4],
2233                        const_args[5], false);
2234        break;
2235    case INDEX_op_add2_i64:
2236        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2237                        args[5], const_args[4], const_args[5], false);
2238        break;
2239    case INDEX_op_sub2_i32:
2240        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2241                        (int32_t)args[4], args[5], const_args[4],
2242                        const_args[5], true);
2243        break;
2244    case INDEX_op_sub2_i64:
2245        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2246                        args[5], const_args[4], const_args[5], true);
2247        break;
2248
2249    case INDEX_op_muluh_i64:
2250        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2251        break;
2252    case INDEX_op_mulsh_i64:
2253        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2254        break;
2255
2256    case INDEX_op_mb:
2257        tcg_out_mb(s, a0);
2258        break;
2259
2260    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2261    case INDEX_op_mov_i64:
2262    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2263    case INDEX_op_movi_i64:
2264    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2265    default:
2266        g_assert_not_reached();
2267    }
2268
2269#undef REG0
2270}
2271
/*
 * Emit AArch64 code for one vector TCG opcode.
 *
 * s          - code generation context the instructions are appended to
 * opc        - vector opcode to emit
 * vecl       - vector length selector: added to TCG_TYPE_V64 to form the
 *              vector type and passed to the encoders as the Q flag
 * vece       - element size selector; (8 << vece) is used below as the
 *              element width in bits
 * args       - operands of the opcode; args[0..2] are cached in a0/a1/a2
 * const_args - per-operand flag, non-zero when the operand is a constant
 *
 * mov_vec, dupi_vec, dup_vec and any opcode without a case below end in
 * g_assert_not_reached().
 */
2272static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2273                           unsigned vecl, unsigned vece,
2274                           const TCGArg *args, const int *const_args)
2275{
    /*
     * Register-register compares, indexed by TCGCond.  Entries left at
     * zero have no direct instruction; cmp_vec handles those by swapping
     * the operands and the condition.
     */
2276    static const AArch64Insn cmp_insn[16] = {
2277        [TCG_COND_EQ] = I3616_CMEQ,
2278        [TCG_COND_GT] = I3616_CMGT,
2279        [TCG_COND_GE] = I3616_CMGE,
2280        [TCG_COND_GTU] = I3616_CMHI,
2281        [TCG_COND_GEU] = I3616_CMHS,
2282    };
    /* Single-operand compares, used when operand 2 is a constant. */
2283    static const AArch64Insn cmp0_insn[16] = {
2284        [TCG_COND_EQ] = I3617_CMEQ0,
2285        [TCG_COND_GT] = I3617_CMGT0,
2286        [TCG_COND_GE] = I3617_CMGE0,
2287        [TCG_COND_LT] = I3617_CMLT0,
2288        [TCG_COND_LE] = I3617_CMLE0,
2289    };
2290
2291    TCGType type = vecl + TCG_TYPE_V64;
2292    unsigned is_q = vecl;
2293    TCGArg a0, a1, a2, a3;
2294    int cmode, imm8;
2295
2296    a0 = args[0];
2297    a1 = args[1];
2298    a2 = args[2];
2299
2300    switch (opc) {
2301    case INDEX_op_ld_vec:
2302        tcg_out_ld(s, type, a0, a1, a2);
2303        break;
2304    case INDEX_op_st_vec:
2305        tcg_out_st(s, type, a0, a1, a2);
2306        break;
2307    case INDEX_op_dupm_vec:
2308        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2309        break;
2310    case INDEX_op_add_vec:
2311        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2312        break;
2313    case INDEX_op_sub_vec:
2314        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2315        break;
2316    case INDEX_op_mul_vec:
2317        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2318        break;
2319    case INDEX_op_neg_vec:
2320        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2321        break;
2322    case INDEX_op_abs_vec:
2323        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2324        break;
    /*
     * and/or/andc/orc with a constant operand share one pattern:
     * is_shimm1632() fills cmode/imm8 (its result is not checked here;
     * presumably the operand constraint guarantees encodability --
     * confirm).  If the destination is also the first source, a single
     * immediate-form instruction is emitted and the function returns.
     * Otherwise an immediate move writes a0 and the register form then
     * uses a0 as its second source.
     */
2325    case INDEX_op_and_vec:
2326        if (const_args[2]) {
2327            is_shimm1632(~a2, &cmode, &imm8);
2328            if (a0 == a1) {
2329                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2330                return;
2331            }
2332            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2333            a2 = a0;
2334        }
2335        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2336        break;
2337    case INDEX_op_or_vec:
2338        if (const_args[2]) {
2339            is_shimm1632(a2, &cmode, &imm8);
2340            if (a0 == a1) {
2341                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2342                return;
2343            }
2344            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2345            a2 = a0;
2346        }
2347        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2348        break;
2349    case INDEX_op_andc_vec:
2350        if (const_args[2]) {
2351            is_shimm1632(a2, &cmode, &imm8);
2352            if (a0 == a1) {
2353                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2354                return;
2355            }
2356            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2357            a2 = a0;
2358        }
2359        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2360        break;
2361    case INDEX_op_orc_vec:
2362        if (const_args[2]) {
2363            is_shimm1632(~a2, &cmode, &imm8);
2364            if (a0 == a1) {
2365                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2366                return;
2367            }
2368            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2369            a2 = a0;
2370        }
2371        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2372        break;
2373    case INDEX_op_xor_vec:
2374        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2375        break;
2376    case INDEX_op_ssadd_vec:
2377        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2378        break;
2379    case INDEX_op_sssub_vec:
2380        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2381        break;
2382    case INDEX_op_usadd_vec:
2383        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2384        break;
2385    case INDEX_op_ussub_vec:
2386        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2387        break;
2388    case INDEX_op_smax_vec:
2389        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2390        break;
2391    case INDEX_op_smin_vec:
2392        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2393        break;
2394    case INDEX_op_umax_vec:
2395        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2396        break;
2397    case INDEX_op_umin_vec:
2398        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2399        break;
2400    case INDEX_op_not_vec:
2401        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2402        break;
    /*
     * Immediate shifts: the last argument combines the shift count with
     * the element width.  Left shifts pass count + width, right shifts
     * pass 2 * width - count.
     */
2403    case INDEX_op_shli_vec:
2404        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2405        break;
2406    case INDEX_op_shri_vec:
2407        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2408        break;
2409    case INDEX_op_sari_vec:
2410        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2411        break;
2412    case INDEX_op_aa64_sli_vec:
        /* The source register is a2 and the shift count is args[3]. */
2413        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2414        break;
2415    case INDEX_op_shlv_vec:
2416        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2417        break;
2418    case INDEX_op_aa64_sshl_vec:
2419        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2420        break;
2421    case INDEX_op_cmp_vec:
2422        {
2423            TCGCond cond = args[3];
2424            AArch64Insn insn;
2425
2426            if (cond == TCG_COND_NE) {
2427                if (const_args[2]) {
                    /* Constant operand: test a1 against itself. */
2428                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2429                } else {
                    /* No direct NE: compare for equality, then invert. */
2430                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2431                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2432                }
2433            } else {
2434                if (const_args[2]) {
                    /* Prefer the single-operand form when one exists. */
2435                    insn = cmp0_insn[cond];
2436                    if (insn) {
2437                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2438                        break;
2439                    }
                    /* Otherwise put zero in the scratch vector register. */
2440                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2441                    a2 = TCG_VEC_TMP;
2442                }
2443                insn = cmp_insn[cond];
2444                if (insn == 0) {
                    /* Swap the operands and use the swapped condition. */
2445                    TCGArg t;
2446                    t = a1, a1 = a2, a2 = t;
2447                    cond = tcg_swap_cond(cond);
2448                    insn = cmp_insn[cond];
2449                    tcg_debug_assert(insn != 0);
2450                }
2451                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2452            }
2453        }
2454        break;
2455
2456    case INDEX_op_bitsel_vec:
        /*
         * Pick the variant by which input already shares the destination
         * register; if neither a3 nor a2 does, copy a1 into a0 (when it
         * is not there already) and use BSL.
         */
2457        a3 = args[3];
2458        if (a0 == a3) {
2459            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2460        } else if (a0 == a2) {
2461            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2462        } else {
2463            if (a0 != a1) {
2464                tcg_out_mov(s, type, a0, a1);
2465            }
2466            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2467        }
2468        break;
2469
2470    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2471    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2472    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2473    default:
2474        g_assert_not_reached();
2475    }
2476}
2477
2478int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2479{
2480    switch (opc) {
2481    case INDEX_op_add_vec:
2482    case INDEX_op_sub_vec:
2483    case INDEX_op_and_vec:
2484    case INDEX_op_or_vec:
2485    case INDEX_op_xor_vec:
2486    case INDEX_op_andc_vec:
2487    case INDEX_op_orc_vec:
2488    case INDEX_op_neg_vec:
2489    case INDEX_op_abs_vec:
2490    case INDEX_op_not_vec:
2491    case INDEX_op_cmp_vec:
2492    case INDEX_op_shli_vec:
2493    case INDEX_op_shri_vec:
2494    case INDEX_op_sari_vec:
2495    case INDEX_op_ssadd_vec:
2496    case INDEX_op_sssub_vec:
2497    case INDEX_op_usadd_vec:
2498    case INDEX_op_ussub_vec:
2499    case INDEX_op_shlv_vec:
2500    case INDEX_op_bitsel_vec:
2501        return 1;
2502    case INDEX_op_rotli_vec:
2503    case INDEX_op_shrv_vec:
2504    case INDEX_op_sarv_vec:
2505    case INDEX_op_rotlv_vec:
2506    case INDEX_op_rotrv_vec:
2507        return -1;
2508    case INDEX_op_mul_vec:
2509    case INDEX_op_smax_vec:
2510    case INDEX_op_smin_vec:
2511    case INDEX_op_umax_vec:
2512    case INDEX_op_umin_vec:
2513        return vece < MO_64;
2514
2515    default:
2516        return 0;
2517    }
2518}
2519
2520void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2521                       TCGArg a0, ...)
2522{
2523    va_list va;
2524    TCGv_vec v0, v1, v2, t1, t2;
2525    TCGArg a2;
2526
2527    va_start(va, a0);
2528    v0 = temp_tcgv_vec(arg_temp(a0));
2529    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2530    a2 = va_arg(va, TCGArg);
2531    v2 = temp_tcgv_vec(arg_temp(a2));
2532
2533    switch (opc) {
2534    case INDEX_op_rotli_vec:
2535        t1 = tcg_temp_new_vec(type);
2536        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2537        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2538                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2539        tcg_temp_free_vec(t1);
2540        break;
2541
2542    case INDEX_op_shrv_vec:
2543    case INDEX_op_sarv_vec:
2544        /* Right shifts are negative left shifts for AArch64.  */
2545        t1 = tcg_temp_new_vec(type);
2546        tcg_gen_neg_vec(vece, t1, v2);
2547        opc = (opc == INDEX_op_shrv_vec
2548               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2549        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2550                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2551        tcg_temp_free_vec(t1);
2552        break;
2553
2554    case INDEX_op_rotlv_vec:
2555        t1 = tcg_temp_new_vec(type);
2556        tcg_gen_dupi_vec(vece, t1, 8 << vece);
2557        tcg_gen_sub_vec(vece, t1, v2, t1);
2558        /* Right shifts are negative left shifts for AArch64.  */
2559        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2560                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2561        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2562                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2563        tcg_gen_or_vec(vece, v0, v0, t1);
2564        tcg_temp_free_vec(t1);
2565        break;
2566
2567    case INDEX_op_rotrv_vec:
2568        t1 = tcg_temp_new_vec(type);
2569        t2 = tcg_temp_new_vec(type);
2570        tcg_gen_neg_vec(vece, t1, v2);
2571        tcg_gen_dupi_vec(vece, t2, 8 << vece);
2572        tcg_gen_add_vec(vece, t2, t1, t2);
2573        /* Right shifts are negative left shifts for AArch64.  */
2574        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2575                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2576        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2577                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2578        tcg_gen_or_vec(vece, v0, t1, t2);
2579        tcg_temp_free_vec(t1);
2580        tcg_temp_free_vec(t2);
2581        break;
2582
2583    default:
2584        g_assert_not_reached();
2585    }
2586
2587    va_end(va);
2588}
2589
2590static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2591{
2592    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2593    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2594    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2595    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2596    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2597    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2598    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2599    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2600    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2601    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2602    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2603    static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2604    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2605    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2606    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2607    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2608    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2609    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2610    static const TCGTargetOpDef r_r_rAL
2611        = { .args_ct_str = { "r", "r", "rAL" } };
2612    static const TCGTargetOpDef dep
2613        = { .args_ct_str = { "r", "0", "rZ" } };
2614    static const TCGTargetOpDef ext2
2615        = { .args_ct_str = { "r", "rZ", "rZ" } };
2616    static const TCGTargetOpDef movc
2617        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2618    static const TCGTargetOpDef add2
2619        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2620    static const TCGTargetOpDef w_w_w_w
2621        = { .args_ct_str = { "w", "w", "w", "w" } };
2622
2623    switch (op) {
2624    case INDEX_op_goto_ptr:
2625        return &r;
2626
2627    case INDEX_op_ld8u_i32:
2628    case INDEX_op_ld8s_i32:
2629    case INDEX_op_ld16u_i32:
2630    case INDEX_op_ld16s_i32:
2631    case INDEX_op_ld_i32:
2632    case INDEX_op_ld8u_i64:
2633    case INDEX_op_ld8s_i64:
2634    case INDEX_op_ld16u_i64:
2635    case INDEX_op_ld16s_i64:
2636    case INDEX_op_ld32u_i64:
2637    case INDEX_op_ld32s_i64:
2638    case INDEX_op_ld_i64:
2639    case INDEX_op_neg_i32:
2640    case INDEX_op_neg_i64:
2641    case INDEX_op_not_i32:
2642    case INDEX_op_not_i64:
2643    case INDEX_op_bswap16_i32:
2644    case INDEX_op_bswap32_i32:
2645    case INDEX_op_bswap16_i64:
2646    case INDEX_op_bswap32_i64:
2647    case INDEX_op_bswap64_i64:
2648    case INDEX_op_ext8s_i32:
2649    case INDEX_op_ext16s_i32:
2650    case INDEX_op_ext8u_i32:
2651    case INDEX_op_ext16u_i32:
2652    case INDEX_op_ext8s_i64:
2653    case INDEX_op_ext16s_i64:
2654    case INDEX_op_ext32s_i64:
2655    case INDEX_op_ext8u_i64:
2656    case INDEX_op_ext16u_i64:
2657    case INDEX_op_ext32u_i64:
2658    case INDEX_op_ext_i32_i64:
2659    case INDEX_op_extu_i32_i64:
2660    case INDEX_op_extract_i32:
2661    case INDEX_op_extract_i64:
2662    case INDEX_op_sextract_i32:
2663    case INDEX_op_sextract_i64:
2664        return &r_r;
2665
2666    case INDEX_op_st8_i32:
2667    case INDEX_op_st16_i32:
2668    case INDEX_op_st_i32:
2669    case INDEX_op_st8_i64:
2670    case INDEX_op_st16_i64:
2671    case INDEX_op_st32_i64:
2672    case INDEX_op_st_i64:
2673        return &rZ_r;
2674
2675    case INDEX_op_add_i32:
2676    case INDEX_op_add_i64:
2677    case INDEX_op_sub_i32:
2678    case INDEX_op_sub_i64:
2679    case INDEX_op_setcond_i32:
2680    case INDEX_op_setcond_i64:
2681        return &r_r_rA;
2682
2683    case INDEX_op_mul_i32:
2684    case INDEX_op_mul_i64:
2685    case INDEX_op_div_i32:
2686    case INDEX_op_div_i64:
2687    case INDEX_op_divu_i32:
2688    case INDEX_op_divu_i64:
2689    case INDEX_op_rem_i32:
2690    case INDEX_op_rem_i64:
2691    case INDEX_op_remu_i32:
2692    case INDEX_op_remu_i64:
2693    case INDEX_op_muluh_i64:
2694    case INDEX_op_mulsh_i64:
2695        return &r_r_r;
2696
2697    case INDEX_op_and_i32:
2698    case INDEX_op_and_i64:
2699    case INDEX_op_or_i32:
2700    case INDEX_op_or_i64:
2701    case INDEX_op_xor_i32:
2702    case INDEX_op_xor_i64:
2703    case INDEX_op_andc_i32:
2704    case INDEX_op_andc_i64:
2705    case INDEX_op_orc_i32:
2706    case INDEX_op_orc_i64:
2707    case INDEX_op_eqv_i32:
2708    case INDEX_op_eqv_i64:
2709        return &r_r_rL;
2710
2711    case INDEX_op_shl_i32:
2712    case INDEX_op_shr_i32:
2713    case INDEX_op_sar_i32:
2714    case INDEX_op_rotl_i32:
2715    case INDEX_op_rotr_i32:
2716    case INDEX_op_shl_i64:
2717    case INDEX_op_shr_i64:
2718    case INDEX_op_sar_i64:
2719    case INDEX_op_rotl_i64:
2720    case INDEX_op_rotr_i64:
2721        return &r_r_ri;
2722
2723    case INDEX_op_clz_i32:
2724    case INDEX_op_ctz_i32:
2725    case INDEX_op_clz_i64:
2726    case INDEX_op_ctz_i64:
2727        return &r_r_rAL;
2728
2729    case INDEX_op_brcond_i32:
2730    case INDEX_op_brcond_i64:
2731        return &r_rA;
2732
2733    case INDEX_op_movcond_i32:
2734    case INDEX_op_movcond_i64:
2735        return &movc;
2736
2737    case INDEX_op_qemu_ld_i32:
2738    case INDEX_op_qemu_ld_i64:
2739        return &r_l;
2740    case INDEX_op_qemu_st_i32:
2741    case INDEX_op_qemu_st_i64:
2742        return &lZ_l;
2743
2744    case INDEX_op_deposit_i32:
2745    case INDEX_op_deposit_i64:
2746        return &dep;
2747
2748    case INDEX_op_extract2_i32:
2749    case INDEX_op_extract2_i64:
2750        return &ext2;
2751
2752    case INDEX_op_add2_i32:
2753    case INDEX_op_add2_i64:
2754    case INDEX_op_sub2_i32:
2755    case INDEX_op_sub2_i64:
2756        return &add2;
2757
2758    case INDEX_op_add_vec:
2759    case INDEX_op_sub_vec:
2760    case INDEX_op_mul_vec:
2761    case INDEX_op_xor_vec:
2762    case INDEX_op_ssadd_vec:
2763    case INDEX_op_sssub_vec:
2764    case INDEX_op_usadd_vec:
2765    case INDEX_op_ussub_vec:
2766    case INDEX_op_smax_vec:
2767    case INDEX_op_smin_vec:
2768    case INDEX_op_umax_vec:
2769    case INDEX_op_umin_vec:
2770    case INDEX_op_shlv_vec:
2771    case INDEX_op_shrv_vec:
2772    case INDEX_op_sarv_vec:
2773    case INDEX_op_aa64_sshl_vec:
2774        return &w_w_w;
2775    case INDEX_op_not_vec:
2776    case INDEX_op_neg_vec:
2777    case INDEX_op_abs_vec:
2778    case INDEX_op_shli_vec:
2779    case INDEX_op_shri_vec:
2780    case INDEX_op_sari_vec:
2781        return &w_w;
2782    case INDEX_op_ld_vec:
2783    case INDEX_op_st_vec:
2784    case INDEX_op_dupm_vec:
2785        return &w_r;
2786    case INDEX_op_dup_vec:
2787        return &w_wr;
2788    case INDEX_op_or_vec:
2789    case INDEX_op_andc_vec:
2790        return &w_w_wO;
2791    case INDEX_op_and_vec:
2792    case INDEX_op_orc_vec:
2793        return &w_w_wN;
2794    case INDEX_op_cmp_vec:
2795        return &w_w_wZ;
2796    case INDEX_op_bitsel_vec:
2797        return &w_w_w_w;
2798    case INDEX_op_aa64_sli_vec:
2799        return &w_0_w;
2800
2801    default:
2802        return NULL;
2803    }
2804}
2805
2806static void tcg_target_init(TCGContext *s)
2807{
2808    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2809    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2810    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2811    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2812
2813    tcg_target_call_clobber_regs = -1ull;
2814    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2815    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2816    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2817    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2818    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2819    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2820    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2821    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2822    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2823    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2824    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2825    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2826    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2827    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2828    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2829    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2830    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2831    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2832    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2833
2834    s->reserved_regs = 0;
2835    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2836    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2837    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2838    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2839    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2840}
2841
/*
 * Bytes occupied by the register save area: the pairs (X19, X20) ..
 * (X27, X28) plus (X29(fp), X30(lr)), i.e. twelve 8-byte slots.
 * This is deliberately a plain int expression: the prologue negates it
 * to form the pre-decrement offset of its first store.
 */
#define PUSH_SIZE  (8 * (30 - 19 + 1))

/*
 * Total stack frame: the register save area, the static call-argument
 * area and the TCG temporary buffer, rounded up with the usual
 * add-and-mask idiom to a multiple of TCG_TARGET_STACK_ALIGN
 * (which the idiom requires to be a power of two).
 */
#define FRAME_SIZE \
    ((PUSH_SIZE + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + (TCG_TARGET_STACK_ALIGN - 1)) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
2851
2852/* We're expecting a 2 byte uleb128 encoded value.  */
2853QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2854
2855/* We're expecting to use a single ADDI insn.  */
2856QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2857
2858static void tcg_target_qemu_prologue(TCGContext *s)
2859{
2860    TCGReg r;
2861
2862    /* Push (FP, LR) and allocate space for all saved registers.  */
2863    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2864                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2865
2866    /* Set up frame pointer for canonical unwinding.  */
2867    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2868
2869    /* Store callee-preserved regs x19..x28.  */
2870    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2871        int ofs = (r - TCG_REG_X19 + 2) * 8;
2872        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2873    }
2874
2875    /* Make stack space for TCG locals.  */
2876    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2877                 FRAME_SIZE - PUSH_SIZE);
2878
2879    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2880    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2881                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2882
2883#if !defined(CONFIG_SOFTMMU)
2884    if (USE_GUEST_BASE) {
2885        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2886        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2887    }
2888#endif
2889
2890    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2891    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2892
2893    /*
2894     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2895     * and fall through to the rest of the epilogue.
2896     */
2897    s->code_gen_epilogue = s->code_ptr;
2898    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2899
2900    /* TB epilogue */
2901    tb_ret_addr = s->code_ptr;
2902
2903    /* Remove TCG locals stack space.  */
2904    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2905                 FRAME_SIZE - PUSH_SIZE);
2906
2907    /* Restore registers x19..x28.  */
2908    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2909        int ofs = (r - TCG_REG_X19 + 2) * 8;
2910        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2911    }
2912
2913    /* Pop (FP, LR), restore SP to previous frame.  */
2914    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2915                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2916    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2917}
2918
2919static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2920{
2921    int i;
2922    for (i = 0; i < count; ++i) {
2923        p[i] = NOP;
2924    }
2925}
2926
2927typedef struct {
2928    DebugFrameHeader h;
2929    uint8_t fde_def_cfa[4];
2930    uint8_t fde_reg_ofs[24];
2931} DebugFrame;
2932
2933#define ELF_HOST_MACHINE EM_AARCH64
2934
2935static const DebugFrame debug_frame = {
2936    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2937    .h.cie.id = -1,
2938    .h.cie.version = 1,
2939    .h.cie.code_align = 1,
2940    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2941    .h.cie.return_column = TCG_REG_LR,
2942
2943    /* Total FDE size does not include the "len" member.  */
2944    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2945
2946    .fde_def_cfa = {
2947        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2948        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2949        (FRAME_SIZE >> 7)
2950    },
2951    .fde_reg_ofs = {
2952        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2953        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2954        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2955        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2956        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2957        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2958        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2959        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2960        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2961        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
2962        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2963        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2964    }
2965};
2966
2967void tcg_register_jit(void *buf, size_t buf_size)
2968{
2969    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2970}
2971