/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static inline bool reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *code_ptr = deposit32(*code_ptr, 0, 26, offset);
        return true;
    }
    return false;
}
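
/* Since code_ptr and target are tcg_insn_unit pointers, the subtraction
   above already counts 32-bit instructions, which is exactly the unit
   the B/BL imm26 field expects; e.g. a branch to the next instruction
   relocates as imm26 = 1.  */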

static inline bool reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - code_ptr;
    if (offset == sextract64(offset, 0, 19)) {
        *code_ptr = deposit32(*code_ptr, 5, 19, offset);
        return true;
    }
    return false;
}

static inline bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                               intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->ct |= TCG_CT_REG;
        ct->u.regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->ct |= TCG_CT_REG;
        ct->u.regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are needed for helper args; better to avoid
           using them. */
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
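
/* E.g. 0x123 (low 12 bits) and 0x123000 (12 bits, LSL #12) are both
   valid arithmetic immediates, while 0x123456 has bits in both halves
   and is not.  */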

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
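
/* Worked example: val = 0x0ff0 (form 0..01..10..0).  val & -val isolates
   the lowest set bit, 0x0010; adding it carries the contiguous run of
   ones away, leaving 0x1000, a power of two, so the test passes.  For
   val = 0x0f0f the carry stops inside the low run (0x0f0f + 1 = 0x0f10,
   not a power of two), so the test correctly rejects it.  */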

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
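
/* E.g. v32 = 0x34ff matches the first case: cmode 0xc with imm8 0x34
   expands to (0x34 << 8) | 0xff = 0x34ff.  */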

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
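
/* E.g. v32 = 0x00aa00bb: masking out the byte at bit 16 (i == 4) leaves
   0x000000bb, which is_shimm32 accepts (cmode 0x0, imm8 0xbb); the caller
   then ORRs back 0xaa with cmode 4, i.e. shifted left by 16.  */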

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
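
/* E.g. tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...), so pairing an opcode with a
   format it does not belong to fails to compile.  */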

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
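
/* Worked example: limm = 0x0ff0.  h = clz64 = 52, l = ctz64 = 4, so
   r = 60 and c = 60 - 52 - 1 = 7.  DecodeBitMasks then builds a run of
   c + 1 = 8 ones and rotates it right by r = 60, i.e. left by 4,
   recovering 0x0ff0.  */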

static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        tcg_target_long disp = value - (intptr_t)s->code_ptr;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
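
/* E.g. value = 0x00ab000000cd0000 has only ten set bits, so the MOVZ
   path is chosen: s0 = 16, s1 = 48, t2 = 0, emitting
   MOVZ rd, #0x00cd, LSL #16 then MOVK rd, #0x00ab, LSL #48.  */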

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}
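
/* E.g. an 8-byte access (lgsize 3) at offset 0x4008 is aligned and in
   range, so it uses the scaled form with uimm12 = 0x801; offset -32
   falls back to the unscaled signed form; offset 0x123456 takes the
   temp-register path.  */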

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}
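
/* These map onto the standard aliases: e.g. tcg_out_shl with m = 5 on a
   32-bit value emits UBFM Wd, Wn, #27, #26, which is the LSL Wd, Wn, #5
   alias.  */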

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
    ptrdiff_t offset = target - s->code_ptr;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_addr;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_addr >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    atomic_set((uint64_t *)jmp_addr, pair);
    flush_icache_range(jmp_addr, jmp_addr + 8);
}
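
/* The two patched insns form one 8-byte pair, published with a single
   atomic store; assuming the jump site is 8-byte aligned, a concurrent
   executor sees either the old or the new sequence, never a mix.  */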

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = l->u.value_ptr - s->code_ptr;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
}

static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                                   TCGReg rh, TCGReg al, TCGReg ah,
                                   tcg_target_long bl, tcg_target_long bh,
                                   bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        insn = I3401_ADDSI;
        if ((bl < 0) ^ sub) {
            insn = I3401_SUBSI;
            bl = -bl;
        }
        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
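
/* E.g. a TCG_MO_LD_LD barrier emits 0xd50338bf | 0x100 = 0xd50339bf,
   which is DMB ISHLD; setting both LD and ST bits yields 0xd5033bbf,
   a full DMB ISH.  */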
1512
1513static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1514                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1515{
1516    TCGReg a1 = a0;
1517    if (is_ctz) {
1518        a1 = TCG_REG_TMP;
1519        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1520    }
1521    if (const_b && b == (ext ? 64 : 32)) {
1522        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1523    } else {
1524        AArch64Insn sel = I3506_CSEL;
1525
1526        tcg_out_cmp(s, ext, a0, 0, 1);
1527        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1528
1529        if (const_b) {
1530            if (b == -1) {
1531                b = TCG_REG_XZR;
1532                sel = I3506_CSINV;
1533            } else if (b == 0) {
1534                b = TCG_REG_XZR;
1535            } else {
1536                tcg_out_movi(s, ext, d, b);
1537                b = d;
1538            }
1539        }
1540        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1541    }
1542}
1543
1544#ifdef CONFIG_SOFTMMU
1545#include "../tcg-ldst.c.inc"
1546
1547/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1548 *                                     TCGMemOpIdx oi, uintptr_t ra)
1549 */
1550static void * const qemu_ld_helpers[16] = {
1551    [MO_UB]   = helper_ret_ldub_mmu,
1552    [MO_LEUW] = helper_le_lduw_mmu,
1553    [MO_LEUL] = helper_le_ldul_mmu,
1554    [MO_LEQ]  = helper_le_ldq_mmu,
1555    [MO_BEUW] = helper_be_lduw_mmu,
1556    [MO_BEUL] = helper_be_ldul_mmu,
1557    [MO_BEQ]  = helper_be_ldq_mmu,
1558};
1559
1560/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1561 *                                     uintxx_t val, TCGMemOpIdx oi,
1562 *                                     uintptr_t ra)
1563 */
1564static void * const qemu_st_helpers[16] = {
1565    [MO_UB]   = helper_ret_stb_mmu,
1566    [MO_LEUW] = helper_le_stw_mmu,
1567    [MO_LEUL] = helper_le_stl_mmu,
1568    [MO_LEQ]  = helper_le_stq_mmu,
1569    [MO_BEUW] = helper_be_stw_mmu,
1570    [MO_BEUL] = helper_be_stl_mmu,
1571    [MO_BEQ]  = helper_be_stq_mmu,
1572};
1573
1574static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1575{
1576    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1577    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1578    tcg_out_insn(s, 3406, ADR, rd, offset);
1579}
1580
1581static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1582{
1583    TCGMemOpIdx oi = lb->oi;
1584    MemOp opc = get_memop(oi);
1585    MemOp size = opc & MO_SIZE;
1586
1587    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1588        return false;
1589    }
1590
1591    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1592    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1593    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1594    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1595    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1596    if (opc & MO_SIGN) {
1597        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1598    } else {
1599        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1600    }
1601
1602    tcg_out_goto(s, lb->raddr);
1603    return true;
1604}
1605
1606static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1607{
1608    TCGMemOpIdx oi = lb->oi;
1609    MemOp opc = get_memop(oi);
1610    MemOp size = opc & MO_SIZE;
1611
1612    if (!reloc_pc19(lb->label_ptr[0], s->code_ptr)) {
1613        return false;
1614    }
1615
1616    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1617    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1618    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1619    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1620    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1621    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1622    tcg_out_goto(s, lb->raddr);
1623    return true;
1624}
1625
1626static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1627                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1628                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1629{
1630    TCGLabelQemuLdst *label = new_ldst_label(s);
1631
1632    label->is_ld = is_ld;
1633    label->oi = oi;
1634    label->type = ext;
1635    label->datalo_reg = data_reg;
1636    label->addrlo_reg = addr_reg;
1637    label->raddr = raddr;
1638    label->label_ptr[0] = label_ptr;
1639}
1640
1641/* We expect to use a 7-bit scaled negative offset from ENV.  */
1642QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1643QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1644
1645/* These offsets are built into the LDP below.  */
1646QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1647QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1648
1649/* Load and compare a TLB entry, emitting the conditional jump to the
1650   slow path for the failure case; the jump is patched later, when the
1651   slow path is finalized.  Generated code returns the host addend in
1652   X1 and clobbers X0, X2, X3 and TMP. */
1653static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1654                             tcg_insn_unit **label_ptr, int mem_index,
1655                             bool is_read)
1656{
1657    unsigned a_bits = get_alignment_bits(opc);
1658    unsigned s_bits = opc & MO_SIZE;
1659    unsigned a_mask = (1u << a_bits) - 1;
1660    unsigned s_mask = (1u << s_bits) - 1;
1661    TCGReg x3;
1662    TCGType mask_type;
1663    uint64_t compare_mask;
1664
1665    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1666                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1667
1668    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1669    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1670                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1671
1672    /* Extract the TLB index from the address into X0.  */
1673    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1674                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1675                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1676
1677    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1678    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1679
1680    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1681    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1682               ? offsetof(CPUTLBEntry, addr_read)
1683               : offsetof(CPUTLBEntry, addr_write));
1684    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1685               offsetof(CPUTLBEntry, addend));
1686
1687    /* For aligned accesses, we check the first byte and include the alignment
1688       bits within the address.  For unaligned access, we check that we don't
1689       cross pages using the address of the last byte of the access.  */
1690    if (a_bits >= s_bits) {
1691        x3 = addr_reg;
1692    } else {
1693        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1694                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1695        x3 = TCG_REG_X3;
1696    }
1697    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1698
1699    /* Store the page mask part of the address into X3.  */
1700    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1701                     TCG_REG_X3, x3, compare_mask);
1702
1703    /* Perform the address comparison. */
1704    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1705
1706    /* If not equal, we jump to the slow path. */
1707    *label_ptr = s->code_ptr;
1708    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1709}
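
/*
 * As a rough sketch only (field offsets and the shift amount depend on
 * the configuration; Xaddr stands for the guest address register), the
 * fast path emitted above for a read is:
 *
 *    ldp   x0, x1, [env, #tlb_mask_table_ofs]  // {mask, table}
 *    and   x0, x0, Xaddr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *    add   x1, x1, x0                          // &table[index]
 *    ldr   x0, [x1, #addr_read]                // TLB comparator
 *    ldr   x1, [x1, #addend]                   // host addend, the result
 *    and   x3, Xaddr, #(TARGET_PAGE_MASK | a_mask)
 *    cmp   x0, x3
 *    b.ne  slow_path                           // relocated via *label_ptr
 *
 * plus one ADDI before the mask when the access may cross a page.
 */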
1710
1711#endif /* CONFIG_SOFTMMU */
1712
1713static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1714                                   TCGReg data_r, TCGReg addr_r,
1715                                   TCGType otype, TCGReg off_r)
1716{
1717    const MemOp bswap = memop & MO_BSWAP;
1718
1719    switch (memop & MO_SSIZE) {
1720    case MO_UB:
1721        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1722        break;
1723    case MO_SB:
1724        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1725                       data_r, addr_r, otype, off_r);
1726        break;
1727    case MO_UW:
1728        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1729        if (bswap) {
1730            tcg_out_rev16(s, data_r, data_r);
1731        }
1732        break;
1733    case MO_SW:
1734        if (bswap) {
1735            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1736            tcg_out_rev16(s, data_r, data_r);
1737            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1738        } else {
1739            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1740                           data_r, addr_r, otype, off_r);
1741        }
1742        break;
1743    case MO_UL:
1744        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1745        if (bswap) {
1746            tcg_out_rev32(s, data_r, data_r);
1747        }
1748        break;
1749    case MO_SL:
1750        if (bswap) {
1751            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1752            tcg_out_rev32(s, data_r, data_r);
1753            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1754        } else {
1755            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1756        }
1757        break;
1758    case MO_Q:
1759        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1760        if (bswap) {
1761            tcg_out_rev64(s, data_r, data_r);
1762        }
1763        break;
1764    default:
1765        tcg_abort();
1766    }
1767}
1768
1769static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1770                                   TCGReg data_r, TCGReg addr_r,
1771                                   TCGType otype, TCGReg off_r)
1772{
1773    const MemOp bswap = memop & MO_BSWAP;
1774
1775    switch (memop & MO_SIZE) {
1776    case MO_8:
1777        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1778        break;
1779    case MO_16:
1780        if (bswap && data_r != TCG_REG_XZR) {
1781            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1782            data_r = TCG_REG_TMP;
1783        }
1784        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1785        break;
1786    case MO_32:
1787        if (bswap && data_r != TCG_REG_XZR) {
1788            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1789            data_r = TCG_REG_TMP;
1790        }
1791        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1792        break;
1793    case MO_64:
1794        if (bswap && data_r != TCG_REG_XZR) {
1795            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1796            data_r = TCG_REG_TMP;
1797        }
1798        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1799        break;
1800    default:
1801        tcg_abort();
1802    }
1803}
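
/* For example, a byte-swapped 16-bit store becomes "rev16 tmp, data"
   followed by "strh tmp, [base, off]"; XZR is exempted above since
   zero is unchanged by swapping.  */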
1804
1805static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1806                            TCGMemOpIdx oi, TCGType ext)
1807{
1808    MemOp memop = get_memop(oi);
1809    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1810#ifdef CONFIG_SOFTMMU
1811    unsigned mem_index = get_mmuidx(oi);
1812    tcg_insn_unit *label_ptr;
1813
1814    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1815    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1816                           TCG_REG_X1, otype, addr_reg);
1817    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1818                        s->code_ptr, label_ptr);
1819#else /* !CONFIG_SOFTMMU */
1820    if (USE_GUEST_BASE) {
1821        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1822                               TCG_REG_GUEST_BASE, otype, addr_reg);
1823    } else {
1824        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1825                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1826    }
1827#endif /* CONFIG_SOFTMMU */
1828}
1829
1830static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1831                            TCGMemOpIdx oi)
1832{
1833    MemOp memop = get_memop(oi);
1834    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1835#ifdef CONFIG_SOFTMMU
1836    unsigned mem_index = get_mmuidx(oi);
1837    tcg_insn_unit *label_ptr;
1838
1839    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1840    tcg_out_qemu_st_direct(s, memop, data_reg,
1841                           TCG_REG_X1, otype, addr_reg);
1842    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1843                        data_reg, addr_reg, s->code_ptr, label_ptr);
1844#else /* !CONFIG_SOFTMMU */
1845    if (USE_GUEST_BASE) {
1846        tcg_out_qemu_st_direct(s, memop, data_reg,
1847                               TCG_REG_GUEST_BASE, otype, addr_reg);
1848    } else {
1849        tcg_out_qemu_st_direct(s, memop, data_reg,
1850                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1851    }
1852#endif /* CONFIG_SOFTMMU */
1853}
1854
1855static tcg_insn_unit *tb_ret_addr;
1856
1857static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1858                       const TCGArg args[TCG_MAX_OP_ARGS],
1859                       const int const_args[TCG_MAX_OP_ARGS])
1860{
1861    /* 99% of the time, we can signal the use of extension registers
1862       by looking to see if the opcode handles 64-bit data.  */
1863    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1864
1865    /* Hoist the loads of the most common arguments.  */
1866    TCGArg a0 = args[0];
1867    TCGArg a1 = args[1];
1868    TCGArg a2 = args[2];
1869    int c2 = const_args[2];
1870
1871    /* Some operands use the "rZ" constraint: a register or the zero
1872       register.  No need to test args[I] == 0; "rZ" matches only zero.  */
1873#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
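    /* E.g. a "st8_i32" whose data operand is the constant zero matches
       "rZ" as a constant, and REG0(0) then stores XZR directly instead
       of materializing zero in a register.  */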
1874
1875    switch (opc) {
1876    case INDEX_op_exit_tb:
1877        /* Reuse the zeroing that exists for goto_ptr.  */
1878        if (a0 == 0) {
1879            tcg_out_goto_long(s, s->code_gen_epilogue);
1880        } else {
1881            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1882            tcg_out_goto_long(s, tb_ret_addr);
1883        }
1884        break;
1885
1886    case INDEX_op_goto_tb:
1887        if (s->tb_jmp_insn_offset != NULL) {
1888            /* TCG_TARGET_HAS_direct_jump */
1889            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1890               write can be used to patch the target address. */
1891            if ((uintptr_t)s->code_ptr & 7) {
1892                tcg_out32(s, NOP);
1893            }
1894            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1895            /* actual branch destination will be patched by
1896               tb_target_set_jmp_target later. */
1897            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1898            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1899        } else {
1900            /* !TCG_TARGET_HAS_direct_jump */
1901            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1902            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1903            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1904        }
1905        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1906        set_jmp_reset_offset(s, a0);
1907        break;
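        /* Once patched (a single aligned 8-byte store, so a concurrent
           executor sees either the old pair or the new pair), the two
           insns read "adrp tmp, page; add tmp, tmp, #lo12", reaching
           +/- 4GiB from the code buffer.  */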
1908
1909    case INDEX_op_goto_ptr:
1910        tcg_out_insn(s, 3207, BR, a0);
1911        break;
1912
1913    case INDEX_op_br:
1914        tcg_out_goto_label(s, arg_label(a0));
1915        break;
1916
1917    case INDEX_op_ld8u_i32:
1918    case INDEX_op_ld8u_i64:
1919        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1920        break;
1921    case INDEX_op_ld8s_i32:
1922        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1923        break;
1924    case INDEX_op_ld8s_i64:
1925        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1926        break;
1927    case INDEX_op_ld16u_i32:
1928    case INDEX_op_ld16u_i64:
1929        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1930        break;
1931    case INDEX_op_ld16s_i32:
1932        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1933        break;
1934    case INDEX_op_ld16s_i64:
1935        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1936        break;
1937    case INDEX_op_ld_i32:
1938    case INDEX_op_ld32u_i64:
1939        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1940        break;
1941    case INDEX_op_ld32s_i64:
1942        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1943        break;
1944    case INDEX_op_ld_i64:
1945        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1946        break;
1947
1948    case INDEX_op_st8_i32:
1949    case INDEX_op_st8_i64:
1950        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1951        break;
1952    case INDEX_op_st16_i32:
1953    case INDEX_op_st16_i64:
1954        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1955        break;
1956    case INDEX_op_st_i32:
1957    case INDEX_op_st32_i64:
1958        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1959        break;
1960    case INDEX_op_st_i64:
1961        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1962        break;
1963
1964    case INDEX_op_add_i32:
1965        a2 = (int32_t)a2;
1966        /* FALLTHRU */
1967    case INDEX_op_add_i64:
1968        if (c2) {
1969            tcg_out_addsubi(s, ext, a0, a1, a2);
1970        } else {
1971            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1972        }
1973        break;
1974
1975    case INDEX_op_sub_i32:
1976        a2 = (int32_t)a2;
1977        /* FALLTHRU */
1978    case INDEX_op_sub_i64:
1979        if (c2) {
1980            tcg_out_addsubi(s, ext, a0, a1, -a2);
1981        } else {
1982            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1983        }
1984        break;
1985
1986    case INDEX_op_neg_i64:
1987    case INDEX_op_neg_i32:
1988        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1989        break;
1990
1991    case INDEX_op_and_i32:
1992        a2 = (int32_t)a2;
1993        /* FALLTHRU */
1994    case INDEX_op_and_i64:
1995        if (c2) {
1996            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1997        } else {
1998            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1999        }
2000        break;
2001
2002    case INDEX_op_andc_i32:
2003        a2 = (int32_t)a2;
2004        /* FALLTHRU */
2005    case INDEX_op_andc_i64:
2006        if (c2) {
2007            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2008        } else {
2009            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2010        }
2011        break;
2012
2013    case INDEX_op_or_i32:
2014        a2 = (int32_t)a2;
2015        /* FALLTHRU */
2016    case INDEX_op_or_i64:
2017        if (c2) {
2018            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2019        } else {
2020            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2021        }
2022        break;
2023
2024    case INDEX_op_orc_i32:
2025        a2 = (int32_t)a2;
2026        /* FALLTHRU */
2027    case INDEX_op_orc_i64:
2028        if (c2) {
2029            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2030        } else {
2031            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2032        }
2033        break;
2034
2035    case INDEX_op_xor_i32:
2036        a2 = (int32_t)a2;
2037        /* FALLTHRU */
2038    case INDEX_op_xor_i64:
2039        if (c2) {
2040            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2041        } else {
2042            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2043        }
2044        break;
2045
2046    case INDEX_op_eqv_i32:
2047        a2 = (int32_t)a2;
2048        /* FALLTHRU */
2049    case INDEX_op_eqv_i64:
2050        if (c2) {
2051            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2052        } else {
2053            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2054        }
2055        break;
2056
2057    case INDEX_op_not_i64:
2058    case INDEX_op_not_i32:
2059        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2060        break;
2061
2062    case INDEX_op_mul_i64:
2063    case INDEX_op_mul_i32:
2064        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2065        break;
2066
2067    case INDEX_op_div_i64:
2068    case INDEX_op_div_i32:
2069        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2070        break;
2071    case INDEX_op_divu_i64:
2072    case INDEX_op_divu_i32:
2073        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2074        break;
2075
2076    case INDEX_op_rem_i64:
2077    case INDEX_op_rem_i32:
2078        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2079        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2080        break;
2081    case INDEX_op_remu_i64:
2082    case INDEX_op_remu_i32:
2083        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2084        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2085        break;
2086
2087    case INDEX_op_shl_i64:
2088    case INDEX_op_shl_i32:
2089        if (c2) {
2090            tcg_out_shl(s, ext, a0, a1, a2);
2091        } else {
2092            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2093        }
2094        break;
2095
2096    case INDEX_op_shr_i64:
2097    case INDEX_op_shr_i32:
2098        if (c2) {
2099            tcg_out_shr(s, ext, a0, a1, a2);
2100        } else {
2101            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2102        }
2103        break;
2104
2105    case INDEX_op_sar_i64:
2106    case INDEX_op_sar_i32:
2107        if (c2) {
2108            tcg_out_sar(s, ext, a0, a1, a2);
2109        } else {
2110            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2111        }
2112        break;
2113
2114    case INDEX_op_rotr_i64:
2115    case INDEX_op_rotr_i32:
2116        if (c2) {
2117            tcg_out_rotr(s, ext, a0, a1, a2);
2118        } else {
2119            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2120        }
2121        break;
2122
2123    case INDEX_op_rotl_i64:
2124    case INDEX_op_rotl_i32:
2125        if (c2) {
2126            tcg_out_rotl(s, ext, a0, a1, a2);
2127        } else {
2128            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2129            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2130        }
2131        break;
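        /* AArch64 has no rotate-left insn, so the register case above
           computes rotl(a1, a2) as rotr(a1, -a2), negating the count
           with a SUB from XZR; a 32-bit SUB is always enough because
           RORV only consumes the low bits of the count.  */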
2132
2133    case INDEX_op_clz_i64:
2134    case INDEX_op_clz_i32:
2135        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2136        break;
2137    case INDEX_op_ctz_i64:
2138    case INDEX_op_ctz_i32:
2139        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2140        break;
2141
2142    case INDEX_op_brcond_i32:
2143        a1 = (int32_t)a1;
2144        /* FALLTHRU */
2145    case INDEX_op_brcond_i64:
2146        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2147        break;
2148
2149    case INDEX_op_setcond_i32:
2150        a2 = (int32_t)a2;
2151        /* FALLTHRU */
2152    case INDEX_op_setcond_i64:
2153        tcg_out_cmp(s, ext, a1, a2, c2);
2154        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2155        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2156                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2157        break;
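        /* E.g. "setcond_i32 d, a, b, eq" emits "cmp a, b" then
           "csinc wd, wzr, wzr, ne", i.e. "cset wd, eq": d becomes
           wzr + 1 when NE is false, wzr otherwise.  */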
2158
2159    case INDEX_op_movcond_i32:
2160        a2 = (int32_t)a2;
2161        /* FALLTHRU */
2162    case INDEX_op_movcond_i64:
2163        tcg_out_cmp(s, ext, a1, a2, c2);
2164        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2165        break;
2166
2167    case INDEX_op_qemu_ld_i32:
2168    case INDEX_op_qemu_ld_i64:
2169        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2170        break;
2171    case INDEX_op_qemu_st_i32:
2172    case INDEX_op_qemu_st_i64:
2173        tcg_out_qemu_st(s, REG0(0), a1, a2);
2174        break;
2175
2176    case INDEX_op_bswap64_i64:
2177        tcg_out_rev64(s, a0, a1);
2178        break;
2179    case INDEX_op_bswap32_i64:
2180    case INDEX_op_bswap32_i32:
2181        tcg_out_rev32(s, a0, a1);
2182        break;
2183    case INDEX_op_bswap16_i64:
2184    case INDEX_op_bswap16_i32:
2185        tcg_out_rev16(s, a0, a1);
2186        break;
2187
2188    case INDEX_op_ext8s_i64:
2189    case INDEX_op_ext8s_i32:
2190        tcg_out_sxt(s, ext, MO_8, a0, a1);
2191        break;
2192    case INDEX_op_ext16s_i64:
2193    case INDEX_op_ext16s_i32:
2194        tcg_out_sxt(s, ext, MO_16, a0, a1);
2195        break;
2196    case INDEX_op_ext_i32_i64:
2197    case INDEX_op_ext32s_i64:
2198        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2199        break;
2200    case INDEX_op_ext8u_i64:
2201    case INDEX_op_ext8u_i32:
2202        tcg_out_uxt(s, MO_8, a0, a1);
2203        break;
2204    case INDEX_op_ext16u_i64:
2205    case INDEX_op_ext16u_i32:
2206        tcg_out_uxt(s, MO_16, a0, a1);
2207        break;
2208    case INDEX_op_extu_i32_i64:
2209    case INDEX_op_ext32u_i64:
2210        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2211        break;
2212
2213    case INDEX_op_deposit_i64:
2214    case INDEX_op_deposit_i32:
2215        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2216        break;
2217
2218    case INDEX_op_extract_i64:
2219    case INDEX_op_extract_i32:
2220        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2221        break;
2222
2223    case INDEX_op_sextract_i64:
2224    case INDEX_op_sextract_i32:
2225        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2226        break;
2227
2228    case INDEX_op_extract2_i64:
2229    case INDEX_op_extract2_i32:
2230        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2231        break;
2232
2233    case INDEX_op_add2_i32:
2234        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2235                        (int32_t)args[4], args[5], const_args[4],
2236                        const_args[5], false);
2237        break;
2238    case INDEX_op_add2_i64:
2239        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2240                        args[5], const_args[4], const_args[5], false);
2241        break;
2242    case INDEX_op_sub2_i32:
2243        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2244                        (int32_t)args[4], args[5], const_args[4],
2245                        const_args[5], true);
2246        break;
2247    case INDEX_op_sub2_i64:
2248        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2249                        args[5], const_args[4], const_args[5], true);
2250        break;
2251
2252    case INDEX_op_muluh_i64:
2253        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2254        break;
2255    case INDEX_op_mulsh_i64:
2256        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2257        break;
2258
2259    case INDEX_op_mb:
2260        tcg_out_mb(s, a0);
2261        break;
2262
2263    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2264    case INDEX_op_mov_i64:
2265    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2266    case INDEX_op_movi_i64:
2267    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2268    default:
2269        g_assert_not_reached();
2270    }
2271
2272#undef REG0
2273}
2274
2275static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2276                           unsigned vecl, unsigned vece,
2277                           const TCGArg *args, const int *const_args)
2278{
2279    static const AArch64Insn cmp_insn[16] = {
2280        [TCG_COND_EQ] = I3616_CMEQ,
2281        [TCG_COND_GT] = I3616_CMGT,
2282        [TCG_COND_GE] = I3616_CMGE,
2283        [TCG_COND_GTU] = I3616_CMHI,
2284        [TCG_COND_GEU] = I3616_CMHS,
2285    };
2286    static const AArch64Insn cmp0_insn[16] = {
2287        [TCG_COND_EQ] = I3617_CMEQ0,
2288        [TCG_COND_GT] = I3617_CMGT0,
2289        [TCG_COND_GE] = I3617_CMGE0,
2290        [TCG_COND_LT] = I3617_CMLT0,
2291        [TCG_COND_LE] = I3617_CMLE0,
2292    };
2293
2294    TCGType type = vecl + TCG_TYPE_V64;
2295    unsigned is_q = vecl;
2296    TCGArg a0, a1, a2, a3;
2297    int cmode, imm8;
2298
2299    a0 = args[0];
2300    a1 = args[1];
2301    a2 = args[2];
2302
2303    switch (opc) {
2304    case INDEX_op_ld_vec:
2305        tcg_out_ld(s, type, a0, a1, a2);
2306        break;
2307    case INDEX_op_st_vec:
2308        tcg_out_st(s, type, a0, a1, a2);
2309        break;
2310    case INDEX_op_dupm_vec:
2311        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2312        break;
2313    case INDEX_op_add_vec:
2314        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2315        break;
2316    case INDEX_op_sub_vec:
2317        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2318        break;
2319    case INDEX_op_mul_vec:
2320        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2321        break;
2322    case INDEX_op_neg_vec:
2323        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2324        break;
2325    case INDEX_op_abs_vec:
2326        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2327        break;
2328    case INDEX_op_and_vec:
2329        if (const_args[2]) {
2330            is_shimm1632(~a2, &cmode, &imm8);
2331            if (a0 == a1) {
2332                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2333                return;
2334            }
2335            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2336            a2 = a0;
2337        }
2338        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2339        break;
2340    case INDEX_op_or_vec:
2341        if (const_args[2]) {
2342            is_shimm1632(a2, &cmode, &imm8);
2343            if (a0 == a1) {
2344                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2345                return;
2346            }
2347            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2348            a2 = a0;
2349        }
2350        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2351        break;
2352    case INDEX_op_andc_vec:
2353        if (const_args[2]) {
2354            is_shimm1632(a2, &cmode, &imm8);
2355            if (a0 == a1) {
2356                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2357                return;
2358            }
2359            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2360            a2 = a0;
2361        }
2362        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2363        break;
2364    case INDEX_op_orc_vec:
2365        if (const_args[2]) {
2366            is_shimm1632(~a2, &cmode, &imm8);
2367            if (a0 == a1) {
2368                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2369                return;
2370            }
2371            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2372            a2 = a0;
2373        }
2374        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2375        break;
2376    case INDEX_op_xor_vec:
2377        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2378        break;
2379    case INDEX_op_ssadd_vec:
2380        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2381        break;
2382    case INDEX_op_sssub_vec:
2383        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2384        break;
2385    case INDEX_op_usadd_vec:
2386        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2387        break;
2388    case INDEX_op_ussub_vec:
2389        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2390        break;
2391    case INDEX_op_smax_vec:
2392        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2393        break;
2394    case INDEX_op_smin_vec:
2395        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2396        break;
2397    case INDEX_op_umax_vec:
2398        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2399        break;
2400    case INDEX_op_umin_vec:
2401        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2402        break;
2403    case INDEX_op_not_vec:
2404        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2405        break;
2406    case INDEX_op_shli_vec:
2407        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2408        break;
2409    case INDEX_op_shri_vec:
2410        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2411        break;
2412    case INDEX_op_sari_vec:
2413        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2414        break;
2415    case INDEX_op_aa64_sli_vec:
2416        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2417        break;
2418    case INDEX_op_shlv_vec:
2419        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2420        break;
2421    case INDEX_op_aa64_sshl_vec:
2422        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2423        break;
2424    case INDEX_op_cmp_vec:
2425        {
2426            TCGCond cond = args[3];
2427            AArch64Insn insn;
2428
2429            if (cond == TCG_COND_NE) {
2430                if (const_args[2]) {
2431                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2432                } else {
2433                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2434                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2435                }
2436            } else {
2437                if (const_args[2]) {
2438                    insn = cmp0_insn[cond];
2439                    if (insn) {
2440                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2441                        break;
2442                    }
2443                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2444                    a2 = TCG_VEC_TMP;
2445                }
2446                insn = cmp_insn[cond];
2447                if (insn == 0) {
2448                    TCGArg t;
2449                    t = a1, a1 = a2, a2 = t;
2450                    cond = tcg_swap_cond(cond);
2451                    insn = cmp_insn[cond];
2452                    tcg_debug_assert(insn != 0);
2453                }
2454                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2455            }
2456        }
2457        break;
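        /* With the "wZ" constraint a constant a2 can only be zero, so
           the NE case above folds to "cmtst a0, a1, a1": each lane is
           set to all-ones exactly when (a1 & a1) != 0.  */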
2458
2459    case INDEX_op_bitsel_vec:
2460        a3 = args[3];
2461        if (a0 == a3) {
2462            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2463        } else if (a0 == a2) {
2464            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2465        } else {
2466            if (a0 != a1) {
2467                tcg_out_mov(s, type, a0, a1);
2468            }
2469            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2470        }
2471        break;
2472
2473    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2474    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2475    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2476    default:
2477        g_assert_not_reached();
2478    }
2479}
2480
2481int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2482{
2483    switch (opc) {
2484    case INDEX_op_add_vec:
2485    case INDEX_op_sub_vec:
2486    case INDEX_op_and_vec:
2487    case INDEX_op_or_vec:
2488    case INDEX_op_xor_vec:
2489    case INDEX_op_andc_vec:
2490    case INDEX_op_orc_vec:
2491    case INDEX_op_neg_vec:
2492    case INDEX_op_abs_vec:
2493    case INDEX_op_not_vec:
2494    case INDEX_op_cmp_vec:
2495    case INDEX_op_shli_vec:
2496    case INDEX_op_shri_vec:
2497    case INDEX_op_sari_vec:
2498    case INDEX_op_ssadd_vec:
2499    case INDEX_op_sssub_vec:
2500    case INDEX_op_usadd_vec:
2501    case INDEX_op_ussub_vec:
2502    case INDEX_op_shlv_vec:
2503    case INDEX_op_bitsel_vec:
2504        return 1;
2505    case INDEX_op_rotli_vec:
2506    case INDEX_op_shrv_vec:
2507    case INDEX_op_sarv_vec:
2508    case INDEX_op_rotlv_vec:
2509    case INDEX_op_rotrv_vec:
2510        return -1;
2511    case INDEX_op_mul_vec:
2512    case INDEX_op_smax_vec:
2513    case INDEX_op_smin_vec:
2514    case INDEX_op_umax_vec:
2515    case INDEX_op_umin_vec:
2516        return vece < MO_64;
2517
2518    default:
2519        return 0;
2520    }
2521}
2522
2523void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2524                       TCGArg a0, ...)
2525{
2526    va_list va;
2527    TCGv_vec v0, v1, v2, t1, t2;
2528    TCGArg a2;
2529
2530    va_start(va, a0);
2531    v0 = temp_tcgv_vec(arg_temp(a0));
2532    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2533    a2 = va_arg(va, TCGArg);
2534    v2 = temp_tcgv_vec(arg_temp(a2));
2535
2536    switch (opc) {
2537    case INDEX_op_rotli_vec:
2538        t1 = tcg_temp_new_vec(type);
2539        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2540        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2541                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2542        tcg_temp_free_vec(t1);
2543        break;
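        /* E.g. a rotate of byte lanes left by 3 becomes "ushr t1, v1, #5"
           then "sli t1, v1, #3" (the "w_0_w" constraint aliases v0 to t1),
           merging the two halves of the rotate in place.  */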
2544
2545    case INDEX_op_shrv_vec:
2546    case INDEX_op_sarv_vec:
2547        /* Right shifts are negative left shifts for AArch64.  */
2548        t1 = tcg_temp_new_vec(type);
2549        tcg_gen_neg_vec(vece, t1, v2);
2550        opc = (opc == INDEX_op_shrv_vec
2551               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2552        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2553                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2554        tcg_temp_free_vec(t1);
2555        break;
2556
2557    case INDEX_op_rotlv_vec:
2558        t1 = tcg_temp_new_vec(type);
2559        tcg_gen_dupi_vec(vece, t1, 8 << vece);
2560        tcg_gen_sub_vec(vece, t1, v2, t1);
2561        /* Right shifts are negative left shifts for AArch64.  */
2562        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2563                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2564        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2565                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2566        tcg_gen_or_vec(vece, v0, v0, t1);
2567        tcg_temp_free_vec(t1);
2568        break;
2569
2570    case INDEX_op_rotrv_vec:
2571        t1 = tcg_temp_new_vec(type);
2572        t2 = tcg_temp_new_vec(type);
2573        tcg_gen_neg_vec(vece, t1, v2);
2574        tcg_gen_dupi_vec(vece, t2, 8 << vece);
2575        tcg_gen_add_vec(vece, t2, t1, t2);
2576        /* Right shifts are negative left shifts for AArch64.  */
2577        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2578                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2579        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2580                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2581        tcg_gen_or_vec(vece, v0, t1, t2);
2582        tcg_temp_free_vec(t1);
2583        tcg_temp_free_vec(t2);
2584        break;
2585
2586    default:
2587        g_assert_not_reached();
2588    }
2589
2590    va_end(va);
2591}
2592
2593static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2594{
2595    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2596    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2597    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2598    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2599    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2600    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2601    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2602    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2603    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2604    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2605    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2606    static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2607    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2608    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2609    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2610    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2611    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2612    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2613    static const TCGTargetOpDef r_r_rAL
2614        = { .args_ct_str = { "r", "r", "rAL" } };
2615    static const TCGTargetOpDef dep
2616        = { .args_ct_str = { "r", "0", "rZ" } };
2617    static const TCGTargetOpDef ext2
2618        = { .args_ct_str = { "r", "rZ", "rZ" } };
2619    static const TCGTargetOpDef movc
2620        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2621    static const TCGTargetOpDef add2
2622        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2623    static const TCGTargetOpDef w_w_w_w
2624        = { .args_ct_str = { "w", "w", "w", "w" } };
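    /* A rough legend for the constraint letters above, as an informal
       aid (the constraint parser elsewhere in this file is authoritative):
       "r" core register, "w" vector register, "l" softmmu address
       register, "Z" the constant zero, "A" arithmetic immediate,
       "L" logical immediate, "0" must match operand 0; "M", "N" and
       "O" admit further immediate forms.  */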
2625
2626    switch (op) {
2627    case INDEX_op_goto_ptr:
2628        return &r;
2629
2630    case INDEX_op_ld8u_i32:
2631    case INDEX_op_ld8s_i32:
2632    case INDEX_op_ld16u_i32:
2633    case INDEX_op_ld16s_i32:
2634    case INDEX_op_ld_i32:
2635    case INDEX_op_ld8u_i64:
2636    case INDEX_op_ld8s_i64:
2637    case INDEX_op_ld16u_i64:
2638    case INDEX_op_ld16s_i64:
2639    case INDEX_op_ld32u_i64:
2640    case INDEX_op_ld32s_i64:
2641    case INDEX_op_ld_i64:
2642    case INDEX_op_neg_i32:
2643    case INDEX_op_neg_i64:
2644    case INDEX_op_not_i32:
2645    case INDEX_op_not_i64:
2646    case INDEX_op_bswap16_i32:
2647    case INDEX_op_bswap32_i32:
2648    case INDEX_op_bswap16_i64:
2649    case INDEX_op_bswap32_i64:
2650    case INDEX_op_bswap64_i64:
2651    case INDEX_op_ext8s_i32:
2652    case INDEX_op_ext16s_i32:
2653    case INDEX_op_ext8u_i32:
2654    case INDEX_op_ext16u_i32:
2655    case INDEX_op_ext8s_i64:
2656    case INDEX_op_ext16s_i64:
2657    case INDEX_op_ext32s_i64:
2658    case INDEX_op_ext8u_i64:
2659    case INDEX_op_ext16u_i64:
2660    case INDEX_op_ext32u_i64:
2661    case INDEX_op_ext_i32_i64:
2662    case INDEX_op_extu_i32_i64:
2663    case INDEX_op_extract_i32:
2664    case INDEX_op_extract_i64:
2665    case INDEX_op_sextract_i32:
2666    case INDEX_op_sextract_i64:
2667        return &r_r;
2668
2669    case INDEX_op_st8_i32:
2670    case INDEX_op_st16_i32:
2671    case INDEX_op_st_i32:
2672    case INDEX_op_st8_i64:
2673    case INDEX_op_st16_i64:
2674    case INDEX_op_st32_i64:
2675    case INDEX_op_st_i64:
2676        return &rZ_r;
2677
2678    case INDEX_op_add_i32:
2679    case INDEX_op_add_i64:
2680    case INDEX_op_sub_i32:
2681    case INDEX_op_sub_i64:
2682    case INDEX_op_setcond_i32:
2683    case INDEX_op_setcond_i64:
2684        return &r_r_rA;
2685
2686    case INDEX_op_mul_i32:
2687    case INDEX_op_mul_i64:
2688    case INDEX_op_div_i32:
2689    case INDEX_op_div_i64:
2690    case INDEX_op_divu_i32:
2691    case INDEX_op_divu_i64:
2692    case INDEX_op_rem_i32:
2693    case INDEX_op_rem_i64:
2694    case INDEX_op_remu_i32:
2695    case INDEX_op_remu_i64:
2696    case INDEX_op_muluh_i64:
2697    case INDEX_op_mulsh_i64:
2698        return &r_r_r;
2699
2700    case INDEX_op_and_i32:
2701    case INDEX_op_and_i64:
2702    case INDEX_op_or_i32:
2703    case INDEX_op_or_i64:
2704    case INDEX_op_xor_i32:
2705    case INDEX_op_xor_i64:
2706    case INDEX_op_andc_i32:
2707    case INDEX_op_andc_i64:
2708    case INDEX_op_orc_i32:
2709    case INDEX_op_orc_i64:
2710    case INDEX_op_eqv_i32:
2711    case INDEX_op_eqv_i64:
2712        return &r_r_rL;
2713
2714    case INDEX_op_shl_i32:
2715    case INDEX_op_shr_i32:
2716    case INDEX_op_sar_i32:
2717    case INDEX_op_rotl_i32:
2718    case INDEX_op_rotr_i32:
2719    case INDEX_op_shl_i64:
2720    case INDEX_op_shr_i64:
2721    case INDEX_op_sar_i64:
2722    case INDEX_op_rotl_i64:
2723    case INDEX_op_rotr_i64:
2724        return &r_r_ri;
2725
2726    case INDEX_op_clz_i32:
2727    case INDEX_op_ctz_i32:
2728    case INDEX_op_clz_i64:
2729    case INDEX_op_ctz_i64:
2730        return &r_r_rAL;
2731
2732    case INDEX_op_brcond_i32:
2733    case INDEX_op_brcond_i64:
2734        return &r_rA;
2735
2736    case INDEX_op_movcond_i32:
2737    case INDEX_op_movcond_i64:
2738        return &movc;
2739
2740    case INDEX_op_qemu_ld_i32:
2741    case INDEX_op_qemu_ld_i64:
2742        return &r_l;
2743    case INDEX_op_qemu_st_i32:
2744    case INDEX_op_qemu_st_i64:
2745        return &lZ_l;
2746
2747    case INDEX_op_deposit_i32:
2748    case INDEX_op_deposit_i64:
2749        return &dep;
2750
2751    case INDEX_op_extract2_i32:
2752    case INDEX_op_extract2_i64:
2753        return &ext2;
2754
2755    case INDEX_op_add2_i32:
2756    case INDEX_op_add2_i64:
2757    case INDEX_op_sub2_i32:
2758    case INDEX_op_sub2_i64:
2759        return &add2;
2760
2761    case INDEX_op_add_vec:
2762    case INDEX_op_sub_vec:
2763    case INDEX_op_mul_vec:
2764    case INDEX_op_xor_vec:
2765    case INDEX_op_ssadd_vec:
2766    case INDEX_op_sssub_vec:
2767    case INDEX_op_usadd_vec:
2768    case INDEX_op_ussub_vec:
2769    case INDEX_op_smax_vec:
2770    case INDEX_op_smin_vec:
2771    case INDEX_op_umax_vec:
2772    case INDEX_op_umin_vec:
2773    case INDEX_op_shlv_vec:
2774    case INDEX_op_shrv_vec:
2775    case INDEX_op_sarv_vec:
2776    case INDEX_op_aa64_sshl_vec:
2777        return &w_w_w;
2778    case INDEX_op_not_vec:
2779    case INDEX_op_neg_vec:
2780    case INDEX_op_abs_vec:
2781    case INDEX_op_shli_vec:
2782    case INDEX_op_shri_vec:
2783    case INDEX_op_sari_vec:
2784        return &w_w;
2785    case INDEX_op_ld_vec:
2786    case INDEX_op_st_vec:
2787    case INDEX_op_dupm_vec:
2788        return &w_r;
2789    case INDEX_op_dup_vec:
2790        return &w_wr;
2791    case INDEX_op_or_vec:
2792    case INDEX_op_andc_vec:
2793        return &w_w_wO;
2794    case INDEX_op_and_vec:
2795    case INDEX_op_orc_vec:
2796        return &w_w_wN;
2797    case INDEX_op_cmp_vec:
2798        return &w_w_wZ;
2799    case INDEX_op_bitsel_vec:
2800        return &w_w_w_w;
2801    case INDEX_op_aa64_sli_vec:
2802        return &w_0_w;
2803
2804    default:
2805        return NULL;
2806    }
2807}
2808
2809static void tcg_target_init(TCGContext *s)
2810{
2811    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2812    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2813    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2814    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2815
2816    tcg_target_call_clobber_regs = -1ull;
2817    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2818    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2819    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2820    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2821    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2822    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2823    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2824    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2825    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2826    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2827    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2828    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2829    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2830    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2831    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2832    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2833    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2834    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2835    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2836
2837    s->reserved_regs = 0;
2838    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2839    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2840    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2841    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2842    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2843}
2844
2845/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2846#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2847
2848#define FRAME_SIZE \
2849    ((PUSH_SIZE \
2850      + TCG_STATIC_CALL_ARGS_SIZE \
2851      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2852      + TCG_TARGET_STACK_ALIGN - 1) \
2853     & ~(TCG_TARGET_STACK_ALIGN - 1))
2854
2855/* We're expecting a 2-byte uleb128 encoded value.  */
2856QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
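
/* For a value below 1 << 14, uleb128 is two bytes: the low 7 bits with
   the continuation bit set, then the remaining bits.  E.g. a hypothetical
   FRAME_SIZE of 0x4e0 would encode as 0xe0 0x09, which is exactly what
   .fde_def_cfa computes below.  */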
2857
2858/* We're expecting to use a single ADDI insn.  */
2859QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2860
2861static void tcg_target_qemu_prologue(TCGContext *s)
2862{
2863    TCGReg r;
2864
2865    /* Push (FP, LR) and allocate space for all saved registers.  */
2866    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2867                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2868
2869    /* Set up frame pointer for canonical unwinding.  */
2870    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2871
2872    /* Store callee-preserved regs x19..x28.  */
2873    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2874        int ofs = (r - TCG_REG_X19 + 2) * 8;
2875        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2876    }
2877
2878    /* Make stack space for TCG locals.  */
2879    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2880                 FRAME_SIZE - PUSH_SIZE);
2881
2882    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2883    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2884                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2885
2886#if !defined(CONFIG_SOFTMMU)
2887    if (USE_GUEST_BASE) {
2888        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2889        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2890    }
2891#endif
2892
2893    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2894    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2895
2896    /*
2897     * Return path for goto_ptr. Set return value to 0, a la exit_tb,
2898     * and fall through to the rest of the epilogue.
2899     */
2900    s->code_gen_epilogue = s->code_ptr;
2901    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2902
2903    /* TB epilogue */
2904    tb_ret_addr = s->code_ptr;
2905
2906    /* Remove TCG locals stack space.  */
2907    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2908                 FRAME_SIZE - PUSH_SIZE);
2909
2910    /* Restore registers x19..x28.  */
2911    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2912        int ofs = (r - TCG_REG_X19 + 2) * 8;
2913        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2914    }
2915
2916    /* Pop (FP, LR), restore SP to previous frame.  */
2917    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2918                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2919    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2920}
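
/*
 * A sketch of the prologue emitted above; PUSH_SIZE here is 96 bytes
 * (12 saved registers), while FRAME_SIZE varies with the configuration:
 *
 *    stp  x29, x30, [sp, #-96]!    // push FP/LR, reserve the save area
 *    mov  x29, sp                  // frame pointer for unwinding
 *    stp  x19, x20, [sp, #16]
 *    ...
 *    stp  x27, x28, [sp, #80]
 *    sub  sp, sp, #(FRAME_SIZE - 96)
 *    mov  x19, x0                  // AREG0 = env, the first C argument
 *    br   x1                       // enter the translated code
 */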
2921
2922static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2923{
2924    int i;
2925    for (i = 0; i < count; ++i) {
2926        p[i] = NOP;
2927    }
2928}
2929
2930typedef struct {
2931    DebugFrameHeader h;
2932    uint8_t fde_def_cfa[4];
2933    uint8_t fde_reg_ofs[24];
2934} DebugFrame;
2935
2936#define ELF_HOST_MACHINE EM_AARCH64
2937
2938static const DebugFrame debug_frame = {
2939    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2940    .h.cie.id = -1,
2941    .h.cie.version = 1,
2942    .h.cie.code_align = 1,
2943    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2944    .h.cie.return_column = TCG_REG_LR,
2945
2946    /* Total FDE size does not include the "len" member.  */
2947    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2948
2949    .fde_def_cfa = {
2950        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2951        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2952        (FRAME_SIZE >> 7)
2953    },
2954    .fde_reg_ofs = {
2955        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2956        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2957        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2958        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2959        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2960        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2961        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2962        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2963        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2964        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2965        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2966        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2967    }
2968};
2969
2970void tcg_register_jit(void *buf, size_t buf_size)
2971{
2972    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2973}
2974