/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"

/* We're going to re-use TCGType in setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};
static const int tcg_target_call_oarg_regs[1] = {
    TCG_REG_X0
};

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

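/*
 * Both relocations above measure OFFSET in 32-bit instruction units
 * (tcg_insn_unit), which matches the word-scaled immediates of the
 * AArch64 branch instructions, so the pointer difference can be
 * deposited directly into the imm26/imm19 fields.
 */
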
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch (*ct_str++) {
    case 'r': /* general registers */
        ct->regs |= 0xffffffffu;
        break;
    case 'w': /* advsimd registers */
        ct->regs |= 0xffffffff00000000ull;
        break;
    case 'l': /* qemu_ld / qemu_st address, data_reg */
        ct->regs = 0xffffffffu;
#ifdef CONFIG_SOFTMMU
        /* x0 and x1 will be overwritten when reading the tlb entry,
           and x2 and x3 are used for helper args; better to avoid them. */
        tcg_regset_reset_reg(ct->regs, TCG_REG_X0);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X1);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X2);
        tcg_regset_reset_reg(ct->regs, TCG_REG_X3);
#endif
        break;
    case 'A': /* Valid for arithmetic immediate (positive or negative).  */
        ct->ct |= TCG_CT_CONST_AIMM;
        break;
    case 'L': /* Valid for logical immediate.  */
        ct->ct |= TCG_CT_CONST_LIMM;
        break;
    case 'M': /* minus one */
        ct->ct |= TCG_CT_CONST_MONE;
        break;
    case 'O': /* vector orr/bic immediate */
        ct->ct |= TCG_CT_CONST_ORRI;
        break;
    case 'N': /* vector orr/bic immediate, inverted */
        ct->ct |= TCG_CT_CONST_ANDI;
        break;
    case 'Z': /* zero */
        ct->ct |= TCG_CT_CONST_ZERO;
        break;
    default:
        return NULL;
    }
    return ct_str;
}

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

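/* e.g. 0x123 (unshifted) and 0x123000 (LSL #12) are valid arithmetic
   immediates, while 0x123456 is not and must be built some other way. */
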
/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}

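/*
 * Worked example: val = 0x0ff0 (form 0..01..10..0).  The lowest set bit
 * is 0x10; adding it ripples through the run of ones, giving 0x1000,
 * a power of two, so the test succeeds.  For val = 0x0f0f the carry
 * stops at 0x0f10, which is not a power of two, so it is rejected.
 * Note the simplification above also rejects replicated patterns such
 * as 0x5555555555555555, even though the architecture can encode them.
 */
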
/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

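/*
 * Example: 1.0f == 0x3f800000 has its low 19 bits clear and exponent
 * bits equal to 0x1f, so is_fimm32 accepts it with cmode 0xf and
 * imm8 0x70, the FMOV (vector, immediate) encoding of 1.0.
 */
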
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

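/*
 * Worked example: v32 = 0x12003400.  With i = 6 the 0x12 byte is masked
 * out, leaving 0x00003400, which is_shimm32 accepts (cmode 0x2, imm8
 * 0x34).  The caller then emits MOVI for 0x00003400 plus an ORR with
 * cmode 6, imm8 = extract32(v32, 24, 8) = 0x12 to fill in the top byte.
 */
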
/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;

    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV16     = 0x5ac00400,
    I3507_REV32     = 0x5ac00800,
    I3507_REV64     = 0x5ac00c00,

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e20a800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

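/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing an opcode
 * with the wrong format group fails to resolve to an I3401_* symbol and
 * is caught at compile time.
 */
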
static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

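/*
 * The pair offset is a 7-bit signed immediate scaled by 8, placed at
 * bit 15.  E.g. for STP x0, x1, [x19, #-16], ofs = -16 encodes as
 * imm7 = 0x7e (-2 in 7-bit two's complement).
 */
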
static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

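/*
 * DST_IDX lands in the imm5 field; its lowest set bit selects the
 * element size.  E.g. tcg_out_dup_vec below passes 1 << vece, so for
 * vece == MO_8 imm5 is 0b00001: DUP of byte lane 0, or of Wn when rn
 * is a general register via the bit-11 trick noted above.
 */
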
static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}

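/*
 * Worked example: limm = 0x0ff0 has l = 4 trailing zeros and h = 52
 * leading zeros, giving r = 60 and c = 7: a run of eight ones rotated
 * right by 60, i.e. IMMR = 60, IMMS = 7 for DecodeBitMasks.
 */
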
static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg rd, tcg_target_long v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (v64 == dup_const(MO_8, v64)) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (v64 == dup_const(MO_16, v64)) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (v64 == dup_const(MO_32, v64)) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_debug_assert(rd >= 32);
        tcg_out_dupi_vec(s, type, rd, value);
        return;

    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
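    /* E.g. value = 0x0000123400005678 starts with MOVZ rd, #0x5678 and
       finishes with MOVK rd, #0x1234, lsl #32; anything still needing a
       third insn falls through to the constant pool below.  */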
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffUL << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffUL << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

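/*
 * E.g. for an 8-byte load (lgsize 3): offset 8 uses the scaled form
 * (LDR Xt, [Xn, #8]); offset -8 falls back to the unscaled 9-bit form
 * (LDUR); offset 0x20000 is materialized in TCG_REG_TMP and accessed
 * via the register-offset form.
 */
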
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
    }
}

static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_callr(s, TCG_REG_TMP);
    }
}

void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                              uintptr_t jmp_rw, uintptr_t addr)
{
    tcg_insn_unit i1, i2;
    TCGType rt = TCG_TYPE_I64;
    TCGReg  rd = TCG_REG_TMP;
    uint64_t pair;

    ptrdiff_t offset = addr - jmp_rx;

    if (offset == sextract64(offset, 0, 26)) {
        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
        i2 = NOP;
    } else {
        offset = (addr >> 12) - (jmp_rx >> 12);

        /* patch ADRP */
        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
        /* patch ADDI */
        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
    }
    pair = (uint64_t)i2 << 32 | i1;
    qatomic_set((uint64_t *)jmp_rw, pair);
    flush_idcache_range(jmp_rx, jmp_rw, 8);
}

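/*
 * The B/NOP (or ADRP/ADDI) pair above is rewritten with a single 64-bit
 * store, so concurrently executing cores observe either the old or the
 * new pair.  This assumes the jump site is 8-byte aligned, as arranged
 * when the goto_tb sequence is emitted.
 */
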
1374static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1375{
1376    if (!l->has_value) {
1377        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1378        tcg_out_insn(s, 3206, B, 0);
1379    } else {
1380        tcg_out_goto(s, l->u.value_ptr);
1381    }
1382}
1383
1384static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1385                           TCGArg b, bool b_const, TCGLabel *l)
1386{
1387    intptr_t offset;
1388    bool need_cmp;
1389
1390    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1391        need_cmp = false;
1392    } else {
1393        need_cmp = true;
1394        tcg_out_cmp(s, ext, a, b, b_const);
1395    }
1396
1397    if (!l->has_value) {
1398        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1399        offset = tcg_in32(s) >> 5;
1400    } else {
1401        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1402        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1403    }
1404
1405    if (need_cmp) {
1406        tcg_out_insn(s, 3202, B_C, c, offset);
1407    } else if (c == TCG_COND_EQ) {
1408        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1409    } else {
1410        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1411    }
1412}
1413
1414static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1415{
1416    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1417}
1418
1419static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1420{
1421    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1422}
1423
1424static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1425{
1426    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1427}
1428
1429static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1430                               TCGReg rd, TCGReg rn)
1431{
1432    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1433    int bits = (8 << s_bits) - 1;
1434    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1435}
1436
1437static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1438                               TCGReg rd, TCGReg rn)
1439{
1440    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1441    int bits = (8 << s_bits) - 1;
1442    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1443}
1444
1445static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1446                            TCGReg rn, int64_t aimm)
1447{
1448    if (aimm >= 0) {
1449        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1450    } else {
1451        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1452    }
1453}
1454
1455static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1456                                   TCGReg rh, TCGReg al, TCGReg ah,
1457                                   tcg_target_long bl, tcg_target_long bh,
1458                                   bool const_bl, bool const_bh, bool sub)
1459{
1460    TCGReg orig_rl = rl;
1461    AArch64Insn insn;
1462
1463    if (rl == ah || (!const_bh && rl == bh)) {
1464        rl = TCG_REG_TMP;
1465    }
1466
1467    if (const_bl) {
1468        insn = I3401_ADDSI;
1469        if ((bl < 0) ^ sub) {
1470            insn = I3401_SUBSI;
1471            bl = -bl;
1472        }
1473        if (unlikely(al == TCG_REG_XZR)) {
1474            /* ??? We want to allow al to be zero for the benefit of
1475               negation via subtraction.  However, that leaves open the
1476               possibility of adding 0+const in the low part, and the
1477               immediate add instructions encode XSP not XZR.  Don't try
1478               anything more elaborate here than loading another zero.  */
1479            al = TCG_REG_TMP;
1480            tcg_out_movi(s, ext, al, 0);
1481        }
1482        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1483    } else {
1484        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1485    }
1486
1487    insn = I3503_ADC;
1488    if (const_bh) {
1489        /* Note that the only two constants we support are 0 and -1, and
1490           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1491        if ((bh != 0) ^ sub) {
1492            insn = I3503_SBC;
1493        }
1494        bh = TCG_REG_XZR;
1495    } else if (sub) {
1496        insn = I3503_SBC;
1497    }
1498    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1499
1500    tcg_out_mov(s, ext, orig_rl, rl);
1501}
1502
1503static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1504{
1505    static const uint32_t sync[] = {
1506        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1507        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1508        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1509        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1510        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1511    };
1512    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1513}
1514
1515static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1516                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1517{
1518    TCGReg a1 = a0;
1519    if (is_ctz) {
1520        a1 = TCG_REG_TMP;
1521        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1522    }
1523    if (const_b && b == (ext ? 64 : 32)) {
1524        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1525    } else {
1526        AArch64Insn sel = I3506_CSEL;
1527
1528        tcg_out_cmp(s, ext, a0, 0, 1);
1529        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1530
1531        if (const_b) {
1532            if (b == -1) {
1533                b = TCG_REG_XZR;
1534                sel = I3506_CSINV;
1535            } else if (b == 0) {
1536                b = TCG_REG_XZR;
1537            } else {
1538                tcg_out_movi(s, ext, d, b);
1539                b = d;
1540            }
1541        }
1542        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1543    }
1544}
1545
1546#ifdef CONFIG_SOFTMMU
1547#include "../tcg-ldst.c.inc"
1548
1549/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1550 *                                     TCGMemOpIdx oi, uintptr_t ra)
1551 */
1552static void * const qemu_ld_helpers[16] = {
1553    [MO_UB]   = helper_ret_ldub_mmu,
1554    [MO_LEUW] = helper_le_lduw_mmu,
1555    [MO_LEUL] = helper_le_ldul_mmu,
1556    [MO_LEQ]  = helper_le_ldq_mmu,
1557    [MO_BEUW] = helper_be_lduw_mmu,
1558    [MO_BEUL] = helper_be_ldul_mmu,
1559    [MO_BEQ]  = helper_be_ldq_mmu,
1560};
1561
1562/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1563 *                                     uintxx_t val, TCGMemOpIdx oi,
1564 *                                     uintptr_t ra)
1565 */
1566static void * const qemu_st_helpers[16] = {
1567    [MO_UB]   = helper_ret_stb_mmu,
1568    [MO_LEUW] = helper_le_stw_mmu,
1569    [MO_LEUL] = helper_le_stl_mmu,
1570    [MO_LEQ]  = helper_le_stq_mmu,
1571    [MO_BEUW] = helper_be_stw_mmu,
1572    [MO_BEUL] = helper_be_stl_mmu,
1573    [MO_BEQ]  = helper_be_stq_mmu,
1574};
1575
1576static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1577{
1578    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1579    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1580    tcg_out_insn(s, 3406, ADR, rd, offset);
1581}
1582
1583static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1584{
1585    TCGMemOpIdx oi = lb->oi;
1586    MemOp opc = get_memop(oi);
1587    MemOp size = opc & MO_SIZE;
1588
1589    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1590        return false;
1591    }
1592
1593    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1594    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1595    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1596    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1597    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1598    if (opc & MO_SIGN) {
1599        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1600    } else {
1601        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1602    }
1603
1604    tcg_out_goto(s, lb->raddr);
1605    return true;
1606}
1607
1608static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1609{
1610    TCGMemOpIdx oi = lb->oi;
1611    MemOp opc = get_memop(oi);
1612    MemOp size = opc & MO_SIZE;
1613
1614    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1615        return false;
1616    }
1617
1618    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1619    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1620    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1621    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1622    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1623    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1624    tcg_out_goto(s, lb->raddr);
1625    return true;
1626}
1627
1628static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1629                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1630                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1631{
1632    TCGLabelQemuLdst *label = new_ldst_label(s);
1633
1634    label->is_ld = is_ld;
1635    label->oi = oi;
1636    label->type = ext;
1637    label->datalo_reg = data_reg;
1638    label->addrlo_reg = addr_reg;
1639    label->raddr = tcg_splitwx_to_rx(raddr);
1640    label->label_ptr[0] = label_ptr;
1641}
1642
1643/* We expect to use a 7-bit scaled negative offset from ENV.  */
1644QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1645QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1646
1647/* These offsets are built into the LDP below.  */
1648QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1649QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
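/*
 * A 64-bit LDP encodes a signed 7-bit immediate scaled by 8, giving a
 * reach of [-512, +504] bytes.  The assertions above pin
 * TLB_MASK_TABLE_OFS into the negative half of that range, so the
 * single LDP in tcg_out_tlb_read can always address {mask, table}.
 */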
1650
1651/* Load and compare a TLB entry, emitting the conditional jump to the
1652   slow path for the failure case, which will be patched later when
1653   finalizing the slow path.  Generated code returns the host addend
1654   in X1 and clobbers X0, X2, X3 and TMP.  */
1655static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1656                             tcg_insn_unit **label_ptr, int mem_index,
1657                             bool is_read)
1658{
1659    unsigned a_bits = get_alignment_bits(opc);
1660    unsigned s_bits = opc & MO_SIZE;
1661    unsigned a_mask = (1u << a_bits) - 1;
1662    unsigned s_mask = (1u << s_bits) - 1;
1663    TCGReg x3;
1664    TCGType mask_type;
1665    uint64_t compare_mask;
1666
1667    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1668                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1669
1670    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1671    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1672                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1673
1674    /* Extract the TLB index from the address into X0.  */
1675    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1676                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1677                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1678
1679    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1680    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1681
1682    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1683    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1684               ? offsetof(CPUTLBEntry, addr_read)
1685               : offsetof(CPUTLBEntry, addr_write));
1686    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1687               offsetof(CPUTLBEntry, addend));
1688
1689    /* For aligned accesses, we check the first byte and include the alignment
1690       bits within the address.  For unaligned access, we check that we don't
1691       cross pages using the address of the last byte of the access.  */
1692    if (a_bits >= s_bits) {
1693        x3 = addr_reg;
1694    } else {
1695        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1696                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1697        x3 = TCG_REG_X3;
1698    }
1699    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1700
1701    /* Store the page mask part of the address into X3.  */
1702    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1703                     TCG_REG_X3, x3, compare_mask);
1704
1705    /* Perform the address comparison. */
1706    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1707
1708    /* If not equal, we jump to the slow path. */
1709    *label_ptr = s->code_ptr;
1710    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1711}
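/*
 * Illustrative shape of the emitted fast path (not verbatim; the ADD
 * appears only for accesses wider than their guaranteed alignment):
 *
 *     ldp   x0, x1, [env, #-TLB_OFS]    ; f[mmu_idx].{mask, table}
 *     and   x0, x0, addr, lsr #(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)
 *     add   x1, x1, x0                  ; &f.table[index]
 *     ldr   x0, [x1, #addr_read]        ; or addr_write: comparator
 *     ldr   x1, [x1, #addend]           ; host addend: the result
 *     add   x3, addr, #(s_mask - a_mask)  ; last-byte check, if needed
 *     and   x3, x3, #(TARGET_PAGE_MASK | a_mask)
 *     cmp   x0, x3
 *     b.ne  slow_path                   ; offset patched via label_ptr
 */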
1712
1713#endif /* CONFIG_SOFTMMU */
1714
1715static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1716                                   TCGReg data_r, TCGReg addr_r,
1717                                   TCGType otype, TCGReg off_r)
1718{
1719    const MemOp bswap = memop & MO_BSWAP;
1720
1721    switch (memop & MO_SSIZE) {
1722    case MO_UB:
1723        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1724        break;
1725    case MO_SB:
1726        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1727                       data_r, addr_r, otype, off_r);
1728        break;
1729    case MO_UW:
1730        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1731        if (bswap) {
1732            tcg_out_rev16(s, data_r, data_r);
1733        }
1734        break;
1735    case MO_SW:
1736        if (bswap) {
1737            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1738            tcg_out_rev16(s, data_r, data_r);
1739            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1740        } else {
1741            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1742                           data_r, addr_r, otype, off_r);
1743        }
1744        break;
1745    case MO_UL:
1746        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1747        if (bswap) {
1748            tcg_out_rev32(s, data_r, data_r);
1749        }
1750        break;
1751    case MO_SL:
1752        if (bswap) {
1753            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1754            tcg_out_rev32(s, data_r, data_r);
1755            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1756        } else {
1757            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1758        }
1759        break;
1760    case MO_Q:
1761        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1762        if (bswap) {
1763            tcg_out_rev64(s, data_r, data_r);
1764        }
1765        break;
1766    default:
1767        tcg_abort();
1768    }
1769}
1770
1771static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1772                                   TCGReg data_r, TCGReg addr_r,
1773                                   TCGType otype, TCGReg off_r)
1774{
1775    const MemOp bswap = memop & MO_BSWAP;
1776
1777    switch (memop & MO_SIZE) {
1778    case MO_8:
1779        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1780        break;
1781    case MO_16:
1782        if (bswap && data_r != TCG_REG_XZR) {
1783            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1784            data_r = TCG_REG_TMP;
1785        }
1786        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1787        break;
1788    case MO_32:
1789        if (bswap && data_r != TCG_REG_XZR) {
1790            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1791            data_r = TCG_REG_TMP;
1792        }
1793        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1794        break;
1795    case MO_64:
1796        if (bswap && data_r != TCG_REG_XZR) {
1797            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1798            data_r = TCG_REG_TMP;
1799        }
1800        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1801        break;
1802    default:
1803        tcg_abort();
1804    }
1805}
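/*
 * Note the data_r != TCG_REG_XZR tests above: byte-swapping zero still
 * yields zero, so a constant-zero store (the "lZ" constraint) keeps
 * XZR as the source and skips the REV through TCG_REG_TMP entirely.
 */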
1806
1807static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1808                            TCGMemOpIdx oi, TCGType ext)
1809{
1810    MemOp memop = get_memop(oi);
1811    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1812#ifdef CONFIG_SOFTMMU
1813    unsigned mem_index = get_mmuidx(oi);
1814    tcg_insn_unit *label_ptr;
1815
1816    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1817    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1818                           TCG_REG_X1, otype, addr_reg);
1819    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1820                        s->code_ptr, label_ptr);
1821#else /* !CONFIG_SOFTMMU */
1822    if (USE_GUEST_BASE) {
1823        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1824                               TCG_REG_GUEST_BASE, otype, addr_reg);
1825    } else {
1826        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1827                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1828    }
1829#endif /* CONFIG_SOFTMMU */
1830}
1831
1832static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1833                            TCGMemOpIdx oi)
1834{
1835    MemOp memop = get_memop(oi);
1836    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1837#ifdef CONFIG_SOFTMMU
1838    unsigned mem_index = get_mmuidx(oi);
1839    tcg_insn_unit *label_ptr;
1840
1841    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1842    tcg_out_qemu_st_direct(s, memop, data_reg,
1843                           TCG_REG_X1, otype, addr_reg);
1844    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1845                        data_reg, addr_reg, s->code_ptr, label_ptr);
1846#else /* !CONFIG_SOFTMMU */
1847    if (USE_GUEST_BASE) {
1848        tcg_out_qemu_st_direct(s, memop, data_reg,
1849                               TCG_REG_GUEST_BASE, otype, addr_reg);
1850    } else {
1851        tcg_out_qemu_st_direct(s, memop, data_reg,
1852                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1853    }
1854#endif /* CONFIG_SOFTMMU */
1855}
1856
1857static const tcg_insn_unit *tb_ret_addr;
1858
1859static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1860                       const TCGArg args[TCG_MAX_OP_ARGS],
1861                       const int const_args[TCG_MAX_OP_ARGS])
1862{
1863    /* 99% of the time, we can signal the use of extension registers
1864       by looking to see if the opcode handles 64-bit data.  */
1865    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1866
1867    /* Hoist the loads of the most common arguments.  */
1868    TCGArg a0 = args[0];
1869    TCGArg a1 = args[1];
1870    TCGArg a2 = args[2];
1871    int c2 = const_args[2];
1872
1873    /* Some operands use the "rZ" constraint, i.e. a register or the zero
1874       register; "rZ" admits only constant 0, so REG0 need not test args[I].  */
1875#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
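    /* For example, storing constant 0 with st8_i32 arrives here with
       const_args[0] set, and REG0(0) turns it into a store of XZR
       without materializing the zero in a scratch register.  */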
1876
1877    switch (opc) {
1878    case INDEX_op_exit_tb:
1879        /* Reuse the zeroing that exists for goto_ptr.  */
1880        if (a0 == 0) {
1881            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1882        } else {
1883            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1884            tcg_out_goto_long(s, tb_ret_addr);
1885        }
1886        break;
1887
1888    case INDEX_op_goto_tb:
1889        if (s->tb_jmp_insn_offset != NULL) {
1890            /* TCG_TARGET_HAS_direct_jump */
1891            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1892               write can be used to patch the target address. */
1893            if ((uintptr_t)s->code_ptr & 7) {
1894                tcg_out32(s, NOP);
1895            }
1896            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1897            /* actual branch destination will be patched by
1898               tb_target_set_jmp_target later. */
1899            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1900            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1901        } else {
1902            /* !TCG_TARGET_HAS_direct_jump */
1903            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1904            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1905            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1906        }
1907        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1908        set_jmp_reset_offset(s, a0);
1909        break;
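        /*
         * Rough sketch of how the pair above ends up once patched by
         * tb_target_set_jmp_target: a target within +/-128 MiB becomes
         *     b     target
         *     nop
         * (the BR below is then never reached), while a distant target
         * keeps the
         *     adrp  tmp, target_page
         *     add   tmp, tmp, #lo12
         * form and falls through to BR TMP.  The 8-byte alignment
         * enforced above lets both instructions be rewritten with one
         * atomic 64-bit store.
         */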
1910
1911    case INDEX_op_goto_ptr:
1912        tcg_out_insn(s, 3207, BR, a0);
1913        break;
1914
1915    case INDEX_op_br:
1916        tcg_out_goto_label(s, arg_label(a0));
1917        break;
1918
1919    case INDEX_op_ld8u_i32:
1920    case INDEX_op_ld8u_i64:
1921        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1922        break;
1923    case INDEX_op_ld8s_i32:
1924        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1925        break;
1926    case INDEX_op_ld8s_i64:
1927        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1928        break;
1929    case INDEX_op_ld16u_i32:
1930    case INDEX_op_ld16u_i64:
1931        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1932        break;
1933    case INDEX_op_ld16s_i32:
1934        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1935        break;
1936    case INDEX_op_ld16s_i64:
1937        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1938        break;
1939    case INDEX_op_ld_i32:
1940    case INDEX_op_ld32u_i64:
1941        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1942        break;
1943    case INDEX_op_ld32s_i64:
1944        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1945        break;
1946    case INDEX_op_ld_i64:
1947        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1948        break;
1949
1950    case INDEX_op_st8_i32:
1951    case INDEX_op_st8_i64:
1952        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1953        break;
1954    case INDEX_op_st16_i32:
1955    case INDEX_op_st16_i64:
1956        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1957        break;
1958    case INDEX_op_st_i32:
1959    case INDEX_op_st32_i64:
1960        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1961        break;
1962    case INDEX_op_st_i64:
1963        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1964        break;
1965
1966    case INDEX_op_add_i32:
1967        a2 = (int32_t)a2;
1968        /* FALLTHRU */
1969    case INDEX_op_add_i64:
1970        if (c2) {
1971            tcg_out_addsubi(s, ext, a0, a1, a2);
1972        } else {
1973            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1974        }
1975        break;
1976
1977    case INDEX_op_sub_i32:
1978        a2 = (int32_t)a2;
1979        /* FALLTHRU */
1980    case INDEX_op_sub_i64:
1981        if (c2) {
1982            tcg_out_addsubi(s, ext, a0, a1, -a2);
1983        } else {
1984            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1985        }
1986        break;
1987
1988    case INDEX_op_neg_i64:
1989    case INDEX_op_neg_i32:
1990        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1991        break;
1992
1993    case INDEX_op_and_i32:
1994        a2 = (int32_t)a2;
1995        /* FALLTHRU */
1996    case INDEX_op_and_i64:
1997        if (c2) {
1998            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1999        } else {
2000            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2001        }
2002        break;
2003
2004    case INDEX_op_andc_i32:
2005        a2 = (int32_t)a2;
2006        /* FALLTHRU */
2007    case INDEX_op_andc_i64:
2008        if (c2) {
2009            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2010        } else {
2011            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2012        }
2013        break;
2014
2015    case INDEX_op_or_i32:
2016        a2 = (int32_t)a2;
2017        /* FALLTHRU */
2018    case INDEX_op_or_i64:
2019        if (c2) {
2020            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2021        } else {
2022            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2023        }
2024        break;
2025
2026    case INDEX_op_orc_i32:
2027        a2 = (int32_t)a2;
2028        /* FALLTHRU */
2029    case INDEX_op_orc_i64:
2030        if (c2) {
2031            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2032        } else {
2033            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2034        }
2035        break;
2036
2037    case INDEX_op_xor_i32:
2038        a2 = (int32_t)a2;
2039        /* FALLTHRU */
2040    case INDEX_op_xor_i64:
2041        if (c2) {
2042            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2043        } else {
2044            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2045        }
2046        break;
2047
2048    case INDEX_op_eqv_i32:
2049        a2 = (int32_t)a2;
2050        /* FALLTHRU */
2051    case INDEX_op_eqv_i64:
2052        if (c2) {
2053            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2054        } else {
2055            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2056        }
2057        break;
2058
2059    case INDEX_op_not_i64:
2060    case INDEX_op_not_i32:
2061        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2062        break;
2063
2064    case INDEX_op_mul_i64:
2065    case INDEX_op_mul_i32:
2066        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2067        break;
2068
2069    case INDEX_op_div_i64:
2070    case INDEX_op_div_i32:
2071        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2072        break;
2073    case INDEX_op_divu_i64:
2074    case INDEX_op_divu_i32:
2075        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2076        break;
2077
2078    case INDEX_op_rem_i64:
2079    case INDEX_op_rem_i32:
2080        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2081        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2082        break;
2083    case INDEX_op_remu_i64:
2084    case INDEX_op_remu_i32:
2085        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2086        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2087        break;
2088
2089    case INDEX_op_shl_i64:
2090    case INDEX_op_shl_i32:
2091        if (c2) {
2092            tcg_out_shl(s, ext, a0, a1, a2);
2093        } else {
2094            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2095        }
2096        break;
2097
2098    case INDEX_op_shr_i64:
2099    case INDEX_op_shr_i32:
2100        if (c2) {
2101            tcg_out_shr(s, ext, a0, a1, a2);
2102        } else {
2103            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2104        }
2105        break;
2106
2107    case INDEX_op_sar_i64:
2108    case INDEX_op_sar_i32:
2109        if (c2) {
2110            tcg_out_sar(s, ext, a0, a1, a2);
2111        } else {
2112            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2113        }
2114        break;
2115
2116    case INDEX_op_rotr_i64:
2117    case INDEX_op_rotr_i32:
2118        if (c2) {
2119            tcg_out_rotr(s, ext, a0, a1, a2);
2120        } else {
2121            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2122        }
2123        break;
2124
2125    case INDEX_op_rotl_i64:
2126    case INDEX_op_rotl_i32:
2127        if (c2) {
2128            tcg_out_rotl(s, ext, a0, a1, a2);
2129        } else {
2130            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2131            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2132        }
2133        break;
2134
2135    case INDEX_op_clz_i64:
2136    case INDEX_op_clz_i32:
2137        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2138        break;
2139    case INDEX_op_ctz_i64:
2140    case INDEX_op_ctz_i32:
2141        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2142        break;
2143
2144    case INDEX_op_brcond_i32:
2145        a1 = (int32_t)a1;
2146        /* FALLTHRU */
2147    case INDEX_op_brcond_i64:
2148        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2149        break;
2150
2151    case INDEX_op_setcond_i32:
2152        a2 = (int32_t)a2;
2153        /* FALLTHRU */
2154    case INDEX_op_setcond_i64:
2155        tcg_out_cmp(s, ext, a1, a2, c2);
2156        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2157        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2158                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2159        break;
2160
2161    case INDEX_op_movcond_i32:
2162        a2 = (int32_t)a2;
2163        /* FALLTHRU */
2164    case INDEX_op_movcond_i64:
2165        tcg_out_cmp(s, ext, a1, a2, c2);
2166        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2167        break;
2168
2169    case INDEX_op_qemu_ld_i32:
2170    case INDEX_op_qemu_ld_i64:
2171        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2172        break;
2173    case INDEX_op_qemu_st_i32:
2174    case INDEX_op_qemu_st_i64:
2175        tcg_out_qemu_st(s, REG0(0), a1, a2);
2176        break;
2177
2178    case INDEX_op_bswap64_i64:
2179        tcg_out_rev64(s, a0, a1);
2180        break;
2181    case INDEX_op_bswap32_i64:
2182    case INDEX_op_bswap32_i32:
2183        tcg_out_rev32(s, a0, a1);
2184        break;
2185    case INDEX_op_bswap16_i64:
2186    case INDEX_op_bswap16_i32:
2187        tcg_out_rev16(s, a0, a1);
2188        break;
2189
2190    case INDEX_op_ext8s_i64:
2191    case INDEX_op_ext8s_i32:
2192        tcg_out_sxt(s, ext, MO_8, a0, a1);
2193        break;
2194    case INDEX_op_ext16s_i64:
2195    case INDEX_op_ext16s_i32:
2196        tcg_out_sxt(s, ext, MO_16, a0, a1);
2197        break;
2198    case INDEX_op_ext_i32_i64:
2199    case INDEX_op_ext32s_i64:
2200        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2201        break;
2202    case INDEX_op_ext8u_i64:
2203    case INDEX_op_ext8u_i32:
2204        tcg_out_uxt(s, MO_8, a0, a1);
2205        break;
2206    case INDEX_op_ext16u_i64:
2207    case INDEX_op_ext16u_i32:
2208        tcg_out_uxt(s, MO_16, a0, a1);
2209        break;
2210    case INDEX_op_extu_i32_i64:
2211    case INDEX_op_ext32u_i64:
2212        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2213        break;
2214
2215    case INDEX_op_deposit_i64:
2216    case INDEX_op_deposit_i32:
2217        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2218        break;
2219
2220    case INDEX_op_extract_i64:
2221    case INDEX_op_extract_i32:
2222        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2223        break;
2224
2225    case INDEX_op_sextract_i64:
2226    case INDEX_op_sextract_i32:
2227        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2228        break;
2229
2230    case INDEX_op_extract2_i64:
2231    case INDEX_op_extract2_i32:
2232        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2233        break;
2234
2235    case INDEX_op_add2_i32:
2236        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2237                        (int32_t)args[4], args[5], const_args[4],
2238                        const_args[5], false);
2239        break;
2240    case INDEX_op_add2_i64:
2241        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2242                        args[5], const_args[4], const_args[5], false);
2243        break;
2244    case INDEX_op_sub2_i32:
2245        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2246                        (int32_t)args[4], args[5], const_args[4],
2247                        const_args[5], true);
2248        break;
2249    case INDEX_op_sub2_i64:
2250        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2251                        args[5], const_args[4], const_args[5], true);
2252        break;
2253
2254    case INDEX_op_muluh_i64:
2255        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2256        break;
2257    case INDEX_op_mulsh_i64:
2258        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2259        break;
2260
2261    case INDEX_op_mb:
2262        tcg_out_mb(s, a0);
2263        break;
2264
2265    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2266    case INDEX_op_mov_i64:
2267    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
2268    case INDEX_op_movi_i64:
2269    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2270    default:
2271        g_assert_not_reached();
2272    }
2273
2274#undef REG0
2275}
2276
2277static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2278                           unsigned vecl, unsigned vece,
2279                           const TCGArg *args, const int *const_args)
2280{
2281    static const AArch64Insn cmp_insn[16] = {
2282        [TCG_COND_EQ] = I3616_CMEQ,
2283        [TCG_COND_GT] = I3616_CMGT,
2284        [TCG_COND_GE] = I3616_CMGE,
2285        [TCG_COND_GTU] = I3616_CMHI,
2286        [TCG_COND_GEU] = I3616_CMHS,
2287    };
2288    static const AArch64Insn cmp0_insn[16] = {
2289        [TCG_COND_EQ] = I3617_CMEQ0,
2290        [TCG_COND_GT] = I3617_CMGT0,
2291        [TCG_COND_GE] = I3617_CMGE0,
2292        [TCG_COND_LT] = I3617_CMLT0,
2293        [TCG_COND_LE] = I3617_CMLE0,
2294    };
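    /*
     * Only half the conditions have a direct AdvSIMD encoding; the rest
     * are derived below, e.g. LTU swaps its operands to use CMHI (GTU),
     * and NE is CMEQ + NOT, or CMTST of a register against itself when
     * comparing with zero.
     */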
2295
2296    TCGType type = vecl + TCG_TYPE_V64;
2297    unsigned is_q = vecl;
2298    TCGArg a0, a1, a2, a3;
2299    int cmode, imm8;
2300
2301    a0 = args[0];
2302    a1 = args[1];
2303    a2 = args[2];
2304
2305    switch (opc) {
2306    case INDEX_op_ld_vec:
2307        tcg_out_ld(s, type, a0, a1, a2);
2308        break;
2309    case INDEX_op_st_vec:
2310        tcg_out_st(s, type, a0, a1, a2);
2311        break;
2312    case INDEX_op_dupm_vec:
2313        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2314        break;
2315    case INDEX_op_add_vec:
2316        tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2317        break;
2318    case INDEX_op_sub_vec:
2319        tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2320        break;
2321    case INDEX_op_mul_vec:
2322        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2323        break;
2324    case INDEX_op_neg_vec:
2325        tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2326        break;
2327    case INDEX_op_abs_vec:
2328        tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2329        break;
2330    case INDEX_op_and_vec:
2331        if (const_args[2]) {
2332            is_shimm1632(~a2, &cmode, &imm8);
2333            if (a0 == a1) {
2334                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2335                return;
2336            }
2337            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2338            a2 = a0;
2339        }
2340        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2341        break;
2342    case INDEX_op_or_vec:
2343        if (const_args[2]) {
2344            is_shimm1632(a2, &cmode, &imm8);
2345            if (a0 == a1) {
2346                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2347                return;
2348            }
2349            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2350            a2 = a0;
2351        }
2352        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2353        break;
2354    case INDEX_op_andc_vec:
2355        if (const_args[2]) {
2356            is_shimm1632(a2, &cmode, &imm8);
2357            if (a0 == a1) {
2358                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2359                return;
2360            }
2361            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2362            a2 = a0;
2363        }
2364        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2365        break;
2366    case INDEX_op_orc_vec:
2367        if (const_args[2]) {
2368            is_shimm1632(~a2, &cmode, &imm8);
2369            if (a0 == a1) {
2370                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2371                return;
2372            }
2373            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2374            a2 = a0;
2375        }
2376        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2377        break;
2378    case INDEX_op_xor_vec:
2379        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2380        break;
2381    case INDEX_op_ssadd_vec:
2382        tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2383        break;
2384    case INDEX_op_sssub_vec:
2385        tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2386        break;
2387    case INDEX_op_usadd_vec:
2388        tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2389        break;
2390    case INDEX_op_ussub_vec:
2391        tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2392        break;
2393    case INDEX_op_smax_vec:
2394        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2395        break;
2396    case INDEX_op_smin_vec:
2397        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2398        break;
2399    case INDEX_op_umax_vec:
2400        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2401        break;
2402    case INDEX_op_umin_vec:
2403        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2404        break;
2405    case INDEX_op_not_vec:
2406        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2407        break;
2408    case INDEX_op_shli_vec:
2409        tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2410        break;
2411    case INDEX_op_shri_vec:
2412        tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2413        break;
2414    case INDEX_op_sari_vec:
2415        tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2416        break;
2417    case INDEX_op_aa64_sli_vec:
2418        tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2419        break;
2420    case INDEX_op_shlv_vec:
2421        tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2422        break;
2423    case INDEX_op_aa64_sshl_vec:
2424        tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2425        break;
2426    case INDEX_op_cmp_vec:
2427        {
2428            TCGCond cond = args[3];
2429            AArch64Insn insn;
2430
2431            if (cond == TCG_COND_NE) {
2432                if (const_args[2]) {
2433                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2434                } else {
2435                    tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2436                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2437                }
2438            } else {
2439                if (const_args[2]) {
2440                    insn = cmp0_insn[cond];
2441                    if (insn) {
2442                        tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2443                        break;
2444                    }
2445                    tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
2446                    a2 = TCG_VEC_TMP;
2447                }
2448                insn = cmp_insn[cond];
2449                if (insn == 0) {
2450                    TCGArg t;
2451                    t = a1, a1 = a2, a2 = t;
2452                    cond = tcg_swap_cond(cond);
2453                    insn = cmp_insn[cond];
2454                    tcg_debug_assert(insn != 0);
2455                }
2456                tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2457            }
2458        }
2459        break;
2460
2461    case INDEX_op_bitsel_vec:
2462        a3 = args[3];
2463        if (a0 == a3) {
2464            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2465        } else if (a0 == a2) {
2466            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2467        } else {
2468            if (a0 != a1) {
2469                tcg_out_mov(s, type, a0, a1);
2470            }
2471            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2472        }
2473        break;
2474
2475    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2476    case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi.  */
2477    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2478    default:
2479        g_assert_not_reached();
2480    }
2481}
2482
2483int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2484{
2485    switch (opc) {
2486    case INDEX_op_add_vec:
2487    case INDEX_op_sub_vec:
2488    case INDEX_op_and_vec:
2489    case INDEX_op_or_vec:
2490    case INDEX_op_xor_vec:
2491    case INDEX_op_andc_vec:
2492    case INDEX_op_orc_vec:
2493    case INDEX_op_neg_vec:
2494    case INDEX_op_abs_vec:
2495    case INDEX_op_not_vec:
2496    case INDEX_op_cmp_vec:
2497    case INDEX_op_shli_vec:
2498    case INDEX_op_shri_vec:
2499    case INDEX_op_sari_vec:
2500    case INDEX_op_ssadd_vec:
2501    case INDEX_op_sssub_vec:
2502    case INDEX_op_usadd_vec:
2503    case INDEX_op_ussub_vec:
2504    case INDEX_op_shlv_vec:
2505    case INDEX_op_bitsel_vec:
2506        return 1;
2507    case INDEX_op_rotli_vec:
2508    case INDEX_op_shrv_vec:
2509    case INDEX_op_sarv_vec:
2510    case INDEX_op_rotlv_vec:
2511    case INDEX_op_rotrv_vec:
2512        return -1;
2513    case INDEX_op_mul_vec:
2514    case INDEX_op_smax_vec:
2515    case INDEX_op_smin_vec:
2516    case INDEX_op_umax_vec:
2517    case INDEX_op_umin_vec:
2518        return vece < MO_64;
2519
2520    default:
2521        return 0;
2522    }
2523}
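/*
 * Return values use the usual TCG convention: 1 means directly
 * supported, -1 means supported after rewriting by tcg_expand_vec_op
 * below, 0 means unsupported.  MUL and the SMAX/SMIN/UMAX/UMIN family
 * answer 0 for MO_64 because AdvSIMD lacks 64-bit element forms of
 * those instructions.
 */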
2524
2525void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2526                       TCGArg a0, ...)
2527{
2528    va_list va;
2529    TCGv_vec v0, v1, v2, t1, t2;
2530    TCGArg a2;
2531
2532    va_start(va, a0);
2533    v0 = temp_tcgv_vec(arg_temp(a0));
2534    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2535    a2 = va_arg(va, TCGArg);
2536    v2 = temp_tcgv_vec(arg_temp(a2));
2537
2538    switch (opc) {
2539    case INDEX_op_rotli_vec:
2540        t1 = tcg_temp_new_vec(type);
2541        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2542        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2543                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2544        tcg_temp_free_vec(t1);
2545        break;
2546
2547    case INDEX_op_shrv_vec:
2548    case INDEX_op_sarv_vec:
2549        /* Right shifts are negative left shifts for AArch64.  */
2550        t1 = tcg_temp_new_vec(type);
2551        tcg_gen_neg_vec(vece, t1, v2);
2552        opc = (opc == INDEX_op_shrv_vec
2553               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2554        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2555                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2556        tcg_temp_free_vec(t1);
2557        break;
2558
2559    case INDEX_op_rotlv_vec:
2560        t1 = tcg_temp_new_vec(type);
2561        tcg_gen_dupi_vec(vece, t1, 8 << vece);
2562        tcg_gen_sub_vec(vece, t1, v2, t1);
2563        /* Right shifts are negative left shifts for AArch64.  */
2564        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2565                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2566        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2567                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2568        tcg_gen_or_vec(vece, v0, v0, t1);
2569        tcg_temp_free_vec(t1);
2570        break;
2571
2572    case INDEX_op_rotrv_vec:
2573        t1 = tcg_temp_new_vec(type);
2574        t2 = tcg_temp_new_vec(type);
2575        tcg_gen_neg_vec(vece, t1, v2);
2576        tcg_gen_dupi_vec(vece, t2, 8 << vece);
2577        tcg_gen_add_vec(vece, t2, t1, t2);
2578        /* Right shifts are negative left shifts for AArch64.  */
2579        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2580                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2581        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2582                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2583        tcg_gen_or_vec(vece, v0, t1, t2);
2584        tcg_temp_free_vec(t1);
2585        tcg_temp_free_vec(t2);
2586        break;
2587
2588    default:
2589        g_assert_not_reached();
2590    }
2591
2592    va_end(va);
2593}
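/*
 * Worked example for the rotli expansion, assuming 32-bit lanes
 * (vece == MO_32) and a rotation of 9: the USHR produces t1 = v1 >> 23,
 * then the SLI inserts v1 << 9 over bits 9..31 of t1 while keeping its
 * low 9 bits, yielding (v1 << 9) | (v1 >> 23) per lane -- the rotate.
 */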
2594
2595static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2596{
2597    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
2598    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
2599    static const TCGTargetOpDef w_w = { .args_ct_str = { "w", "w" } };
2600    static const TCGTargetOpDef w_r = { .args_ct_str = { "w", "r" } };
2601    static const TCGTargetOpDef w_wr = { .args_ct_str = { "w", "wr" } };
2602    static const TCGTargetOpDef r_l = { .args_ct_str = { "r", "l" } };
2603    static const TCGTargetOpDef r_rA = { .args_ct_str = { "r", "rA" } };
2604    static const TCGTargetOpDef rZ_r = { .args_ct_str = { "rZ", "r" } };
2605    static const TCGTargetOpDef lZ_l = { .args_ct_str = { "lZ", "l" } };
2606    static const TCGTargetOpDef r_r_r = { .args_ct_str = { "r", "r", "r" } };
2607    static const TCGTargetOpDef w_w_w = { .args_ct_str = { "w", "w", "w" } };
2608    static const TCGTargetOpDef w_0_w = { .args_ct_str = { "w", "0", "w" } };
2609    static const TCGTargetOpDef w_w_wO = { .args_ct_str = { "w", "w", "wO" } };
2610    static const TCGTargetOpDef w_w_wN = { .args_ct_str = { "w", "w", "wN" } };
2611    static const TCGTargetOpDef w_w_wZ = { .args_ct_str = { "w", "w", "wZ" } };
2612    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
2613    static const TCGTargetOpDef r_r_rA = { .args_ct_str = { "r", "r", "rA" } };
2614    static const TCGTargetOpDef r_r_rL = { .args_ct_str = { "r", "r", "rL" } };
2615    static const TCGTargetOpDef r_r_rAL
2616        = { .args_ct_str = { "r", "r", "rAL" } };
2617    static const TCGTargetOpDef dep
2618        = { .args_ct_str = { "r", "0", "rZ" } };
2619    static const TCGTargetOpDef ext2
2620        = { .args_ct_str = { "r", "rZ", "rZ" } };
2621    static const TCGTargetOpDef movc
2622        = { .args_ct_str = { "r", "r", "rA", "rZ", "rZ" } };
2623    static const TCGTargetOpDef add2
2624        = { .args_ct_str = { "r", "r", "rZ", "rZ", "rA", "rMZ" } };
2625    static const TCGTargetOpDef w_w_w_w
2626        = { .args_ct_str = { "w", "w", "w", "w" } };
2627
2628    switch (op) {
2629    case INDEX_op_goto_ptr:
2630        return &r;
2631
2632    case INDEX_op_ld8u_i32:
2633    case INDEX_op_ld8s_i32:
2634    case INDEX_op_ld16u_i32:
2635    case INDEX_op_ld16s_i32:
2636    case INDEX_op_ld_i32:
2637    case INDEX_op_ld8u_i64:
2638    case INDEX_op_ld8s_i64:
2639    case INDEX_op_ld16u_i64:
2640    case INDEX_op_ld16s_i64:
2641    case INDEX_op_ld32u_i64:
2642    case INDEX_op_ld32s_i64:
2643    case INDEX_op_ld_i64:
2644    case INDEX_op_neg_i32:
2645    case INDEX_op_neg_i64:
2646    case INDEX_op_not_i32:
2647    case INDEX_op_not_i64:
2648    case INDEX_op_bswap16_i32:
2649    case INDEX_op_bswap32_i32:
2650    case INDEX_op_bswap16_i64:
2651    case INDEX_op_bswap32_i64:
2652    case INDEX_op_bswap64_i64:
2653    case INDEX_op_ext8s_i32:
2654    case INDEX_op_ext16s_i32:
2655    case INDEX_op_ext8u_i32:
2656    case INDEX_op_ext16u_i32:
2657    case INDEX_op_ext8s_i64:
2658    case INDEX_op_ext16s_i64:
2659    case INDEX_op_ext32s_i64:
2660    case INDEX_op_ext8u_i64:
2661    case INDEX_op_ext16u_i64:
2662    case INDEX_op_ext32u_i64:
2663    case INDEX_op_ext_i32_i64:
2664    case INDEX_op_extu_i32_i64:
2665    case INDEX_op_extract_i32:
2666    case INDEX_op_extract_i64:
2667    case INDEX_op_sextract_i32:
2668    case INDEX_op_sextract_i64:
2669        return &r_r;
2670
2671    case INDEX_op_st8_i32:
2672    case INDEX_op_st16_i32:
2673    case INDEX_op_st_i32:
2674    case INDEX_op_st8_i64:
2675    case INDEX_op_st16_i64:
2676    case INDEX_op_st32_i64:
2677    case INDEX_op_st_i64:
2678        return &rZ_r;
2679
2680    case INDEX_op_add_i32:
2681    case INDEX_op_add_i64:
2682    case INDEX_op_sub_i32:
2683    case INDEX_op_sub_i64:
2684    case INDEX_op_setcond_i32:
2685    case INDEX_op_setcond_i64:
2686        return &r_r_rA;
2687
2688    case INDEX_op_mul_i32:
2689    case INDEX_op_mul_i64:
2690    case INDEX_op_div_i32:
2691    case INDEX_op_div_i64:
2692    case INDEX_op_divu_i32:
2693    case INDEX_op_divu_i64:
2694    case INDEX_op_rem_i32:
2695    case INDEX_op_rem_i64:
2696    case INDEX_op_remu_i32:
2697    case INDEX_op_remu_i64:
2698    case INDEX_op_muluh_i64:
2699    case INDEX_op_mulsh_i64:
2700        return &r_r_r;
2701
2702    case INDEX_op_and_i32:
2703    case INDEX_op_and_i64:
2704    case INDEX_op_or_i32:
2705    case INDEX_op_or_i64:
2706    case INDEX_op_xor_i32:
2707    case INDEX_op_xor_i64:
2708    case INDEX_op_andc_i32:
2709    case INDEX_op_andc_i64:
2710    case INDEX_op_orc_i32:
2711    case INDEX_op_orc_i64:
2712    case INDEX_op_eqv_i32:
2713    case INDEX_op_eqv_i64:
2714        return &r_r_rL;
2715
2716    case INDEX_op_shl_i32:
2717    case INDEX_op_shr_i32:
2718    case INDEX_op_sar_i32:
2719    case INDEX_op_rotl_i32:
2720    case INDEX_op_rotr_i32:
2721    case INDEX_op_shl_i64:
2722    case INDEX_op_shr_i64:
2723    case INDEX_op_sar_i64:
2724    case INDEX_op_rotl_i64:
2725    case INDEX_op_rotr_i64:
2726        return &r_r_ri;
2727
2728    case INDEX_op_clz_i32:
2729    case INDEX_op_ctz_i32:
2730    case INDEX_op_clz_i64:
2731    case INDEX_op_ctz_i64:
2732        return &r_r_rAL;
2733
2734    case INDEX_op_brcond_i32:
2735    case INDEX_op_brcond_i64:
2736        return &r_rA;
2737
2738    case INDEX_op_movcond_i32:
2739    case INDEX_op_movcond_i64:
2740        return &movc;
2741
2742    case INDEX_op_qemu_ld_i32:
2743    case INDEX_op_qemu_ld_i64:
2744        return &r_l;
2745    case INDEX_op_qemu_st_i32:
2746    case INDEX_op_qemu_st_i64:
2747        return &lZ_l;
2748
2749    case INDEX_op_deposit_i32:
2750    case INDEX_op_deposit_i64:
2751        return &dep;
2752
2753    case INDEX_op_extract2_i32:
2754    case INDEX_op_extract2_i64:
2755        return &ext2;
2756
2757    case INDEX_op_add2_i32:
2758    case INDEX_op_add2_i64:
2759    case INDEX_op_sub2_i32:
2760    case INDEX_op_sub2_i64:
2761        return &add2;
2762
2763    case INDEX_op_add_vec:
2764    case INDEX_op_sub_vec:
2765    case INDEX_op_mul_vec:
2766    case INDEX_op_xor_vec:
2767    case INDEX_op_ssadd_vec:
2768    case INDEX_op_sssub_vec:
2769    case INDEX_op_usadd_vec:
2770    case INDEX_op_ussub_vec:
2771    case INDEX_op_smax_vec:
2772    case INDEX_op_smin_vec:
2773    case INDEX_op_umax_vec:
2774    case INDEX_op_umin_vec:
2775    case INDEX_op_shlv_vec:
2776    case INDEX_op_shrv_vec:
2777    case INDEX_op_sarv_vec:
2778    case INDEX_op_aa64_sshl_vec:
2779        return &w_w_w;
2780    case INDEX_op_not_vec:
2781    case INDEX_op_neg_vec:
2782    case INDEX_op_abs_vec:
2783    case INDEX_op_shli_vec:
2784    case INDEX_op_shri_vec:
2785    case INDEX_op_sari_vec:
2786        return &w_w;
2787    case INDEX_op_ld_vec:
2788    case INDEX_op_st_vec:
2789    case INDEX_op_dupm_vec:
2790        return &w_r;
2791    case INDEX_op_dup_vec:
2792        return &w_wr;
2793    case INDEX_op_or_vec:
2794    case INDEX_op_andc_vec:
2795        return &w_w_wO;
2796    case INDEX_op_and_vec:
2797    case INDEX_op_orc_vec:
2798        return &w_w_wN;
2799    case INDEX_op_cmp_vec:
2800        return &w_w_wZ;
2801    case INDEX_op_bitsel_vec:
2802        return &w_w_w_w;
2803    case INDEX_op_aa64_sli_vec:
2804        return &w_0_w;
2805
2806    default:
2807        return NULL;
2808    }
2809}
2810
2811static void tcg_target_init(TCGContext *s)
2812{
2813    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2814    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2815    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2816    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
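    /*
     * These masks are TCGRegSet bitmaps: bits 0-31 cover x0..sp and
     * bits 32-63 cover v0..v31, hence the high-half constant for the
     * vector types.
     */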
2817
2818    tcg_target_call_clobber_regs = -1ull;
2819    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2820    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2821    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2822    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2823    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2824    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2825    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2826    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2827    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2828    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2829    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2830    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2831    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2832    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2833    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2834    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2835    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2836    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2837    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2838
2839    s->reserved_regs = 0;
2840    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2841    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2842    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2843    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2844    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2845}
2846
2847/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2848#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2849
2850#define FRAME_SIZE \
2851    ((PUSH_SIZE \
2852      + TCG_STATIC_CALL_ARGS_SIZE \
2853      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2854      + TCG_TARGET_STACK_ALIGN - 1) \
2855     & ~(TCG_TARGET_STACK_ALIGN - 1))
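/* Worked example, assuming the current tcg.c values of
   TCG_STATIC_CALL_ARGS_SIZE (128) and CPU_TEMP_BUF_NLONGS (128):
   PUSH_SIZE is 12 * 8 = 96, so FRAME_SIZE = align16(96 + 128 + 1024)
   = 1248, well within both build-time checks below.  */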
2856
2857/* We're expecting a 2-byte uleb128 encoded value.  */
2858QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2859
2860/* We're expecting to use a single ADDI insn.  */
2861QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2862
2863static void tcg_target_qemu_prologue(TCGContext *s)
2864{
2865    TCGReg r;
2866
2867    /* Push (FP, LR) and allocate space for all saved registers.  */
2868    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2869                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2870
2871    /* Set up frame pointer for canonical unwinding.  */
2872    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2873
2874    /* Store callee-preserved regs x19..x28.  */
2875    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2876        int ofs = (r - TCG_REG_X19 + 2) * 8;
2877        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2878    }
2879
2880    /* Make stack space for TCG locals.  */
2881    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2882                 FRAME_SIZE - PUSH_SIZE);
2883
2884    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2885    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2886                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2887
2888#if !defined(CONFIG_SOFTMMU)
2889    if (USE_GUEST_BASE) {
2890        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2891        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2892    }
2893#endif
2894
2895    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2896    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2897
2898    /*
2899     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2900     * and fall through to the rest of the epilogue.
2901     */
2902    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2903    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2904
2905    /* TB epilogue */
2906    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2907
2908    /* Remove TCG locals stack space.  */
2909    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2910                 FRAME_SIZE - PUSH_SIZE);
2911
2912    /* Restore registers x19..x28.  */
2913    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2914        int ofs = (r - TCG_REG_X19 + 2) * 8;
2915        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2916    }
2917
2918    /* Pop (FP, LR), restore SP to previous frame.  */
2919    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2920                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2921    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2922}
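/*
 * Resulting frame layout, relative to the CFA (the SP at entry):
 *
 *     CFA -  8 ... -80 : x28, x27, ..., x19   (callee-saved pairs)
 *     CFA - 88         : x30 (lr)
 *     CFA - 96         : x29 (fp)   <- FP points here
 *     below            : TCG temp buffer plus TCG_STATIC_CALL_ARGS_SIZE
 *                        bytes of outgoing-argument space, down to SP
 *
 * which is exactly what the DW_CFA_offset entries in debug_frame
 * below describe.
 */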
2923
2924static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2925{
2926    int i;
2927    for (i = 0; i < count; ++i) {
2928        p[i] = NOP;
2929    }
2930}
2931
2932typedef struct {
2933    DebugFrameHeader h;
2934    uint8_t fde_def_cfa[4];
2935    uint8_t fde_reg_ofs[24];
2936} DebugFrame;
2937
2938#define ELF_HOST_MACHINE EM_AARCH64
2939
2940static const DebugFrame debug_frame = {
2941    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2942    .h.cie.id = -1,
2943    .h.cie.version = 1,
2944    .h.cie.code_align = 1,
2945    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2946    .h.cie.return_column = TCG_REG_LR,
2947
2948    /* Total FDE size does not include the "len" member.  */
2949    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2950
2951    .fde_def_cfa = {
2952        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2953        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2954        (FRAME_SIZE >> 7)
2955    },
2956    .fde_reg_ofs = {
2957        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
2958        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
2959        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
2960        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
2961        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
2962        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
2963        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
2964        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
2965        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
2966        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
2967        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
2968        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
2969    }
2970};
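/*
 * Encoding notes: data_align 0x78 is the sleb128 encoding of -8, and
 * fde_def_cfa emits FRAME_SIZE as a two-byte uleb128.  For the example
 * FRAME_SIZE of 1248 above, that is 0xe0 0x09: the low seven bits with
 * the continuation bit set, then the remaining high bits.
 */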
2971
2972void tcg_register_jit(const void *buf, size_t buf_size)
2973{
2974    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2975}
2976