xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 5e437d3c)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType when setting the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21#ifdef CONFIG_DEBUG_TCG
22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32};
33#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
69#define TCG_REG_TMP TCG_REG_X30
70#define TCG_VEC_TMP TCG_REG_V31
71
72#ifndef CONFIG_SOFTMMU
73/* Note that XZR cannot be encoded in the address base register slot,
74   as that actually encodes SP.  So if we need to zero-extend the guest
75   address, via the address index register slot, we need to load even
76   a zero guest base into a register.  */
77#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
78#define TCG_REG_GUEST_BASE TCG_REG_X28
79#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
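
/*
 * For example, a B or BL that targets the instruction 8 words ahead of
 * the branch carries the value 8 in its low 26 bits (hardware scales by
 * the 4-byte instruction size), while conditional branches and CBZ/CBNZ
 * keep their 19-bit offset in bits [23:5].  The two helpers above only
 * deposit the already word-scaled offset and report whether it fit.
 */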
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
122#define TCG_CT_CONST_AIMM 0x100
123#define TCG_CT_CONST_LIMM 0x200
124#define TCG_CT_CONST_ZERO 0x400
125#define TCG_CT_CONST_MONE 0x800
126#define TCG_CT_CONST_ORRI 0x1000
127#define TCG_CT_CONST_ANDI 0x2000
128
129#define ALL_GENERAL_REGS  0xffffffffu
130#define ALL_VECTOR_REGS   0xffffffff00000000ull
131
132#ifdef CONFIG_SOFTMMU
133#define ALL_QLDST_REGS \
134    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
135                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
136#else
137#define ALL_QLDST_REGS   ALL_GENERAL_REGS
138#endif
139
140/* Match a constant valid for addition (12-bit, optionally shifted).  */
141static inline bool is_aimm(uint64_t val)
142{
143    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
144}
145
146/* Match a constant valid for logical operations.  */
147static inline bool is_limm(uint64_t val)
148{
149    /* Taking a simplified view of the logical immediates for now, ignoring
150       the replication that can happen across the field.  Match bit patterns
151       of the forms
152           0....01....1
153           0..01..10..0
154       and their inverses.  */
155
156    /* Make things easier below, by testing the form with msb clear. */
157    if ((int64_t)val < 0) {
158        val = ~val;
159    }
160    if (val == 0) {
161        return false;
162    }
163    val += val & -val;
164    return (val & (val - 1)) == 0;
165}
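
/*
 * As a quick illustration of the trick above: for val = 0x0ff0 the
 * expression val & -val isolates the lowest set bit (0x0010), and
 * adding it turns the single run of ones into a power of two (0x1000),
 * so the final test passes.  For val = 0x0f0f the sum is 0x0f10, which
 * still has several bits set, and the value is correctly rejected.
 */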
166
167/* Return true if v16 is a valid 16-bit shifted immediate.  */
168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
169{
170    if (v16 == (v16 & 0xff)) {
171        *cmode = 0x8;
172        *imm8 = v16 & 0xff;
173        return true;
174    } else if (v16 == (v16 & 0xff00)) {
175        *cmode = 0xa;
176        *imm8 = v16 >> 8;
177        return true;
178    }
179    return false;
180}
181
182/* Return true if v32 is a valid 32-bit shifted immediate.  */
183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
184{
185    if (v32 == (v32 & 0xff)) {
186        *cmode = 0x0;
187        *imm8 = v32 & 0xff;
188        return true;
189    } else if (v32 == (v32 & 0xff00)) {
190        *cmode = 0x2;
191        *imm8 = (v32 >> 8) & 0xff;
192        return true;
193    } else if (v32 == (v32 & 0xff0000)) {
194        *cmode = 0x4;
195        *imm8 = (v32 >> 16) & 0xff;
196        return true;
197    } else if (v32 == (v32 & 0xff000000)) {
198        *cmode = 0x6;
199        *imm8 = v32 >> 24;
200        return true;
201    }
202    return false;
203}
204
205/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
207{
208    if ((v32 & 0xffff00ff) == 0xff) {
209        *cmode = 0xc;
210        *imm8 = (v32 >> 8) & 0xff;
211        return true;
212    } else if ((v32 & 0xff00ffff) == 0xffff) {
213        *cmode = 0xd;
214        *imm8 = (v32 >> 16) & 0xff;
215        return true;
216    }
217    return false;
218}
219
220/* Return true if v32 is a valid float32 immediate.  */
221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
222{
223    if (extract32(v32, 0, 19) == 0
224        && (extract32(v32, 25, 6) == 0x20
225            || extract32(v32, 25, 6) == 0x1f)) {
226        *cmode = 0xf;
227        *imm8 = (extract32(v32, 31, 1) << 7)
228              | (extract32(v32, 25, 1) << 6)
229              | extract32(v32, 19, 6);
230        return true;
231    }
232    return false;
233}
234
235/* Return true if v64 is a valid float64 immediate.  */
236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
237{
238    if (extract64(v64, 0, 48) == 0
239        && (extract64(v64, 54, 9) == 0x100
240            || extract64(v64, 54, 9) == 0x0ff)) {
241        *cmode = 0xf;
242        *imm8 = (extract64(v64, 63, 1) << 7)
243              | (extract64(v64, 54, 1) << 6)
244              | extract64(v64, 48, 6);
245        return true;
246    }
247    return false;
248}
249
250/*
251 * Return non-zero if v32 can be formed by MOVI+ORR.
252 * Place the parameters for MOVI in (cmode, imm8).
253 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
254 */
255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
256{
257    int i;
258
259    for (i = 6; i > 0; i -= 2) {
260        /* Mask out one byte we can add with ORR.  */
261        uint32_t tmp = v32 & ~(0xffu << (i * 4));
262        if (is_shimm32(tmp, cmode, imm8) ||
263            is_soimm32(tmp, cmode, imm8)) {
264            break;
265        }
266    }
267    return i;
268}
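
/*
 * For example, v32 = 0xab00cd00: the loop first clears the byte at bits
 * [31:24], leaving 0x0000cd00, which is_shimm32 accepts as MOVI with
 * cmode 0x2 / imm8 0xcd.  The return value 6 is the cmode for the ORR,
 * whose imm8 is the byte the caller extracts with extract32(v32, 24, 8),
 * i.e. 0xab.
 */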
269
270/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
272{
273    if (v32 == deposit32(v32, 16, 16, v32)) {
274        return is_shimm16(v32, cmode, imm8);
275    } else {
276        return is_shimm32(v32, cmode, imm8);
277    }
278}
279
280static int tcg_target_const_match(tcg_target_long val, TCGType type,
281                                  const TCGArgConstraint *arg_ct)
282{
283    int ct = arg_ct->ct;
284
285    if (ct & TCG_CT_CONST) {
286        return 1;
287    }
288    if (type == TCG_TYPE_I32) {
289        val = (int32_t)val;
290    }
291    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
298        return 1;
299    }
300    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
301        return 1;
302    }
303
304    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
305    case 0:
306        break;
307    case TCG_CT_CONST_ANDI:
308        val = ~val;
309        /* fallthru */
310    case TCG_CT_CONST_ORRI:
311        if (val == deposit64(val, 32, 32, val)) {
312            int cmode, imm8;
313            return is_shimm1632(val, &cmode, &imm8);
314        }
315        break;
316    default:
317        /* Both bits should not be set for the same insn.  */
318        g_assert_not_reached();
319    }
320
321    return 0;
322}
323
324enum aarch64_cond_code {
325    COND_EQ = 0x0,
326    COND_NE = 0x1,
327    COND_CS = 0x2,     /* Unsigned greater or equal */
328    COND_HS = COND_CS, /* ALIAS greater or equal */
329    COND_CC = 0x3,     /* Unsigned less than */
330    COND_LO = COND_CC, /* ALIAS Lower */
331    COND_MI = 0x4,     /* Negative */
332    COND_PL = 0x5,     /* Zero or greater */
333    COND_VS = 0x6,     /* Overflow */
334    COND_VC = 0x7,     /* No overflow */
335    COND_HI = 0x8,     /* Unsigned greater than */
336    COND_LS = 0x9,     /* Unsigned less or equal */
337    COND_GE = 0xa,
338    COND_LT = 0xb,
339    COND_GT = 0xc,
340    COND_LE = 0xd,
341    COND_AL = 0xe,
342    COND_NV = 0xf, /* behaves like COND_AL here */
343};
344
345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346    [TCG_COND_EQ] = COND_EQ,
347    [TCG_COND_NE] = COND_NE,
348    [TCG_COND_LT] = COND_LT,
349    [TCG_COND_GE] = COND_GE,
350    [TCG_COND_LE] = COND_LE,
351    [TCG_COND_GT] = COND_GT,
352    /* unsigned */
353    [TCG_COND_LTU] = COND_LO,
354    [TCG_COND_GTU] = COND_HI,
355    [TCG_COND_GEU] = COND_HS,
356    [TCG_COND_LEU] = COND_LS,
357};
358
359typedef enum {
360    LDST_ST = 0,    /* store */
361    LDST_LD = 1,    /* load */
362    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
363    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
364} AArch64LdstType;
365
366/* We encode the format of the insn into the beginning of the name, so that
367   we can have the preprocessor help "typecheck" the insn vs the output
368   function.  Arm didn't provide us with nice names for the formats, so we
369   use the section number of the architecture reference manual in which the
370   instruction group is described.  */
371typedef enum {
372    /* Compare and branch (immediate).  */
373    I3201_CBZ       = 0x34000000,
374    I3201_CBNZ      = 0x35000000,
375
376    /* Conditional branch (immediate).  */
377    I3202_B_C       = 0x54000000,
378
379    /* Unconditional branch (immediate).  */
380    I3206_B         = 0x14000000,
381    I3206_BL        = 0x94000000,
382
383    /* Unconditional branch (register).  */
384    I3207_BR        = 0xd61f0000,
385    I3207_BLR       = 0xd63f0000,
386    I3207_RET       = 0xd65f0000,
387
388    /* AdvSIMD load/store single structure.  */
389    I3303_LD1R      = 0x0d40c000,
390
391    /* Load literal, for loading a value at a pc-relative offset.  */
392    I3305_LDR       = 0x58000000,
393    I3305_LDR_v64   = 0x5c000000,
394    I3305_LDR_v128  = 0x9c000000,
395
396    /* Load/store register.  Described here as 3.3.12, but the helper
397       that emits them can transform to 3.3.10 or 3.3.13.  */
398    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
399    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
400    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
401    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
402
403    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
404    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
405    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
406    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
407
408    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
409    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
410
411    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
412    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
413    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
414
415    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
416    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
417
418    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
419    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
420
421    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
422    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
423
424    I3312_TO_I3310  = 0x00200800,
425    I3312_TO_I3313  = 0x01000000,
426
427    /* Load/store register pair instructions.  */
428    I3314_LDP       = 0x28400000,
429    I3314_STP       = 0x28000000,
430
431    /* Add/subtract immediate instructions.  */
432    I3401_ADDI      = 0x11000000,
433    I3401_ADDSI     = 0x31000000,
434    I3401_SUBI      = 0x51000000,
435    I3401_SUBSI     = 0x71000000,
436
437    /* Bitfield instructions.  */
438    I3402_BFM       = 0x33000000,
439    I3402_SBFM      = 0x13000000,
440    I3402_UBFM      = 0x53000000,
441
442    /* Extract instruction.  */
443    I3403_EXTR      = 0x13800000,
444
445    /* Logical immediate instructions.  */
446    I3404_ANDI      = 0x12000000,
447    I3404_ORRI      = 0x32000000,
448    I3404_EORI      = 0x52000000,
449
450    /* Move wide immediate instructions.  */
451    I3405_MOVN      = 0x12800000,
452    I3405_MOVZ      = 0x52800000,
453    I3405_MOVK      = 0x72800000,
454
455    /* PC relative addressing instructions.  */
456    I3406_ADR       = 0x10000000,
457    I3406_ADRP      = 0x90000000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV16     = 0x5ac00400,
482    I3507_REV32     = 0x5ac00800,
483    I3507_REV64     = 0x5ac00c00,
484
485    /* Data-processing (2 source) instructions.  */
486    I3508_LSLV      = 0x1ac02000,
487    I3508_LSRV      = 0x1ac02400,
488    I3508_ASRV      = 0x1ac02800,
489    I3508_RORV      = 0x1ac02c00,
490    I3508_SMULH     = 0x9b407c00,
491    I3508_UMULH     = 0x9bc07c00,
492    I3508_UDIV      = 0x1ac00800,
493    I3508_SDIV      = 0x1ac00c00,
494
495    /* Data-processing (3 source) instructions.  */
496    I3509_MADD      = 0x1b000000,
497    I3509_MSUB      = 0x1b008000,
498
499    /* Logical shifted register instructions (without a shift).  */
500    I3510_AND       = 0x0a000000,
501    I3510_BIC       = 0x0a200000,
502    I3510_ORR       = 0x2a000000,
503    I3510_ORN       = 0x2a200000,
504    I3510_EOR       = 0x4a000000,
505    I3510_EON       = 0x4a200000,
506    I3510_ANDS      = 0x6a000000,
507
508    /* Logical shifted register instructions (with a shift).  */
509    I3502S_AND_LSR  = I3510_AND | (1 << 22),
510
511    /* AdvSIMD copy */
512    I3605_DUP      = 0x0e000400,
513    I3605_INS      = 0x4e001c00,
514    I3605_UMOV     = 0x0e003c00,
515
516    /* AdvSIMD modified immediate */
517    I3606_MOVI      = 0x0f000400,
518    I3606_MVNI      = 0x2f000400,
519    I3606_BIC       = 0x2f001400,
520    I3606_ORR       = 0x0f001400,
521
522    /* AdvSIMD scalar shift by immediate */
523    I3609_SSHR      = 0x5f000400,
524    I3609_SSRA      = 0x5f001400,
525    I3609_SHL       = 0x5f005400,
526    I3609_USHR      = 0x7f000400,
527    I3609_USRA      = 0x7f001400,
528    I3609_SLI       = 0x7f005400,
529
530    /* AdvSIMD scalar three same */
531    I3611_SQADD     = 0x5e200c00,
532    I3611_SQSUB     = 0x5e202c00,
533    I3611_CMGT      = 0x5e203400,
534    I3611_CMGE      = 0x5e203c00,
535    I3611_SSHL      = 0x5e204400,
536    I3611_ADD       = 0x5e208400,
537    I3611_CMTST     = 0x5e208c00,
538    I3611_UQADD     = 0x7e200c00,
539    I3611_UQSUB     = 0x7e202c00,
540    I3611_CMHI      = 0x7e203400,
541    I3611_CMHS      = 0x7e203c00,
542    I3611_USHL      = 0x7e204400,
543    I3611_SUB       = 0x7e208400,
544    I3611_CMEQ      = 0x7e208c00,
545
546    /* AdvSIMD scalar two-reg misc */
547    I3612_CMGT0     = 0x5e208800,
548    I3612_CMEQ0     = 0x5e209800,
549    I3612_CMLT0     = 0x5e20a800,
550    I3612_ABS       = 0x5e20b800,
551    I3612_CMGE0     = 0x7e208800,
552    I3612_CMLE0     = 0x7e209800,
553    I3612_NEG       = 0x7e20b800,
554
555    /* AdvSIMD shift by immediate */
556    I3614_SSHR      = 0x0f000400,
557    I3614_SSRA      = 0x0f001400,
558    I3614_SHL       = 0x0f005400,
559    I3614_SLI       = 0x2f005400,
560    I3614_USHR      = 0x2f000400,
561    I3614_USRA      = 0x2f001400,
562
563    /* AdvSIMD three same.  */
564    I3616_ADD       = 0x0e208400,
565    I3616_AND       = 0x0e201c00,
566    I3616_BIC       = 0x0e601c00,
567    I3616_BIF       = 0x2ee01c00,
568    I3616_BIT       = 0x2ea01c00,
569    I3616_BSL       = 0x2e601c00,
570    I3616_EOR       = 0x2e201c00,
571    I3616_MUL       = 0x0e209c00,
572    I3616_ORR       = 0x0ea01c00,
573    I3616_ORN       = 0x0ee01c00,
574    I3616_SUB       = 0x2e208400,
575    I3616_CMGT      = 0x0e203400,
576    I3616_CMGE      = 0x0e203c00,
577    I3616_CMTST     = 0x0e208c00,
578    I3616_CMHI      = 0x2e203400,
579    I3616_CMHS      = 0x2e203c00,
580    I3616_CMEQ      = 0x2e208c00,
581    I3616_SMAX      = 0x0e206400,
582    I3616_SMIN      = 0x0e206c00,
583    I3616_SSHL      = 0x0e204400,
584    I3616_SQADD     = 0x0e200c00,
585    I3616_SQSUB     = 0x0e202c00,
586    I3616_UMAX      = 0x2e206400,
587    I3616_UMIN      = 0x2e206c00,
588    I3616_UQADD     = 0x2e200c00,
589    I3616_UQSUB     = 0x2e202c00,
590    I3616_USHL      = 0x2e204400,
591
592    /* AdvSIMD two-reg misc.  */
593    I3617_CMGT0     = 0x0e208800,
594    I3617_CMEQ0     = 0x0e209800,
595    I3617_CMLT0     = 0x0e20a800,
596    I3617_CMGE0     = 0x2e208800,
597    I3617_CMLE0     = 0x2e209800,
598    I3617_NOT       = 0x2e205800,
599    I3617_ABS       = 0x0e20b800,
600    I3617_NEG       = 0x2e20b800,
601
602    /* System instructions.  */
603    NOP             = 0xd503201f,
604    DMB_ISH         = 0xd50338bf,
605    DMB_LD          = 0x00000100,
606    DMB_ST          = 0x00000200,
607} AArch64Insn;
608
609static inline uint32_t tcg_in32(TCGContext *s)
610{
611    uint32_t v = *(uint32_t *)s->code_ptr;
612    return v;
613}
614
615/* Emit an opcode with "type-checking" of the format.  */
616#define tcg_out_insn(S, FMT, OP, ...) \
617    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
618
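/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) pastes
 * together tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so using
 * an opcode with the wrong format suffix fails to compile instead of
 * silently emitting a malformed instruction.
 */
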
619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620                              TCGReg rt, TCGReg rn, unsigned size)
621{
622    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623}
624
625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626                              int imm19, TCGReg rt)
627{
628    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629}
630
631static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
632                              TCGReg rt, int imm19)
633{
634    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
635}
636
637static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
638                              TCGCond c, int imm19)
639{
640    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
641}
642
643static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
644{
645    tcg_out32(s, insn | (imm26 & 0x03ffffff));
646}
647
648static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
649{
650    tcg_out32(s, insn | rn << 5);
651}
652
653static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
654                              TCGReg r1, TCGReg r2, TCGReg rn,
655                              tcg_target_long ofs, bool pre, bool w)
656{
657    insn |= 1u << 31; /* ext */
658    insn |= pre << 24;
659    insn |= w << 23;
660
661    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
662    insn |= (ofs & (0x7f << 3)) << (15 - 3);
663
664    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
665}
666
667static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
668                              TCGReg rd, TCGReg rn, uint64_t aimm)
669{
670    if (aimm > 0xfff) {
671        tcg_debug_assert((aimm & 0xfff) == 0);
672        aimm >>= 12;
673        tcg_debug_assert(aimm <= 0xfff);
674        aimm |= 1 << 12;  /* apply LSL 12 */
675    }
676    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
677}
678
679/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
680   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
681   that feed the DecodeBitMasks pseudo function.  */
682static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
683                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
684{
685    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
686              | rn << 5 | rd);
687}
688
689#define tcg_out_insn_3404  tcg_out_insn_3402
690
691static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
692                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
693{
694    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
695              | rn << 5 | rd);
696}
697
698/* This function is used for the Move (wide immediate) instruction group.
699   Note that SHIFT is a full shift count, not the 2 bit HW field. */
700static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
701                              TCGReg rd, uint16_t half, unsigned shift)
702{
703    tcg_debug_assert((shift & ~0x30) == 0);
704    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
705}
706
707static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
708                              TCGReg rd, int64_t disp)
709{
710    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
711}
712
713/* This function is for 3.5.2 (Add/subtract shifted register), for the
714   rare occasion when we actually want to supply a shift amount.  */
715static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
716                                      TCGType ext, TCGReg rd, TCGReg rn,
717                                      TCGReg rm, int imm6)
718{
719    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
720}
721
722/* This function is for 3.5.2 (Add/subtract shifted register),
723   and 3.5.10 (Logical shifted register), for the vast majority of cases
724   when we don't want to apply a shift.  Thus it can also be used for
725   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
726static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
727                              TCGReg rd, TCGReg rn, TCGReg rm)
728{
729    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
730}
731
732#define tcg_out_insn_3503  tcg_out_insn_3502
733#define tcg_out_insn_3508  tcg_out_insn_3502
734#define tcg_out_insn_3510  tcg_out_insn_3502
735
736static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
737                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
738{
739    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
740              | tcg_cond_to_aarch64[c] << 12);
741}
742
743static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
744                              TCGReg rd, TCGReg rn)
745{
746    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
747}
748
749static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
750                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
751{
752    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
753}
754
755static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
756                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
757{
758    /* Note that bit 11 set means general register input.  Therefore
759       we can handle both register sets with one function.  */
760    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
761              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
762}
763
764static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
765                              TCGReg rd, bool op, int cmode, uint8_t imm8)
766{
767    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
768              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
769}
770
771static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
772                              TCGReg rd, TCGReg rn, unsigned immhb)
773{
774    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
775}
776
777static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
778                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
779{
780    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
781              | (rn & 0x1f) << 5 | (rd & 0x1f));
782}
783
784static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
785                              unsigned size, TCGReg rd, TCGReg rn)
786{
787    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
788}
789
790static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
791                              TCGReg rd, TCGReg rn, unsigned immhb)
792{
793    tcg_out32(s, insn | q << 30 | immhb << 16
794              | (rn & 0x1f) << 5 | (rd & 0x1f));
795}
796
797static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
798                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
799{
800    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
801              | (rn & 0x1f) << 5 | (rd & 0x1f));
802}
803
804static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
805                              unsigned size, TCGReg rd, TCGReg rn)
806{
807    tcg_out32(s, insn | q << 30 | (size << 22)
808              | (rn & 0x1f) << 5 | (rd & 0x1f));
809}
810
811static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
812                              TCGReg rd, TCGReg base, TCGType ext,
813                              TCGReg regoff)
814{
815    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
816    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
817              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
818}
819
820static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
821                              TCGReg rd, TCGReg rn, intptr_t offset)
822{
823    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
824}
825
826static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
827                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
828{
829    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
830    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
831              | rn << 5 | (rd & 0x1f));
832}
833
834/* Register to register move using ORR (shifted register with no shift). */
835static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
836{
837    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
838}
839
840/* Register to register move using ADDI (move to/from SP).  */
841static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
842{
843    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
844}
845
846/* This function is used for the Logical (immediate) instruction group.
847   The value of LIMM must satisfy IS_LIMM.  See the comment above about
848   only supporting simplified logical immediates.  */
849static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
850                             TCGReg rd, TCGReg rn, uint64_t limm)
851{
852    unsigned h, l, r, c;
853
854    tcg_debug_assert(is_limm(limm));
855
856    h = clz64(limm);
857    l = ctz64(limm);
858    if (l == 0) {
859        r = 0;                  /* form 0....01....1 */
860        c = ctz64(~limm) - 1;
861        if (h == 0) {
862            r = clz64(~limm);   /* form 1..10..01..1 */
863            c += r;
864        }
865    } else {
866        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
867        c = r - h - 1;
868    }
869    if (ext == TCG_TYPE_I32) {
870        r &= 31;
871        c &= 31;
872    }
873
874    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
875}
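
/*
 * Worked example: limm = 0x0000000000ff0000 gives h = 40 and l = 16,
 * hence r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7.  With IMMR = 48 and
 * IMMS = 7, DecodeBitMasks produces a run of 8 ones rotated right by 48,
 * i.e. bits [23:16] -- exactly the original constant.
 */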
876
877static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
878                             TCGReg rd, int64_t v64)
879{
880    bool q = type == TCG_TYPE_V128;
881    int cmode, imm8, i;
882
883    /* Test all bytes equal first.  */
884    if (vece == MO_8) {
885        imm8 = (uint8_t)v64;
886        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
887        return;
888    }
889
890    /*
891     * Test all bytes 0x00 or 0xff second.  This can match cases that
892     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
893     */
894    for (i = imm8 = 0; i < 8; i++) {
895        uint8_t byte = v64 >> (i * 8);
896        if (byte == 0xff) {
897            imm8 |= 1 << i;
898        } else if (byte != 0) {
899            goto fail_bytes;
900        }
901    }
902    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
903    return;
904 fail_bytes:
905
906    /*
907     * Tests for various replications.  For each element width, if we
908     * cannot find an expansion there's no point checking a larger
909     * width because we already know by replication it cannot match.
910     */
911    if (vece == MO_16) {
912        uint16_t v16 = v64;
913
914        if (is_shimm16(v16, &cmode, &imm8)) {
915            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
916            return;
917        }
918        if (is_shimm16(~v16, &cmode, &imm8)) {
919            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
920            return;
921        }
922
923        /*
924         * Otherwise, all remaining constants can be loaded in two insns:
925         * rd = v16 & 0xff, rd |= v16 & 0xff00.
926         */
927        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
928        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
929        return;
930    } else if (vece == MO_32) {
931        uint32_t v32 = v64;
932        uint32_t n32 = ~v32;
933
934        if (is_shimm32(v32, &cmode, &imm8) ||
935            is_soimm32(v32, &cmode, &imm8) ||
936            is_fimm32(v32, &cmode, &imm8)) {
937            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
938            return;
939        }
940        if (is_shimm32(n32, &cmode, &imm8) ||
941            is_soimm32(n32, &cmode, &imm8)) {
942            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
943            return;
944        }
945
946        /*
947         * Restrict the set of constants to those we can load with
948         * two instructions.  Others we load from the pool.
949         */
950        i = is_shimm32_pair(v32, &cmode, &imm8);
951        if (i) {
952            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
953            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
954            return;
955        }
956        i = is_shimm32_pair(n32, &cmode, &imm8);
957        if (i) {
958            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
959            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
960            return;
961        }
962    } else if (is_fimm64(v64, &cmode, &imm8)) {
963        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
964        return;
965    }
966
967    /*
968     * As a last resort, load from the constant pool.  Sadly there
969     * is no LD1R (literal), so store the full 16-byte vector.
970     */
971    if (type == TCG_TYPE_V128) {
972        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
973        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
974    } else {
975        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
976        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
977    }
978}
979
980static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
981                            TCGReg rd, TCGReg rs)
982{
983    int is_q = type - TCG_TYPE_V64;
984    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
985    return true;
986}
987
988static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
989                             TCGReg r, TCGReg base, intptr_t offset)
990{
991    TCGReg temp = TCG_REG_TMP;
992
993    if (offset < -0xffffff || offset > 0xffffff) {
994        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
995        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
996        base = temp;
997    } else {
998        AArch64Insn add_insn = I3401_ADDI;
999
1000        if (offset < 0) {
1001            add_insn = I3401_SUBI;
1002            offset = -offset;
1003        }
1004        if (offset & 0xfff000) {
1005            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1006            base = temp;
1007        }
1008        if (offset & 0xfff) {
1009            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1010            base = temp;
1011        }
1012    }
1013    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1014    return true;
1015}
1016
1017static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1018                         tcg_target_long value)
1019{
1020    tcg_target_long svalue = value;
1021    tcg_target_long ivalue = ~value;
1022    tcg_target_long t0, t1, t2;
1023    int s0, s1;
1024    AArch64Insn opc;
1025
1026    switch (type) {
1027    case TCG_TYPE_I32:
1028    case TCG_TYPE_I64:
1029        tcg_debug_assert(rd < 32);
1030        break;
1031    default:
1032        g_assert_not_reached();
1033    }
1034
1035    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1036       values within [2**31, 2**32-1], we can create smaller sequences by
1037       interpreting this as a negative 32-bit number, while ensuring that
1038       the high 32 bits are cleared by setting SF=0.  */
1039    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1040        svalue = (int32_t)value;
1041        value = (uint32_t)value;
1042        ivalue = (uint32_t)ivalue;
1043        type = TCG_TYPE_I32;
1044    }
1045
1046    /* Speed things up by handling the common case of small positive
1047       and negative values specially.  */
1048    if ((value & ~0xffffull) == 0) {
1049        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1050        return;
1051    } else if ((ivalue & ~0xffffull) == 0) {
1052        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1053        return;
1054    }
1055
1056    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1057       use the sign-extended value.  That lets us match rotated values such
1058       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1059    if (is_limm(svalue)) {
1060        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1061        return;
1062    }
1063
1064    /* Look for host pointer values within 4G of the PC.  This happens
1065       often when loading pointers to QEMU's own data structures.  */
1066    if (type == TCG_TYPE_I64) {
1067        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1068        tcg_target_long disp = value - src_rx;
1069        if (disp == sextract64(disp, 0, 21)) {
1070            tcg_out_insn(s, 3406, ADR, rd, disp);
1071            return;
1072        }
1073        disp = (value >> 12) - (src_rx >> 12);
1074        if (disp == sextract64(disp, 0, 21)) {
1075            tcg_out_insn(s, 3406, ADRP, rd, disp);
1076            if (value & 0xfff) {
1077                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1078            }
1079            return;
1080        }
1081    }
1082
1083    /* Would it take fewer insns to begin with MOVN?  */
1084    if (ctpop64(value) >= 32) {
1085        t0 = ivalue;
1086        opc = I3405_MOVN;
1087    } else {
1088        t0 = value;
1089        opc = I3405_MOVZ;
1090    }
1091    s0 = ctz64(t0) & (63 & -16);
1092    t1 = t0 & ~(0xffffUL << s0);
1093    s1 = ctz64(t1) & (63 & -16);
1094    t2 = t1 & ~(0xffffUL << s1);
1095    if (t2 == 0) {
1096        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1097        if (t1 != 0) {
1098            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1099        }
1100        return;
1101    }
1102
1103    /* For more than 2 insns, dump it into the constant pool.  */
1104    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1105    tcg_out_insn(s, 3305, LDR, 0, rd);
1106}
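
/*
 * For instance, 0x12340000abcd has fewer than 32 bits set, so the MOVZ
 * path is taken with s0 = 0 and s1 = 32, producing
 *     movz rd, #0xabcd
 *     movk rd, #0x1234, lsl #32
 * Values needing three or more 16-bit chunks fall through to the
 * constant pool load above.
 */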
1107
1108/* Define something more legible for general use.  */
1109#define tcg_out_ldst_r  tcg_out_insn_3310
1110
1111static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1112                         TCGReg rn, intptr_t offset, int lgsize)
1113{
1114    /* If the offset is naturally aligned and in range, then we can
1115       use the scaled uimm12 encoding */
1116    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1117        uintptr_t scaled_uimm = offset >> lgsize;
1118        if (scaled_uimm <= 0xfff) {
1119            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1120            return;
1121        }
1122    }
1123
1124    /* Small signed offsets can use the unscaled encoding.  */
1125    if (offset >= -256 && offset < 256) {
1126        tcg_out_insn_3312(s, insn, rd, rn, offset);
1127        return;
1128    }
1129
1130    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1131    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1132    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1133}
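
/*
 * For example, with lgsize = 3 an offset of 0x1008 is 8-byte aligned and
 * scales to 0x201 <= 0xfff, so the scaled uimm12 form is used; -8 fits
 * the signed 9-bit unscaled form; something like 0x123456 fits neither
 * and is first materialized in TCG_REG_TMP for the register-offset form.
 */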
1134
1135static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1136{
1137    if (ret == arg) {
1138        return true;
1139    }
1140    switch (type) {
1141    case TCG_TYPE_I32:
1142    case TCG_TYPE_I64:
1143        if (ret < 32 && arg < 32) {
1144            tcg_out_movr(s, type, ret, arg);
1145            break;
1146        } else if (ret < 32) {
1147            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1148            break;
1149        } else if (arg < 32) {
1150            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1151            break;
1152        }
1153        /* FALLTHRU */
1154
1155    case TCG_TYPE_V64:
1156        tcg_debug_assert(ret >= 32 && arg >= 32);
1157        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1158        break;
1159    case TCG_TYPE_V128:
1160        tcg_debug_assert(ret >= 32 && arg >= 32);
1161        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1162        break;
1163
1164    default:
1165        g_assert_not_reached();
1166    }
1167    return true;
1168}
1169
1170static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1171                       TCGReg base, intptr_t ofs)
1172{
1173    AArch64Insn insn;
1174    int lgsz;
1175
1176    switch (type) {
1177    case TCG_TYPE_I32:
1178        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1179        lgsz = 2;
1180        break;
1181    case TCG_TYPE_I64:
1182        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1183        lgsz = 3;
1184        break;
1185    case TCG_TYPE_V64:
1186        insn = I3312_LDRVD;
1187        lgsz = 3;
1188        break;
1189    case TCG_TYPE_V128:
1190        insn = I3312_LDRVQ;
1191        lgsz = 4;
1192        break;
1193    default:
1194        g_assert_not_reached();
1195    }
1196    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1197}
1198
1199static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1200                       TCGReg base, intptr_t ofs)
1201{
1202    AArch64Insn insn;
1203    int lgsz;
1204
1205    switch (type) {
1206    case TCG_TYPE_I32:
1207        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1208        lgsz = 2;
1209        break;
1210    case TCG_TYPE_I64:
1211        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1212        lgsz = 3;
1213        break;
1214    case TCG_TYPE_V64:
1215        insn = I3312_STRVD;
1216        lgsz = 3;
1217        break;
1218    case TCG_TYPE_V128:
1219        insn = I3312_STRVQ;
1220        lgsz = 4;
1221        break;
1222    default:
1223        g_assert_not_reached();
1224    }
1225    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1226}
1227
1228static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1229                               TCGReg base, intptr_t ofs)
1230{
1231    if (type <= TCG_TYPE_I64 && val == 0) {
1232        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1233        return true;
1234    }
1235    return false;
1236}
1237
1238static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1239                               TCGReg rn, unsigned int a, unsigned int b)
1240{
1241    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1242}
1243
1244static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1245                                TCGReg rn, unsigned int a, unsigned int b)
1246{
1247    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1248}
1249
1250static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1251                                TCGReg rn, unsigned int a, unsigned int b)
1252{
1253    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1254}
1255
1256static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1257                                TCGReg rn, TCGReg rm, unsigned int a)
1258{
1259    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1260}
1261
1262static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1263                               TCGReg rd, TCGReg rn, unsigned int m)
1264{
1265    int bits = ext ? 64 : 32;
1266    int max = bits - 1;
1267    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1268}
1269
1270static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1271                               TCGReg rd, TCGReg rn, unsigned int m)
1272{
1273    int max = ext ? 63 : 31;
1274    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1275}
1276
1277static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1278                               TCGReg rd, TCGReg rn, unsigned int m)
1279{
1280    int max = ext ? 63 : 31;
1281    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1282}
1283
1284static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1285                                TCGReg rd, TCGReg rn, unsigned int m)
1286{
1287    int max = ext ? 63 : 31;
1288    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1289}
1290
1291static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1292                                TCGReg rd, TCGReg rn, unsigned int m)
1293{
1294    int bits = ext ? 64 : 32;
1295    int max = bits - 1;
1296    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1297}
1298
1299static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1300                               TCGReg rn, unsigned lsb, unsigned width)
1301{
1302    unsigned size = ext ? 64 : 32;
1303    unsigned a = (size - lsb) & (size - 1);
1304    unsigned b = width - 1;
1305    tcg_out_bfm(s, ext, rd, rn, a, b);
1306}
1307
1308static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1309                        tcg_target_long b, bool const_b)
1310{
1311    if (const_b) {
1312        /* Using CMP or CMN aliases.  */
1313        if (b >= 0) {
1314            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1315        } else {
1316            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1317        }
1318    } else {
1319        /* Using CMP alias SUBS wzr, Wn, Wm */
1320        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1321    }
1322}
1323
1324static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1325{
1326    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1327    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1328    tcg_out_insn(s, 3206, B, offset);
1329}
1330
1331static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1332{
1333    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1334    if (offset == sextract64(offset, 0, 26)) {
1335        tcg_out_insn(s, 3206, B, offset);
1336    } else {
1337        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1338        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1339    }
1340}
1341
1342static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1343{
1344    tcg_out_insn(s, 3207, BLR, reg);
1345}
1346
1347static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1348{
1349    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1350    if (offset == sextract64(offset, 0, 26)) {
1351        tcg_out_insn(s, 3206, BL, offset);
1352    } else {
1353        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1354        tcg_out_callr(s, TCG_REG_TMP);
1355    }
1356}
1357
1358void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1359                              uintptr_t jmp_rw, uintptr_t addr)
1360{
1361    tcg_insn_unit i1, i2;
1362    TCGType rt = TCG_TYPE_I64;
1363    TCGReg  rd = TCG_REG_TMP;
1364    uint64_t pair;
1365
1366    ptrdiff_t offset = addr - jmp_rx;
1367
1368    if (offset == sextract64(offset, 0, 26)) {
1369        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1370        i2 = NOP;
1371    } else {
1372        offset = (addr >> 12) - (jmp_rx >> 12);
1373
1374        /* patch ADRP */
1375        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1376        /* patch ADDI */
1377        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1378    }
1379    pair = (uint64_t)i2 << 32 | i1;
1380    qatomic_set((uint64_t *)jmp_rw, pair);
1381    flush_idcache_range(jmp_rx, jmp_rw, 8);
1382}
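
/*
 * The B/NOP or ADRP/ADDI pair occupies a single 64-bit slot that is
 * rewritten with one qatomic_set() and then pushed to instruction fetch
 * with flush_idcache_range(), so the jump is retargeted as a unit rather
 * than one instruction at a time.
 */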
1383
1384static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1385{
1386    if (!l->has_value) {
1387        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1388        tcg_out_insn(s, 3206, B, 0);
1389    } else {
1390        tcg_out_goto(s, l->u.value_ptr);
1391    }
1392}
1393
1394static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1395                           TCGArg b, bool b_const, TCGLabel *l)
1396{
1397    intptr_t offset;
1398    bool need_cmp;
1399
1400    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1401        need_cmp = false;
1402    } else {
1403        need_cmp = true;
1404        tcg_out_cmp(s, ext, a, b, b_const);
1405    }
1406
1407    if (!l->has_value) {
1408        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1409        offset = tcg_in32(s) >> 5;
1410    } else {
1411        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1412        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1413    }
1414
1415    if (need_cmp) {
1416        tcg_out_insn(s, 3202, B_C, c, offset);
1417    } else if (c == TCG_COND_EQ) {
1418        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1419    } else {
1420        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1421    }
1422}
1423
1424static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1425{
1426    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1427}
1428
1429static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1430{
1431    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1432}
1433
1434static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1435{
1436    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1437}
1438
1439static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1440                               TCGReg rd, TCGReg rn)
1441{
1442    /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
1443    int bits = (8 << s_bits) - 1;
1444    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1456                            TCGReg rn, int64_t aimm)
1457{
1458    if (aimm >= 0) {
1459        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1460    } else {
1461        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1462    }
1463}
1464
1465static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1466                            TCGReg rh, TCGReg al, TCGReg ah,
1467                            tcg_target_long bl, tcg_target_long bh,
1468                            bool const_bl, bool const_bh, bool sub)
1469{
1470    TCGReg orig_rl = rl;
1471    AArch64Insn insn;
1472
1473    if (rl == ah || (!const_bh && rl == bh)) {
1474        rl = TCG_REG_TMP;
1475    }
1476
1477    if (const_bl) {
1478        if (bl < 0) {
1479            bl = -bl;
1480            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1481        } else {
1482            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1483        }
1484
1485        if (unlikely(al == TCG_REG_XZR)) {
1486            /* ??? We want to allow al to be zero for the benefit of
1487               negation via subtraction.  However, that leaves open the
1488               possibility of adding 0+const in the low part, and the
1489               immediate add instructions encode XSP not XZR.  Don't try
1490               anything more elaborate here than loading another zero.  */
1491            al = TCG_REG_TMP;
1492            tcg_out_movi(s, ext, al, 0);
1493        }
1494        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1495    } else {
1496        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1497    }
1498
1499    insn = I3503_ADC;
1500    if (const_bh) {
1501        /* Note that the only two constants we support are 0 and -1, and
1502           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1503        if ((bh != 0) ^ sub) {
1504            insn = I3503_SBC;
1505        }
1506        bh = TCG_REG_XZR;
1507    } else if (sub) {
1508        insn = I3503_SBC;
1509    }
1510    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1511
1512    tcg_out_mov(s, ext, orig_rl, rl);
1513}
1514
1515static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1516{
1517    static const uint32_t sync[] = {
1518        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1519        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1520        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1521        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1522        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1523    };
1524    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1525}
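
/*
 * The CRm barrier option is assembled by OR-ing DMB_LD and/or DMB_ST into
 * DMB_ISH, so the table above emits "dmb ishld" for the load-ordering
 * cases, "dmb ishst" for the store-store case, and a full "dmb ish"
 * otherwise.
 */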
1526
1527static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1528                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1529{
1530    TCGReg a1 = a0;
1531    if (is_ctz) {
1532        a1 = TCG_REG_TMP;
1533        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1534    }
1535    if (const_b && b == (ext ? 64 : 32)) {
1536        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1537    } else {
1538        AArch64Insn sel = I3506_CSEL;
1539
1540        tcg_out_cmp(s, ext, a0, 0, 1);
1541        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1542
1543        if (const_b) {
1544            if (b == -1) {
1545                b = TCG_REG_XZR;
1546                sel = I3506_CSINV;
1547            } else if (b == 0) {
1548                b = TCG_REG_XZR;
1549            } else {
1550                tcg_out_movi(s, ext, d, b);
1551                b = d;
1552            }
1553        }
1554        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1555    }
1556}
1557
1558#ifdef CONFIG_SOFTMMU
1559#include "../tcg-ldst.c.inc"
1560
1561/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1562 *                                     TCGMemOpIdx oi, uintptr_t ra)
1563 */
1564static void * const qemu_ld_helpers[16] = {
1565    [MO_UB]   = helper_ret_ldub_mmu,
1566    [MO_LEUW] = helper_le_lduw_mmu,
1567    [MO_LEUL] = helper_le_ldul_mmu,
1568    [MO_LEQ]  = helper_le_ldq_mmu,
1569    [MO_BEUW] = helper_be_lduw_mmu,
1570    [MO_BEUL] = helper_be_ldul_mmu,
1571    [MO_BEQ]  = helper_be_ldq_mmu,
1572};
1573
1574/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1575 *                                     uintxx_t val, TCGMemOpIdx oi,
1576 *                                     uintptr_t ra)
1577 */
1578static void * const qemu_st_helpers[16] = {
1579    [MO_UB]   = helper_ret_stb_mmu,
1580    [MO_LEUW] = helper_le_stw_mmu,
1581    [MO_LEUL] = helper_le_stl_mmu,
1582    [MO_LEQ]  = helper_le_stq_mmu,
1583    [MO_BEUW] = helper_be_stw_mmu,
1584    [MO_BEUL] = helper_be_stl_mmu,
1585    [MO_BEQ]  = helper_be_stq_mmu,
1586};
1587
1588static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1589{
1590    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1591    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1592    tcg_out_insn(s, 3406, ADR, rd, offset);
1593}
1594
1595static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1596{
1597    TCGMemOpIdx oi = lb->oi;
1598    MemOp opc = get_memop(oi);
1599    MemOp size = opc & MO_SIZE;
1600
1601    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1602        return false;
1603    }
1604
1605    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1606    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1607    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1608    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1609    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1610    if (opc & MO_SIGN) {
1611        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1612    } else {
1613        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1614    }
1615
1616    tcg_out_goto(s, lb->raddr);
1617    return true;
1618}
1619
1620static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1621{
1622    TCGMemOpIdx oi = lb->oi;
1623    MemOp opc = get_memop(oi);
1624    MemOp size = opc & MO_SIZE;
1625
1626    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1627        return false;
1628    }
1629
1630    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1631    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1632    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1633    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1634    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1635    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1636    tcg_out_goto(s, lb->raddr);
1637    return true;
1638}
1639
1640static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1641                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1642                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1643{
1644    TCGLabelQemuLdst *label = new_ldst_label(s);
1645
1646    label->is_ld = is_ld;
1647    label->oi = oi;
1648    label->type = ext;
1649    label->datalo_reg = data_reg;
1650    label->addrlo_reg = addr_reg;
1651    label->raddr = tcg_splitwx_to_rx(raddr);
1652    label->label_ptr[0] = label_ptr;
1653}
1654
1655/* We expect to use a 7-bit scaled negative offset from ENV.  */
1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1657QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
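/* (For 64-bit LDP the 7-bit immediate is scaled by 8, reaching -512..+504.) */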
1658
1659/* These offsets are built into the LDP below.  */
1660QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1661QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1662
1663/* Load and compare a TLB entry, emitting the conditional jump to the
1664   slow path for the failure case, which will be patched later when finalizing
1665   the slow path. Generated code returns the host addend in X1,
1666   clobbers X0,X2,X3,TMP. */
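/* A rough sketch of the emitted fast path for a 64-bit guest (offsets
 * symbolic, register numbers as allocated below):
 *
 *    ldp  x0, x1, [env, #TLB_MASK_TABLE_OFS]     // mask, table
 *    and  x0, x0, addr, lsr #(PAGE_BITS - ENTRY_BITS)
 *    add  x1, x1, x0                             // &CPUTLBEntry
 *    ldr  x0, [x1, #addr_read/addr_write]        // comparator
 *    ldr  x1, [x1, #addend]
 *    and  x3, addr, #compare_mask                // maybe preceded by an add
 *    cmp  x0, x3
 *    b.ne slow_path
 */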
1667static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1668                             tcg_insn_unit **label_ptr, int mem_index,
1669                             bool is_read)
1670{
1671    unsigned a_bits = get_alignment_bits(opc);
1672    unsigned s_bits = opc & MO_SIZE;
1673    unsigned a_mask = (1u << a_bits) - 1;
1674    unsigned s_mask = (1u << s_bits) - 1;
1675    TCGReg x3;
1676    TCGType mask_type;
1677    uint64_t compare_mask;
1678
1679    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1680                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1681
1682    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1683    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1684                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1685
1686    /* Extract the TLB index from the address into X0.  */
1687    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1688                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1689                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
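    /* The fast-path mask is (n_entries - 1) << CPU_TLB_ENTRY_BITS, so the
       combined AND+LSR above yields the byte offset of the entry within
       the table, i.e. index * sizeof(CPUTLBEntry), in one instruction.  */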
1690
1691    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1692    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1693
1694    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1695    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1696               ? offsetof(CPUTLBEntry, addr_read)
1697               : offsetof(CPUTLBEntry, addr_write));
1698    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1699               offsetof(CPUTLBEntry, addend));
1700
1701    /* For aligned accesses, we check the first byte and include the alignment
1702       bits within the address.  For unaligned access, we check that we don't
1703       cross pages using the address of the last byte of the access.  */
1704    if (a_bits >= s_bits) {
1705        x3 = addr_reg;
1706    } else {
1707        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1708                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1709        x3 = TCG_REG_X3;
1710    }
1711    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1712
1713    /* Store the page mask part of the address into X3.  */
1714    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1715                     TCG_REG_X3, x3, compare_mask);
1716
1717    /* Perform the address comparison. */
1718    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1719
1720    /* If not equal, we jump to the slow path. */
1721    *label_ptr = s->code_ptr;
1722    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1723}
1724
1725#endif /* CONFIG_SOFTMMU */
1726
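/* Direct (fast path) guest load.  The access uses the register-offset
   addressing form: ADDR_R is the base (TLB addend or guest_base), OFF_R is
   the guest address, and OTYPE selects whether OFF_R is consumed as a
   zero-extended 32-bit or a full 64-bit index.  */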
1727static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1728                                   TCGReg data_r, TCGReg addr_r,
1729                                   TCGType otype, TCGReg off_r)
1730{
1731    const MemOp bswap = memop & MO_BSWAP;
1732
1733    switch (memop & MO_SSIZE) {
1734    case MO_UB:
1735        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1736        break;
1737    case MO_SB:
1738        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1739                       data_r, addr_r, otype, off_r);
1740        break;
1741    case MO_UW:
1742        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1743        if (bswap) {
1744            tcg_out_rev16(s, data_r, data_r);
1745        }
1746        break;
1747    case MO_SW:
1748        if (bswap) {
1749            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1750            tcg_out_rev16(s, data_r, data_r);
1751            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1752        } else {
1753            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1754                           data_r, addr_r, otype, off_r);
1755        }
1756        break;
1757    case MO_UL:
1758        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1759        if (bswap) {
1760            tcg_out_rev32(s, data_r, data_r);
1761        }
1762        break;
1763    case MO_SL:
1764        if (bswap) {
1765            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1766            tcg_out_rev32(s, data_r, data_r);
1767            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1768        } else {
1769            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1770        }
1771        break;
1772    case MO_Q:
1773        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1774        if (bswap) {
1775            tcg_out_rev64(s, data_r, data_r);
1776        }
1777        break;
1778    default:
1779        tcg_abort();
1780    }
1781}
1782
1783static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1784                                   TCGReg data_r, TCGReg addr_r,
1785                                   TCGType otype, TCGReg off_r)
1786{
1787    const MemOp bswap = memop & MO_BSWAP;
1788
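    /* Byte-swapped stores bounce through TCG_REG_TMP so that the source
       register is left intact; storing XZR needs no swap.  */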
1789    switch (memop & MO_SIZE) {
1790    case MO_8:
1791        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1792        break;
1793    case MO_16:
1794        if (bswap && data_r != TCG_REG_XZR) {
1795            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1796            data_r = TCG_REG_TMP;
1797        }
1798        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1799        break;
1800    case MO_32:
1801        if (bswap && data_r != TCG_REG_XZR) {
1802            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1803            data_r = TCG_REG_TMP;
1804        }
1805        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1806        break;
1807    case MO_64:
1808        if (bswap && data_r != TCG_REG_XZR) {
1809            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1810            data_r = TCG_REG_TMP;
1811        }
1812        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1813        break;
1814    default:
1815        tcg_abort();
1816    }
1817}
1818
1819static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1820                            TCGMemOpIdx oi, TCGType ext)
1821{
1822    MemOp memop = get_memop(oi);
1823    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1824#ifdef CONFIG_SOFTMMU
1825    unsigned mem_index = get_mmuidx(oi);
1826    tcg_insn_unit *label_ptr;
1827
1828    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1829    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1830                           TCG_REG_X1, otype, addr_reg);
1831    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1832                        s->code_ptr, label_ptr);
1833#else /* !CONFIG_SOFTMMU */
1834    if (USE_GUEST_BASE) {
1835        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1836                               TCG_REG_GUEST_BASE, otype, addr_reg);
1837    } else {
1838        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1839                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1840    }
1841#endif /* CONFIG_SOFTMMU */
1842}
1843
1844static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1845                            TCGMemOpIdx oi)
1846{
1847    MemOp memop = get_memop(oi);
1848    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1849#ifdef CONFIG_SOFTMMU
1850    unsigned mem_index = get_mmuidx(oi);
1851    tcg_insn_unit *label_ptr;
1852
1853    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1854    tcg_out_qemu_st_direct(s, memop, data_reg,
1855                           TCG_REG_X1, otype, addr_reg);
1856    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1857                        data_reg, addr_reg, s->code_ptr, label_ptr);
1858#else /* !CONFIG_SOFTMMU */
1859    if (USE_GUEST_BASE) {
1860        tcg_out_qemu_st_direct(s, memop, data_reg,
1861                               TCG_REG_GUEST_BASE, otype, addr_reg);
1862    } else {
1863        tcg_out_qemu_st_direct(s, memop, data_reg,
1864                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1865    }
1866#endif /* CONFIG_SOFTMMU */
1867}
1868
1869static const tcg_insn_unit *tb_ret_addr;
1870
1871static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1872                       const TCGArg args[TCG_MAX_OP_ARGS],
1873                       const int const_args[TCG_MAX_OP_ARGS])
1874{
1875    /* 99% of the time, we can signal the use of extension registers
1876       by looking to see if the opcode handles 64-bit data.  */
1877    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1878
1879    /* Hoist the loads of the most common arguments.  */
1880    TCGArg a0 = args[0];
1881    TCGArg a1 = args[1];
1882    TCGArg a2 = args[2];
1883    int c2 = const_args[2];
1884
1885    /* Some operands are defined with "rZ" constraint, a register or
1886       the zero register.  These need not actually test args[I] == 0.  */
1887#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1888
1889    switch (opc) {
1890    case INDEX_op_exit_tb:
1891        /* Reuse the zeroing that exists for goto_ptr.  */
1892        if (a0 == 0) {
1893            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1894        } else {
1895            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1896            tcg_out_goto_long(s, tb_ret_addr);
1897        }
1898        break;
1899
1900    case INDEX_op_goto_tb:
1901        if (s->tb_jmp_insn_offset != NULL) {
1902            /* TCG_TARGET_HAS_direct_jump */
1903            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1904               write can be used to patch the target address. */
1905            if ((uintptr_t)s->code_ptr & 7) {
1906                tcg_out32(s, NOP);
1907            }
1908            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1909            /* actual branch destination will be patched by
1910               tb_target_set_jmp_target later. */
1911            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1912            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1913        } else {
1914            /* !TCG_TARGET_HAS_direct_jump */
1915            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1916            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1917            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1918        }
1919        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1920        set_jmp_reset_offset(s, a0);
1921        break;
1922
1923    case INDEX_op_goto_ptr:
1924        tcg_out_insn(s, 3207, BR, a0);
1925        break;
1926
1927    case INDEX_op_br:
1928        tcg_out_goto_label(s, arg_label(a0));
1929        break;
1930
1931    case INDEX_op_ld8u_i32:
1932    case INDEX_op_ld8u_i64:
1933        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1934        break;
1935    case INDEX_op_ld8s_i32:
1936        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1937        break;
1938    case INDEX_op_ld8s_i64:
1939        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1940        break;
1941    case INDEX_op_ld16u_i32:
1942    case INDEX_op_ld16u_i64:
1943        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1944        break;
1945    case INDEX_op_ld16s_i32:
1946        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1947        break;
1948    case INDEX_op_ld16s_i64:
1949        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1950        break;
1951    case INDEX_op_ld_i32:
1952    case INDEX_op_ld32u_i64:
1953        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1954        break;
1955    case INDEX_op_ld32s_i64:
1956        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1957        break;
1958    case INDEX_op_ld_i64:
1959        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1960        break;
1961
1962    case INDEX_op_st8_i32:
1963    case INDEX_op_st8_i64:
1964        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1965        break;
1966    case INDEX_op_st16_i32:
1967    case INDEX_op_st16_i64:
1968        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1969        break;
1970    case INDEX_op_st_i32:
1971    case INDEX_op_st32_i64:
1972        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1973        break;
1974    case INDEX_op_st_i64:
1975        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1976        break;
1977
1978    case INDEX_op_add_i32:
1979        a2 = (int32_t)a2;
1980        /* FALLTHRU */
1981    case INDEX_op_add_i64:
1982        if (c2) {
1983            tcg_out_addsubi(s, ext, a0, a1, a2);
1984        } else {
1985            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1986        }
1987        break;
1988
1989    case INDEX_op_sub_i32:
1990        a2 = (int32_t)a2;
1991        /* FALLTHRU */
1992    case INDEX_op_sub_i64:
1993        if (c2) {
1994            tcg_out_addsubi(s, ext, a0, a1, -a2);
1995        } else {
1996            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1997        }
1998        break;
1999
2000    case INDEX_op_neg_i64:
2001    case INDEX_op_neg_i32:
2002        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2003        break;
2004
2005    case INDEX_op_and_i32:
2006        a2 = (int32_t)a2;
2007        /* FALLTHRU */
2008    case INDEX_op_and_i64:
2009        if (c2) {
2010            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2011        } else {
2012            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2013        }
2014        break;
2015
2016    case INDEX_op_andc_i32:
2017        a2 = (int32_t)a2;
2018        /* FALLTHRU */
2019    case INDEX_op_andc_i64:
2020        if (c2) {
2021            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2022        } else {
2023            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2024        }
2025        break;
2026
2027    case INDEX_op_or_i32:
2028        a2 = (int32_t)a2;
2029        /* FALLTHRU */
2030    case INDEX_op_or_i64:
2031        if (c2) {
2032            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2033        } else {
2034            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2035        }
2036        break;
2037
2038    case INDEX_op_orc_i32:
2039        a2 = (int32_t)a2;
2040        /* FALLTHRU */
2041    case INDEX_op_orc_i64:
2042        if (c2) {
2043            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2044        } else {
2045            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2046        }
2047        break;
2048
2049    case INDEX_op_xor_i32:
2050        a2 = (int32_t)a2;
2051        /* FALLTHRU */
2052    case INDEX_op_xor_i64:
2053        if (c2) {
2054            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2055        } else {
2056            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2057        }
2058        break;
2059
2060    case INDEX_op_eqv_i32:
2061        a2 = (int32_t)a2;
2062        /* FALLTHRU */
2063    case INDEX_op_eqv_i64:
2064        if (c2) {
2065            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2066        } else {
2067            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2068        }
2069        break;
2070
2071    case INDEX_op_not_i64:
2072    case INDEX_op_not_i32:
2073        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2074        break;
2075
2076    case INDEX_op_mul_i64:
2077    case INDEX_op_mul_i32:
2078        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2079        break;
2080
2081    case INDEX_op_div_i64:
2082    case INDEX_op_div_i32:
2083        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2084        break;
2085    case INDEX_op_divu_i64:
2086    case INDEX_op_divu_i32:
2087        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2088        break;
2089
2090    case INDEX_op_rem_i64:
2091    case INDEX_op_rem_i32:
2092        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2093        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2094        break;
2095    case INDEX_op_remu_i64:
2096    case INDEX_op_remu_i32:
2097        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2098        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2099        break;
2100
2101    case INDEX_op_shl_i64:
2102    case INDEX_op_shl_i32:
2103        if (c2) {
2104            tcg_out_shl(s, ext, a0, a1, a2);
2105        } else {
2106            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2107        }
2108        break;
2109
2110    case INDEX_op_shr_i64:
2111    case INDEX_op_shr_i32:
2112        if (c2) {
2113            tcg_out_shr(s, ext, a0, a1, a2);
2114        } else {
2115            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2116        }
2117        break;
2118
2119    case INDEX_op_sar_i64:
2120    case INDEX_op_sar_i32:
2121        if (c2) {
2122            tcg_out_sar(s, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_rotr_i64:
2129    case INDEX_op_rotr_i32:
2130        if (c2) {
2131            tcg_out_rotr(s, ext, a0, a1, a2);
2132        } else {
2133            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2134        }
2135        break;
2136
2137    case INDEX_op_rotl_i64:
2138    case INDEX_op_rotl_i32:
2139        if (c2) {
2140            tcg_out_rotl(s, ext, a0, a1, a2);
2141        } else {
2142            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2143            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2144        }
2145        break;
2146
2147    case INDEX_op_clz_i64:
2148    case INDEX_op_clz_i32:
2149        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2150        break;
2151    case INDEX_op_ctz_i64:
2152    case INDEX_op_ctz_i32:
2153        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2154        break;
2155
2156    case INDEX_op_brcond_i32:
2157        a1 = (int32_t)a1;
2158        /* FALLTHRU */
2159    case INDEX_op_brcond_i64:
2160        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2161        break;
2162
2163    case INDEX_op_setcond_i32:
2164        a2 = (int32_t)a2;
2165        /* FALLTHRU */
2166    case INDEX_op_setcond_i64:
2167        tcg_out_cmp(s, ext, a1, a2, c2);
2168        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2169        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2170                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2171        break;
2172
2173    case INDEX_op_movcond_i32:
2174        a2 = (int32_t)a2;
2175        /* FALLTHRU */
2176    case INDEX_op_movcond_i64:
2177        tcg_out_cmp(s, ext, a1, a2, c2);
2178        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2179        break;
2180
2181    case INDEX_op_qemu_ld_i32:
2182    case INDEX_op_qemu_ld_i64:
2183        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2184        break;
2185    case INDEX_op_qemu_st_i32:
2186    case INDEX_op_qemu_st_i64:
2187        tcg_out_qemu_st(s, REG0(0), a1, a2);
2188        break;
2189
2190    case INDEX_op_bswap64_i64:
2191        tcg_out_rev64(s, a0, a1);
2192        break;
2193    case INDEX_op_bswap32_i64:
2194    case INDEX_op_bswap32_i32:
2195        tcg_out_rev32(s, a0, a1);
2196        break;
2197    case INDEX_op_bswap16_i64:
2198    case INDEX_op_bswap16_i32:
2199        tcg_out_rev16(s, a0, a1);
2200        break;
2201
2202    case INDEX_op_ext8s_i64:
2203    case INDEX_op_ext8s_i32:
2204        tcg_out_sxt(s, ext, MO_8, a0, a1);
2205        break;
2206    case INDEX_op_ext16s_i64:
2207    case INDEX_op_ext16s_i32:
2208        tcg_out_sxt(s, ext, MO_16, a0, a1);
2209        break;
2210    case INDEX_op_ext_i32_i64:
2211    case INDEX_op_ext32s_i64:
2212        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2213        break;
2214    case INDEX_op_ext8u_i64:
2215    case INDEX_op_ext8u_i32:
2216        tcg_out_uxt(s, MO_8, a0, a1);
2217        break;
2218    case INDEX_op_ext16u_i64:
2219    case INDEX_op_ext16u_i32:
2220        tcg_out_uxt(s, MO_16, a0, a1);
2221        break;
2222    case INDEX_op_extu_i32_i64:
2223    case INDEX_op_ext32u_i64:
2224        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2225        break;
2226
2227    case INDEX_op_deposit_i64:
2228    case INDEX_op_deposit_i32:
2229        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2230        break;
2231
2232    case INDEX_op_extract_i64:
2233    case INDEX_op_extract_i32:
2234        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2235        break;
2236
2237    case INDEX_op_sextract_i64:
2238    case INDEX_op_sextract_i32:
2239        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2240        break;
2241
2242    case INDEX_op_extract2_i64:
2243    case INDEX_op_extract2_i32:
2244        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2245        break;
2246
2247    case INDEX_op_add2_i32:
2248        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2249                        (int32_t)args[4], args[5], const_args[4],
2250                        const_args[5], false);
2251        break;
2252    case INDEX_op_add2_i64:
2253        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2254                        args[5], const_args[4], const_args[5], false);
2255        break;
2256    case INDEX_op_sub2_i32:
2257        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2258                        (int32_t)args[4], args[5], const_args[4],
2259                        const_args[5], true);
2260        break;
2261    case INDEX_op_sub2_i64:
2262        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2263                        args[5], const_args[4], const_args[5], true);
2264        break;
2265
2266    case INDEX_op_muluh_i64:
2267        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2268        break;
2269    case INDEX_op_mulsh_i64:
2270        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2271        break;
2272
2273    case INDEX_op_mb:
2274        tcg_out_mb(s, a0);
2275        break;
2276
2277    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2278    case INDEX_op_mov_i64:
2279    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2280    default:
2281        g_assert_not_reached();
2282    }
2283
2284#undef REG0
2285}
2286
2287static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2288                           unsigned vecl, unsigned vece,
2289                           const TCGArg args[TCG_MAX_OP_ARGS],
2290                           const int const_args[TCG_MAX_OP_ARGS])
2291{
2292    static const AArch64Insn cmp_vec_insn[16] = {
2293        [TCG_COND_EQ] = I3616_CMEQ,
2294        [TCG_COND_GT] = I3616_CMGT,
2295        [TCG_COND_GE] = I3616_CMGE,
2296        [TCG_COND_GTU] = I3616_CMHI,
2297        [TCG_COND_GEU] = I3616_CMHS,
2298    };
2299    static const AArch64Insn cmp_scalar_insn[16] = {
2300        [TCG_COND_EQ] = I3611_CMEQ,
2301        [TCG_COND_GT] = I3611_CMGT,
2302        [TCG_COND_GE] = I3611_CMGE,
2303        [TCG_COND_GTU] = I3611_CMHI,
2304        [TCG_COND_GEU] = I3611_CMHS,
2305    };
2306    static const AArch64Insn cmp0_vec_insn[16] = {
2307        [TCG_COND_EQ] = I3617_CMEQ0,
2308        [TCG_COND_GT] = I3617_CMGT0,
2309        [TCG_COND_GE] = I3617_CMGE0,
2310        [TCG_COND_LT] = I3617_CMLT0,
2311        [TCG_COND_LE] = I3617_CMLE0,
2312    };
2313    static const AArch64Insn cmp0_scalar_insn[16] = {
2314        [TCG_COND_EQ] = I3612_CMEQ0,
2315        [TCG_COND_GT] = I3612_CMGT0,
2316        [TCG_COND_GE] = I3612_CMGE0,
2317        [TCG_COND_LT] = I3612_CMLT0,
2318        [TCG_COND_LE] = I3612_CMLE0,
2319    };
2320
2321    TCGType type = vecl + TCG_TYPE_V64;
2322    unsigned is_q = vecl;
2323    bool is_scalar = !is_q && vece == MO_64;
2324    TCGArg a0, a1, a2, a3;
2325    int cmode, imm8;
2326
2327    a0 = args[0];
2328    a1 = args[1];
2329    a2 = args[2];
2330
2331    switch (opc) {
2332    case INDEX_op_ld_vec:
2333        tcg_out_ld(s, type, a0, a1, a2);
2334        break;
2335    case INDEX_op_st_vec:
2336        tcg_out_st(s, type, a0, a1, a2);
2337        break;
2338    case INDEX_op_dupm_vec:
2339        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2340        break;
2341    case INDEX_op_add_vec:
2342        if (is_scalar) {
2343            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2344        } else {
2345            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2346        }
2347        break;
2348    case INDEX_op_sub_vec:
2349        if (is_scalar) {
2350            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2351        } else {
2352            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2353        }
2354        break;
2355    case INDEX_op_mul_vec:
2356        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2357        break;
2358    case INDEX_op_neg_vec:
2359        if (is_scalar) {
2360            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2361        } else {
2362            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2363        }
2364        break;
2365    case INDEX_op_abs_vec:
2366        if (is_scalar) {
2367            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2368        } else {
2369            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2370        }
2371        break;
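    /*
     * AdvSIMD has no AND with vector immediate, so AND with a constant is
     * expressed as BIC of the inverted immediate when the destination
     * overlaps the source; otherwise the constant is materialized with
     * MVNI/MOVI and the register form is used.  The same pattern repeats
     * for or/andc/orc below.
     */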
2372    case INDEX_op_and_vec:
2373        if (const_args[2]) {
2374            is_shimm1632(~a2, &cmode, &imm8);
2375            if (a0 == a1) {
2376                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2377                return;
2378            }
2379            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2380            a2 = a0;
2381        }
2382        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2383        break;
2384    case INDEX_op_or_vec:
2385        if (const_args[2]) {
2386            is_shimm1632(a2, &cmode, &imm8);
2387            if (a0 == a1) {
2388                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2389                return;
2390            }
2391            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2392            a2 = a0;
2393        }
2394        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2395        break;
2396    case INDEX_op_andc_vec:
2397        if (const_args[2]) {
2398            is_shimm1632(a2, &cmode, &imm8);
2399            if (a0 == a1) {
2400                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2401                return;
2402            }
2403            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2404            a2 = a0;
2405        }
2406        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2407        break;
2408    case INDEX_op_orc_vec:
2409        if (const_args[2]) {
2410            is_shimm1632(~a2, &cmode, &imm8);
2411            if (a0 == a1) {
2412                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2413                return;
2414            }
2415            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2416            a2 = a0;
2417        }
2418        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2419        break;
2420    case INDEX_op_xor_vec:
2421        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2422        break;
2423    case INDEX_op_ssadd_vec:
2424        if (is_scalar) {
2425            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2426        } else {
2427            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2428        }
2429        break;
2430    case INDEX_op_sssub_vec:
2431        if (is_scalar) {
2432            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2433        } else {
2434            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2435        }
2436        break;
2437    case INDEX_op_usadd_vec:
2438        if (is_scalar) {
2439            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2440        } else {
2441            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2442        }
2443        break;
2444    case INDEX_op_ussub_vec:
2445        if (is_scalar) {
2446            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2447        } else {
2448            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2449        }
2450        break;
2451    case INDEX_op_smax_vec:
2452        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2453        break;
2454    case INDEX_op_smin_vec:
2455        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2456        break;
2457    case INDEX_op_umax_vec:
2458        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2459        break;
2460    case INDEX_op_umin_vec:
2461        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2462        break;
2463    case INDEX_op_not_vec:
2464        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2465        break;
2466    case INDEX_op_shli_vec:
2467        if (is_scalar) {
2468            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2469        } else {
2470            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2471        }
2472        break;
2473    case INDEX_op_shri_vec:
2474        if (is_scalar) {
2475            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2476        } else {
2477            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2478        }
2479        break;
2480    case INDEX_op_sari_vec:
2481        if (is_scalar) {
2482            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2483        } else {
2484            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2485        }
2486        break;
2487    case INDEX_op_aa64_sli_vec:
2488        if (is_scalar) {
2489            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2490        } else {
2491            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2492        }
2493        break;
2494    case INDEX_op_shlv_vec:
2495        if (is_scalar) {
2496            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2497        } else {
2498            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2499        }
2500        break;
2501    case INDEX_op_aa64_sshl_vec:
2502        if (is_scalar) {
2503            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2504        } else {
2505            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2506        }
2507        break;
2508    case INDEX_op_cmp_vec:
2509        {
2510            TCGCond cond = args[3];
2511            AArch64Insn insn;
2512
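            /* There is no vector compare-not-equal.  Against a constant
               (zero) operand, CMTST Vd, Vn, Vn sets a lane iff it is
               non-zero; otherwise emit CMEQ and invert with NOT.  */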
2513            if (cond == TCG_COND_NE) {
2514                if (const_args[2]) {
2515                    if (is_scalar) {
2516                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2517                    } else {
2518                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2519                    }
2520                } else {
2521                    if (is_scalar) {
2522                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2523                    } else {
2524                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2525                    }
2526                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2527                }
2528            } else {
2529                if (const_args[2]) {
2530                    if (is_scalar) {
2531                        insn = cmp0_scalar_insn[cond];
2532                        if (insn) {
2533                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2534                            break;
2535                        }
2536                    } else {
2537                        insn = cmp0_vec_insn[cond];
2538                        if (insn) {
2539                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2540                            break;
2541                        }
2542                    }
2543                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2544                    a2 = TCG_VEC_TMP;
2545                }
2546                if (is_scalar) {
2547                    insn = cmp_scalar_insn[cond];
2548                    if (insn == 0) {
2549                        TCGArg t;
2550                        t = a1, a1 = a2, a2 = t;
2551                        cond = tcg_swap_cond(cond);
2552                        insn = cmp_scalar_insn[cond];
2553                        tcg_debug_assert(insn != 0);
2554                    }
2555                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2556                } else {
2557                    insn = cmp_vec_insn[cond];
2558                    if (insn == 0) {
2559                        TCGArg t;
2560                        t = a1, a1 = a2, a2 = t;
2561                        cond = tcg_swap_cond(cond);
2562                        insn = cmp_vec_insn[cond];
2563                        tcg_debug_assert(insn != 0);
2564                    }
2565                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2566                }
2567            }
2568        }
2569        break;
2570
2571    case INDEX_op_bitsel_vec:
2572        a3 = args[3];
2573        if (a0 == a3) {
2574            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2575        } else if (a0 == a2) {
2576            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2577        } else {
2578            if (a0 != a1) {
2579                tcg_out_mov(s, type, a0, a1);
2580            }
2581            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2582        }
2583        break;
2584
2585    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2586    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2587    default:
2588        g_assert_not_reached();
2589    }
2590}
2591
2592int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2593{
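    /* Return 1 if the op is supported directly, -1 if it can be built by
       tcg_expand_vec_op below, and 0 if it is not supported.  */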
2594    switch (opc) {
2595    case INDEX_op_add_vec:
2596    case INDEX_op_sub_vec:
2597    case INDEX_op_and_vec:
2598    case INDEX_op_or_vec:
2599    case INDEX_op_xor_vec:
2600    case INDEX_op_andc_vec:
2601    case INDEX_op_orc_vec:
2602    case INDEX_op_neg_vec:
2603    case INDEX_op_abs_vec:
2604    case INDEX_op_not_vec:
2605    case INDEX_op_cmp_vec:
2606    case INDEX_op_shli_vec:
2607    case INDEX_op_shri_vec:
2608    case INDEX_op_sari_vec:
2609    case INDEX_op_ssadd_vec:
2610    case INDEX_op_sssub_vec:
2611    case INDEX_op_usadd_vec:
2612    case INDEX_op_ussub_vec:
2613    case INDEX_op_shlv_vec:
2614    case INDEX_op_bitsel_vec:
2615        return 1;
2616    case INDEX_op_rotli_vec:
2617    case INDEX_op_shrv_vec:
2618    case INDEX_op_sarv_vec:
2619    case INDEX_op_rotlv_vec:
2620    case INDEX_op_rotrv_vec:
2621        return -1;
2622    case INDEX_op_mul_vec:
2623    case INDEX_op_smax_vec:
2624    case INDEX_op_smin_vec:
2625    case INDEX_op_umax_vec:
2626    case INDEX_op_umin_vec:
2627        return vece < MO_64;
2628
2629    default:
2630        return 0;
2631    }
2632}
2633
2634void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2635                       TCGArg a0, ...)
2636{
2637    va_list va;
2638    TCGv_vec v0, v1, v2, t1, t2, c1;
2639    TCGArg a2;
2640
2641    va_start(va, a0);
2642    v0 = temp_tcgv_vec(arg_temp(a0));
2643    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2644    a2 = va_arg(va, TCGArg);
2645    va_end(va);
2646
2647    switch (opc) {
2648    case INDEX_op_rotli_vec:
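        /* Rotate left by immediate: shift the high bits right into a
           temporary, then SLI the left-shifted value over it, giving
           v0 = (v1 >> (bits - a2)) | (v1 << a2).  */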
2649        t1 = tcg_temp_new_vec(type);
2650        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2651        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2652                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2653        tcg_temp_free_vec(t1);
2654        break;
2655
2656    case INDEX_op_shrv_vec:
2657    case INDEX_op_sarv_vec:
2658        /* Right shifts are negative left shifts for AArch64.  */
2659        v2 = temp_tcgv_vec(arg_temp(a2));
2660        t1 = tcg_temp_new_vec(type);
2661        tcg_gen_neg_vec(vece, t1, v2);
2662        opc = (opc == INDEX_op_shrv_vec
2663               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2664        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2665                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2666        tcg_temp_free_vec(t1);
2667        break;
2668
2669    case INDEX_op_rotlv_vec:
2670        v2 = temp_tcgv_vec(arg_temp(a2));
2671        t1 = tcg_temp_new_vec(type);
2672        c1 = tcg_constant_vec(type, vece, 8 << vece);
2673        tcg_gen_sub_vec(vece, t1, v2, c1);
2674        /* Right shifts are negative left shifts for AArch64.  */
2675        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2676                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2677        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2678                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2679        tcg_gen_or_vec(vece, v0, v0, t1);
2680        tcg_temp_free_vec(t1);
2681        break;
2682
2683    case INDEX_op_rotrv_vec:
2684        v2 = temp_tcgv_vec(arg_temp(a2));
2685        t1 = tcg_temp_new_vec(type);
2686        t2 = tcg_temp_new_vec(type);
2687        c1 = tcg_constant_vec(type, vece, 8 << vece);
2688        tcg_gen_neg_vec(vece, t1, v2);
2689        tcg_gen_sub_vec(vece, t2, c1, v2);
2690        /* Right shifts are negative left shifts for AArch64.  */
2691        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2692                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2693        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2694                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2695        tcg_gen_or_vec(vece, v0, t1, t2);
2696        tcg_temp_free_vec(t1);
2697        tcg_temp_free_vec(t2);
2698        break;
2699
2700    default:
2701        g_assert_not_reached();
2702    }
2703}
2704
2705static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2706{
2707    switch (op) {
2708    case INDEX_op_goto_ptr:
2709        return C_O0_I1(r);
2710
2711    case INDEX_op_ld8u_i32:
2712    case INDEX_op_ld8s_i32:
2713    case INDEX_op_ld16u_i32:
2714    case INDEX_op_ld16s_i32:
2715    case INDEX_op_ld_i32:
2716    case INDEX_op_ld8u_i64:
2717    case INDEX_op_ld8s_i64:
2718    case INDEX_op_ld16u_i64:
2719    case INDEX_op_ld16s_i64:
2720    case INDEX_op_ld32u_i64:
2721    case INDEX_op_ld32s_i64:
2722    case INDEX_op_ld_i64:
2723    case INDEX_op_neg_i32:
2724    case INDEX_op_neg_i64:
2725    case INDEX_op_not_i32:
2726    case INDEX_op_not_i64:
2727    case INDEX_op_bswap16_i32:
2728    case INDEX_op_bswap32_i32:
2729    case INDEX_op_bswap16_i64:
2730    case INDEX_op_bswap32_i64:
2731    case INDEX_op_bswap64_i64:
2732    case INDEX_op_ext8s_i32:
2733    case INDEX_op_ext16s_i32:
2734    case INDEX_op_ext8u_i32:
2735    case INDEX_op_ext16u_i32:
2736    case INDEX_op_ext8s_i64:
2737    case INDEX_op_ext16s_i64:
2738    case INDEX_op_ext32s_i64:
2739    case INDEX_op_ext8u_i64:
2740    case INDEX_op_ext16u_i64:
2741    case INDEX_op_ext32u_i64:
2742    case INDEX_op_ext_i32_i64:
2743    case INDEX_op_extu_i32_i64:
2744    case INDEX_op_extract_i32:
2745    case INDEX_op_extract_i64:
2746    case INDEX_op_sextract_i32:
2747    case INDEX_op_sextract_i64:
2748        return C_O1_I1(r, r);
2749
2750    case INDEX_op_st8_i32:
2751    case INDEX_op_st16_i32:
2752    case INDEX_op_st_i32:
2753    case INDEX_op_st8_i64:
2754    case INDEX_op_st16_i64:
2755    case INDEX_op_st32_i64:
2756    case INDEX_op_st_i64:
2757        return C_O0_I2(rZ, r);
2758
2759    case INDEX_op_add_i32:
2760    case INDEX_op_add_i64:
2761    case INDEX_op_sub_i32:
2762    case INDEX_op_sub_i64:
2763    case INDEX_op_setcond_i32:
2764    case INDEX_op_setcond_i64:
2765        return C_O1_I2(r, r, rA);
2766
2767    case INDEX_op_mul_i32:
2768    case INDEX_op_mul_i64:
2769    case INDEX_op_div_i32:
2770    case INDEX_op_div_i64:
2771    case INDEX_op_divu_i32:
2772    case INDEX_op_divu_i64:
2773    case INDEX_op_rem_i32:
2774    case INDEX_op_rem_i64:
2775    case INDEX_op_remu_i32:
2776    case INDEX_op_remu_i64:
2777    case INDEX_op_muluh_i64:
2778    case INDEX_op_mulsh_i64:
2779        return C_O1_I2(r, r, r);
2780
2781    case INDEX_op_and_i32:
2782    case INDEX_op_and_i64:
2783    case INDEX_op_or_i32:
2784    case INDEX_op_or_i64:
2785    case INDEX_op_xor_i32:
2786    case INDEX_op_xor_i64:
2787    case INDEX_op_andc_i32:
2788    case INDEX_op_andc_i64:
2789    case INDEX_op_orc_i32:
2790    case INDEX_op_orc_i64:
2791    case INDEX_op_eqv_i32:
2792    case INDEX_op_eqv_i64:
2793        return C_O1_I2(r, r, rL);
2794
2795    case INDEX_op_shl_i32:
2796    case INDEX_op_shr_i32:
2797    case INDEX_op_sar_i32:
2798    case INDEX_op_rotl_i32:
2799    case INDEX_op_rotr_i32:
2800    case INDEX_op_shl_i64:
2801    case INDEX_op_shr_i64:
2802    case INDEX_op_sar_i64:
2803    case INDEX_op_rotl_i64:
2804    case INDEX_op_rotr_i64:
2805        return C_O1_I2(r, r, ri);
2806
2807    case INDEX_op_clz_i32:
2808    case INDEX_op_ctz_i32:
2809    case INDEX_op_clz_i64:
2810    case INDEX_op_ctz_i64:
2811        return C_O1_I2(r, r, rAL);
2812
2813    case INDEX_op_brcond_i32:
2814    case INDEX_op_brcond_i64:
2815        return C_O0_I2(r, rA);
2816
2817    case INDEX_op_movcond_i32:
2818    case INDEX_op_movcond_i64:
2819        return C_O1_I4(r, r, rA, rZ, rZ);
2820
2821    case INDEX_op_qemu_ld_i32:
2822    case INDEX_op_qemu_ld_i64:
2823        return C_O1_I1(r, l);
2824    case INDEX_op_qemu_st_i32:
2825    case INDEX_op_qemu_st_i64:
2826        return C_O0_I2(lZ, l);
2827
2828    case INDEX_op_deposit_i32:
2829    case INDEX_op_deposit_i64:
2830        return C_O1_I2(r, 0, rZ);
2831
2832    case INDEX_op_extract2_i32:
2833    case INDEX_op_extract2_i64:
2834        return C_O1_I2(r, rZ, rZ);
2835
2836    case INDEX_op_add2_i32:
2837    case INDEX_op_add2_i64:
2838    case INDEX_op_sub2_i32:
2839    case INDEX_op_sub2_i64:
2840        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2841
2842    case INDEX_op_add_vec:
2843    case INDEX_op_sub_vec:
2844    case INDEX_op_mul_vec:
2845    case INDEX_op_xor_vec:
2846    case INDEX_op_ssadd_vec:
2847    case INDEX_op_sssub_vec:
2848    case INDEX_op_usadd_vec:
2849    case INDEX_op_ussub_vec:
2850    case INDEX_op_smax_vec:
2851    case INDEX_op_smin_vec:
2852    case INDEX_op_umax_vec:
2853    case INDEX_op_umin_vec:
2854    case INDEX_op_shlv_vec:
2855    case INDEX_op_shrv_vec:
2856    case INDEX_op_sarv_vec:
2857    case INDEX_op_aa64_sshl_vec:
2858        return C_O1_I2(w, w, w);
2859    case INDEX_op_not_vec:
2860    case INDEX_op_neg_vec:
2861    case INDEX_op_abs_vec:
2862    case INDEX_op_shli_vec:
2863    case INDEX_op_shri_vec:
2864    case INDEX_op_sari_vec:
2865        return C_O1_I1(w, w);
2866    case INDEX_op_ld_vec:
2867    case INDEX_op_dupm_vec:
2868        return C_O1_I1(w, r);
2869    case INDEX_op_st_vec:
2870        return C_O0_I2(w, r);
2871    case INDEX_op_dup_vec:
2872        return C_O1_I1(w, wr);
2873    case INDEX_op_or_vec:
2874    case INDEX_op_andc_vec:
2875        return C_O1_I2(w, w, wO);
2876    case INDEX_op_and_vec:
2877    case INDEX_op_orc_vec:
2878        return C_O1_I2(w, w, wN);
2879    case INDEX_op_cmp_vec:
2880        return C_O1_I2(w, w, wZ);
2881    case INDEX_op_bitsel_vec:
2882        return C_O1_I3(w, w, w, w);
2883    case INDEX_op_aa64_sli_vec:
2884        return C_O1_I2(w, 0, w);
2885
2886    default:
2887        g_assert_not_reached();
2888    }
2889}
2890
2891static void tcg_target_init(TCGContext *s)
2892{
2893    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2894    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2895    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2896    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2897
2898    tcg_target_call_clobber_regs = -1ull;
2899    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2900    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2901    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2902    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2903    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2904    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2905    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2906    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2907    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2915    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2916    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2917    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2918
2919    s->reserved_regs = 0;
2920    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2921    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2922    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2923    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2924    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2925}
2926
2927/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2928#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2929
2930#define FRAME_SIZE \
2931    ((PUSH_SIZE \
2932      + TCG_STATIC_CALL_ARGS_SIZE \
2933      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2934      + TCG_TARGET_STACK_ALIGN - 1) \
2935     & ~(TCG_TARGET_STACK_ALIGN - 1))
2936
2937/* We're expecting a 2 byte uleb128 encoded value.  */
2938QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2939
2940/* We're expecting to use a single ADDI insn.  */
2941QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2942
2943static void tcg_target_qemu_prologue(TCGContext *s)
2944{
2945    TCGReg r;
2946
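    /*
     * Frame layout, as a sketch (offsets from the final SP):
     *   [FRAME_SIZE - PUSH_SIZE, FRAME_SIZE)  saved FP, LR, X19 .. X28
     *   [TCG_STATIC_CALL_ARGS_SIZE, ...)      CPU_TEMP_BUF for TCG locals
     *   [0, TCG_STATIC_CALL_ARGS_SIZE)        outgoing stack arguments
     */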
2947    /* Push (FP, LR) and allocate space for all saved registers.  */
2948    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2949                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2950
2951    /* Set up frame pointer for canonical unwinding.  */
2952    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2953
2954    /* Store callee-preserved regs x19..x28.  */
2955    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2956        int ofs = (r - TCG_REG_X19 + 2) * 8;
2957        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2958    }
2959
2960    /* Make stack space for TCG locals.  */
2961    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2962                 FRAME_SIZE - PUSH_SIZE);
2963
2964    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2965    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2966                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2967
2968#if !defined(CONFIG_SOFTMMU)
2969    if (USE_GUEST_BASE) {
2970        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2971        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2972    }
2973#endif
2974
2975    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2976    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2977
2978    /*
2979     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2980     * and fall through to the rest of the epilogue.
2981     */
2982    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2983    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2984
2985    /* TB epilogue */
2986    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2987
2988    /* Remove TCG locals stack space.  */
2989    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2990                 FRAME_SIZE - PUSH_SIZE);
2991
2992    /* Restore registers x19..x28.  */
2993    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2994        int ofs = (r - TCG_REG_X19 + 2) * 8;
2995        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2996    }
2997
2998    /* Pop (FP, LR), restore SP to previous frame.  */
2999    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3000                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3001    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3002}
3003
3004static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3005{
3006    int i;
3007    for (i = 0; i < count; ++i) {
3008        p[i] = NOP;
3009    }
3010}
3011
3012typedef struct {
3013    DebugFrameHeader h;
3014    uint8_t fde_def_cfa[4];
3015    uint8_t fde_reg_ofs[24];
3016} DebugFrame;
3017
3018#define ELF_HOST_MACHINE EM_AARCH64
3019
3020static const DebugFrame debug_frame = {
3021    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3022    .h.cie.id = -1,
3023    .h.cie.version = 1,
3024    .h.cie.code_align = 1,
3025    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3026    .h.cie.return_column = TCG_REG_LR,
3027
3028    /* Total FDE size does not include the "len" member.  */
3029    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3030
3031    .fde_def_cfa = {
3032        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3033        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3034        (FRAME_SIZE >> 7)
3035    },
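    /* DW_CFA_offset is 0x80 | regno; the operand is the offset from the CFA
       in units of data_align (-8), so "1" means CFA - 8.  */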
3036    .fde_reg_ofs = {
3037        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3038        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3039        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3040        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3041        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3042        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3043        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3044        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3045        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3046        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3047        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3048        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3049    }
3050};
3051
3052void tcg_register_jit(const void *buf, size_t buf_size)
3053{
3054    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3055}
3056