xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision b91a0fa7)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21#ifdef CONFIG_DEBUG_TCG
22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32};
33#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
69#define TCG_REG_TMP TCG_REG_X30
70#define TCG_VEC_TMP TCG_REG_V31
71
72#ifndef CONFIG_SOFTMMU
73/* Note that XZR cannot be encoded in the address base register slot,
74   as that actaully encodes SP.  So if we need to zero-extend the guest
75   address, via the address index register slot, we need to load even
76   a zero guest base into a register.  */
77#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
78#define TCG_REG_GUEST_BASE TCG_REG_X28
79#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
122#define TCG_CT_CONST_AIMM 0x100
123#define TCG_CT_CONST_LIMM 0x200
124#define TCG_CT_CONST_ZERO 0x400
125#define TCG_CT_CONST_MONE 0x800
126#define TCG_CT_CONST_ORRI 0x1000
127#define TCG_CT_CONST_ANDI 0x2000
128
129#define ALL_GENERAL_REGS  0xffffffffu
130#define ALL_VECTOR_REGS   0xffffffff00000000ull
131
132#ifdef CONFIG_SOFTMMU
133#define ALL_QLDST_REGS \
134    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
135                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
136#else
137#define ALL_QLDST_REGS   ALL_GENERAL_REGS
138#endif
139
140/* Match a constant valid for addition (12-bit, optionally shifted).  */
141static inline bool is_aimm(uint64_t val)
142{
143    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
144}
145
146/* Match a constant valid for logical operations.  */
147static inline bool is_limm(uint64_t val)
148{
149    /* Taking a simplified view of the logical immediates for now, ignoring
150       the replication that can happen across the field.  Match bit patterns
151       of the forms
152           0....01....1
153           0..01..10..0
154       and their inverses.  */
155
156    /* Make things easier below, by testing the form with msb clear. */
157    if ((int64_t)val < 0) {
158        val = ~val;
159    }
160    if (val == 0) {
161        return false;
162    }
163    val += val & -val;
164    return (val & (val - 1)) == 0;
165}
166
167/* Return true if v16 is a valid 16-bit shifted immediate.  */
168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
169{
170    if (v16 == (v16 & 0xff)) {
171        *cmode = 0x8;
172        *imm8 = v16 & 0xff;
173        return true;
174    } else if (v16 == (v16 & 0xff00)) {
175        *cmode = 0xa;
176        *imm8 = v16 >> 8;
177        return true;
178    }
179    return false;
180}
181
182/* Return true if v32 is a valid 32-bit shifted immediate.  */
183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
184{
185    if (v32 == (v32 & 0xff)) {
186        *cmode = 0x0;
187        *imm8 = v32 & 0xff;
188        return true;
189    } else if (v32 == (v32 & 0xff00)) {
190        *cmode = 0x2;
191        *imm8 = (v32 >> 8) & 0xff;
192        return true;
193    } else if (v32 == (v32 & 0xff0000)) {
194        *cmode = 0x4;
195        *imm8 = (v32 >> 16) & 0xff;
196        return true;
197    } else if (v32 == (v32 & 0xff000000)) {
198        *cmode = 0x6;
199        *imm8 = v32 >> 24;
200        return true;
201    }
202    return false;
203}
204
205/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
207{
208    if ((v32 & 0xffff00ff) == 0xff) {
209        *cmode = 0xc;
210        *imm8 = (v32 >> 8) & 0xff;
211        return true;
212    } else if ((v32 & 0xff00ffff) == 0xffff) {
213        *cmode = 0xd;
214        *imm8 = (v32 >> 16) & 0xff;
215        return true;
216    }
217    return false;
218}
219
220/* Return true if v32 is a valid float32 immediate.  */
221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
222{
223    if (extract32(v32, 0, 19) == 0
224        && (extract32(v32, 25, 6) == 0x20
225            || extract32(v32, 25, 6) == 0x1f)) {
226        *cmode = 0xf;
227        *imm8 = (extract32(v32, 31, 1) << 7)
228              | (extract32(v32, 25, 1) << 6)
229              | extract32(v32, 19, 6);
230        return true;
231    }
232    return false;
233}
234
235/* Return true if v64 is a valid float64 immediate.  */
236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
237{
238    if (extract64(v64, 0, 48) == 0
239        && (extract64(v64, 54, 9) == 0x100
240            || extract64(v64, 54, 9) == 0x0ff)) {
241        *cmode = 0xf;
242        *imm8 = (extract64(v64, 63, 1) << 7)
243              | (extract64(v64, 54, 1) << 6)
244              | extract64(v64, 48, 6);
245        return true;
246    }
247    return false;
248}
249
250/*
251 * Return non-zero if v32 can be formed by MOVI+ORR.
252 * Place the parameters for MOVI in (cmode, imm8).
253 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
254 */
255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
256{
257    int i;
258
259    for (i = 6; i > 0; i -= 2) {
260        /* Mask out one byte we can add with ORR.  */
261        uint32_t tmp = v32 & ~(0xffu << (i * 4));
262        if (is_shimm32(tmp, cmode, imm8) ||
263            is_soimm32(tmp, cmode, imm8)) {
264            break;
265        }
266    }
267    return i;
268}
269
270/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
272{
273    if (v32 == deposit32(v32, 16, 16, v32)) {
274        return is_shimm16(v32, cmode, imm8);
275    } else {
276        return is_shimm32(v32, cmode, imm8);
277    }
278}
279
280static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
281{
282    if (ct & TCG_CT_CONST) {
283        return 1;
284    }
285    if (type == TCG_TYPE_I32) {
286        val = (int32_t)val;
287    }
288    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
289        return 1;
290    }
291    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
298        return 1;
299    }
300
301    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
302    case 0:
303        break;
304    case TCG_CT_CONST_ANDI:
305        val = ~val;
306        /* fallthru */
307    case TCG_CT_CONST_ORRI:
308        if (val == deposit64(val, 32, 32, val)) {
309            int cmode, imm8;
310            return is_shimm1632(val, &cmode, &imm8);
311        }
312        break;
313    default:
314        /* Both bits should not be set for the same insn.  */
315        g_assert_not_reached();
316    }
317
318    return 0;
319}
320
321enum aarch64_cond_code {
322    COND_EQ = 0x0,
323    COND_NE = 0x1,
324    COND_CS = 0x2,     /* Unsigned greater or equal */
325    COND_HS = COND_CS, /* ALIAS greater or equal */
326    COND_CC = 0x3,     /* Unsigned less than */
327    COND_LO = COND_CC, /* ALIAS Lower */
328    COND_MI = 0x4,     /* Negative */
329    COND_PL = 0x5,     /* Zero or greater */
330    COND_VS = 0x6,     /* Overflow */
331    COND_VC = 0x7,     /* No overflow */
332    COND_HI = 0x8,     /* Unsigned greater than */
333    COND_LS = 0x9,     /* Unsigned less or equal */
334    COND_GE = 0xa,
335    COND_LT = 0xb,
336    COND_GT = 0xc,
337    COND_LE = 0xd,
338    COND_AL = 0xe,
339    COND_NV = 0xf, /* behaves like COND_AL here */
340};
341
342static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
343    [TCG_COND_EQ] = COND_EQ,
344    [TCG_COND_NE] = COND_NE,
345    [TCG_COND_LT] = COND_LT,
346    [TCG_COND_GE] = COND_GE,
347    [TCG_COND_LE] = COND_LE,
348    [TCG_COND_GT] = COND_GT,
349    /* unsigned */
350    [TCG_COND_LTU] = COND_LO,
351    [TCG_COND_GTU] = COND_HI,
352    [TCG_COND_GEU] = COND_HS,
353    [TCG_COND_LEU] = COND_LS,
354};
355
356typedef enum {
357    LDST_ST = 0,    /* store */
358    LDST_LD = 1,    /* load */
359    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
360    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
361} AArch64LdstType;
362
363/* We encode the format of the insn into the beginning of the name, so that
364   we can have the preprocessor help "typecheck" the insn vs the output
365   function.  Arm didn't provide us with nice names for the formats, so we
366   use the section number of the architecture reference manual in which the
367   instruction group is described.  */
368typedef enum {
369    /* Compare and branch (immediate).  */
370    I3201_CBZ       = 0x34000000,
371    I3201_CBNZ      = 0x35000000,
372
373    /* Conditional branch (immediate).  */
374    I3202_B_C       = 0x54000000,
375
376    /* Unconditional branch (immediate).  */
377    I3206_B         = 0x14000000,
378    I3206_BL        = 0x94000000,
379
380    /* Unconditional branch (register).  */
381    I3207_BR        = 0xd61f0000,
382    I3207_BLR       = 0xd63f0000,
383    I3207_RET       = 0xd65f0000,
384
385    /* AdvSIMD load/store single structure.  */
386    I3303_LD1R      = 0x0d40c000,
387
388    /* Load literal for loading the address at pc-relative offset */
389    I3305_LDR       = 0x58000000,
390    I3305_LDR_v64   = 0x5c000000,
391    I3305_LDR_v128  = 0x9c000000,
392
393    /* Load/store register.  Described here as 3.3.12, but the helper
394       that emits them can transform to 3.3.10 or 3.3.13.  */
395    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
396    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
397    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
398    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
399
400    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
401    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
402    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
403    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
404
405    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
406    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
407
408    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
409    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
410    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
411
412    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
413    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
414
415    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
416    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
417
418    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
419    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
420
421    I3312_TO_I3310  = 0x00200800,
422    I3312_TO_I3313  = 0x01000000,
423
424    /* Load/store register pair instructions.  */
425    I3314_LDP       = 0x28400000,
426    I3314_STP       = 0x28000000,
427
428    /* Add/subtract immediate instructions.  */
429    I3401_ADDI      = 0x11000000,
430    I3401_ADDSI     = 0x31000000,
431    I3401_SUBI      = 0x51000000,
432    I3401_SUBSI     = 0x71000000,
433
434    /* Bitfield instructions.  */
435    I3402_BFM       = 0x33000000,
436    I3402_SBFM      = 0x13000000,
437    I3402_UBFM      = 0x53000000,
438
439    /* Extract instruction.  */
440    I3403_EXTR      = 0x13800000,
441
442    /* Logical immediate instructions.  */
443    I3404_ANDI      = 0x12000000,
444    I3404_ORRI      = 0x32000000,
445    I3404_EORI      = 0x52000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract shifted register instructions (without a shift).  */
457    I3502_ADD       = 0x0b000000,
458    I3502_ADDS      = 0x2b000000,
459    I3502_SUB       = 0x4b000000,
460    I3502_SUBS      = 0x6b000000,
461
462    /* Add/subtract shifted register instructions (with a shift).  */
463    I3502S_ADD_LSL  = I3502_ADD,
464
465    /* Add/subtract with carry instructions.  */
466    I3503_ADC       = 0x1a000000,
467    I3503_SBC       = 0x5a000000,
468
469    /* Conditional select instructions.  */
470    I3506_CSEL      = 0x1a800000,
471    I3506_CSINC     = 0x1a800400,
472    I3506_CSINV     = 0x5a800000,
473    I3506_CSNEG     = 0x5a800400,
474
475    /* Data-processing (1 source) instructions.  */
476    I3507_CLZ       = 0x5ac01000,
477    I3507_RBIT      = 0x5ac00000,
478    I3507_REV       = 0x5ac00000, /* + size << 10 */
479
480    /* Data-processing (2 source) instructions.  */
481    I3508_LSLV      = 0x1ac02000,
482    I3508_LSRV      = 0x1ac02400,
483    I3508_ASRV      = 0x1ac02800,
484    I3508_RORV      = 0x1ac02c00,
485    I3508_SMULH     = 0x9b407c00,
486    I3508_UMULH     = 0x9bc07c00,
487    I3508_UDIV      = 0x1ac00800,
488    I3508_SDIV      = 0x1ac00c00,
489
490    /* Data-processing (3 source) instructions.  */
491    I3509_MADD      = 0x1b000000,
492    I3509_MSUB      = 0x1b008000,
493
494    /* Logical shifted register instructions (without a shift).  */
495    I3510_AND       = 0x0a000000,
496    I3510_BIC       = 0x0a200000,
497    I3510_ORR       = 0x2a000000,
498    I3510_ORN       = 0x2a200000,
499    I3510_EOR       = 0x4a000000,
500    I3510_EON       = 0x4a200000,
501    I3510_ANDS      = 0x6a000000,
502
503    /* Logical shifted register instructions (with a shift).  */
504    I3502S_AND_LSR  = I3510_AND | (1 << 22),
505
506    /* AdvSIMD copy */
507    I3605_DUP      = 0x0e000400,
508    I3605_INS      = 0x4e001c00,
509    I3605_UMOV     = 0x0e003c00,
510
511    /* AdvSIMD modified immediate */
512    I3606_MOVI      = 0x0f000400,
513    I3606_MVNI      = 0x2f000400,
514    I3606_BIC       = 0x2f001400,
515    I3606_ORR       = 0x0f001400,
516
517    /* AdvSIMD scalar shift by immediate */
518    I3609_SSHR      = 0x5f000400,
519    I3609_SSRA      = 0x5f001400,
520    I3609_SHL       = 0x5f005400,
521    I3609_USHR      = 0x7f000400,
522    I3609_USRA      = 0x7f001400,
523    I3609_SLI       = 0x7f005400,
524
525    /* AdvSIMD scalar three same */
526    I3611_SQADD     = 0x5e200c00,
527    I3611_SQSUB     = 0x5e202c00,
528    I3611_CMGT      = 0x5e203400,
529    I3611_CMGE      = 0x5e203c00,
530    I3611_SSHL      = 0x5e204400,
531    I3611_ADD       = 0x5e208400,
532    I3611_CMTST     = 0x5e208c00,
533    I3611_UQADD     = 0x7e200c00,
534    I3611_UQSUB     = 0x7e202c00,
535    I3611_CMHI      = 0x7e203400,
536    I3611_CMHS      = 0x7e203c00,
537    I3611_USHL      = 0x7e204400,
538    I3611_SUB       = 0x7e208400,
539    I3611_CMEQ      = 0x7e208c00,
540
541    /* AdvSIMD scalar two-reg misc */
542    I3612_CMGT0     = 0x5e208800,
543    I3612_CMEQ0     = 0x5e209800,
544    I3612_CMLT0     = 0x5e20a800,
545    I3612_ABS       = 0x5e20b800,
546    I3612_CMGE0     = 0x7e208800,
547    I3612_CMLE0     = 0x7e209800,
548    I3612_NEG       = 0x7e20b800,
549
550    /* AdvSIMD shift by immediate */
551    I3614_SSHR      = 0x0f000400,
552    I3614_SSRA      = 0x0f001400,
553    I3614_SHL       = 0x0f005400,
554    I3614_SLI       = 0x2f005400,
555    I3614_USHR      = 0x2f000400,
556    I3614_USRA      = 0x2f001400,
557
558    /* AdvSIMD three same.  */
559    I3616_ADD       = 0x0e208400,
560    I3616_AND       = 0x0e201c00,
561    I3616_BIC       = 0x0e601c00,
562    I3616_BIF       = 0x2ee01c00,
563    I3616_BIT       = 0x2ea01c00,
564    I3616_BSL       = 0x2e601c00,
565    I3616_EOR       = 0x2e201c00,
566    I3616_MUL       = 0x0e209c00,
567    I3616_ORR       = 0x0ea01c00,
568    I3616_ORN       = 0x0ee01c00,
569    I3616_SUB       = 0x2e208400,
570    I3616_CMGT      = 0x0e203400,
571    I3616_CMGE      = 0x0e203c00,
572    I3616_CMTST     = 0x0e208c00,
573    I3616_CMHI      = 0x2e203400,
574    I3616_CMHS      = 0x2e203c00,
575    I3616_CMEQ      = 0x2e208c00,
576    I3616_SMAX      = 0x0e206400,
577    I3616_SMIN      = 0x0e206c00,
578    I3616_SSHL      = 0x0e204400,
579    I3616_SQADD     = 0x0e200c00,
580    I3616_SQSUB     = 0x0e202c00,
581    I3616_UMAX      = 0x2e206400,
582    I3616_UMIN      = 0x2e206c00,
583    I3616_UQADD     = 0x2e200c00,
584    I3616_UQSUB     = 0x2e202c00,
585    I3616_USHL      = 0x2e204400,
586
587    /* AdvSIMD two-reg misc.  */
588    I3617_CMGT0     = 0x0e208800,
589    I3617_CMEQ0     = 0x0e209800,
590    I3617_CMLT0     = 0x0e20a800,
591    I3617_CMGE0     = 0x2e208800,
592    I3617_CMLE0     = 0x2e209800,
593    I3617_NOT       = 0x2e205800,
594    I3617_ABS       = 0x0e20b800,
595    I3617_NEG       = 0x2e20b800,
596
597    /* System instructions.  */
598    NOP             = 0xd503201f,
599    DMB_ISH         = 0xd50338bf,
600    DMB_LD          = 0x00000100,
601    DMB_ST          = 0x00000200,
602} AArch64Insn;
603
604static inline uint32_t tcg_in32(TCGContext *s)
605{
606    uint32_t v = *(uint32_t *)s->code_ptr;
607    return v;
608}
609
610/* Emit an opcode with "type-checking" of the format.  */
611#define tcg_out_insn(S, FMT, OP, ...) \
612    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
613
614static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
615                              TCGReg rt, TCGReg rn, unsigned size)
616{
617    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
618}
619
620static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
621                              int imm19, TCGReg rt)
622{
623    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
624}
625
626static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
627                              TCGReg rt, int imm19)
628{
629    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
633                              TCGCond c, int imm19)
634{
635    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
636}
637
638static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
639{
640    tcg_out32(s, insn | (imm26 & 0x03ffffff));
641}
642
643static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
644{
645    tcg_out32(s, insn | rn << 5);
646}
647
648static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
649                              TCGReg r1, TCGReg r2, TCGReg rn,
650                              tcg_target_long ofs, bool pre, bool w)
651{
652    insn |= 1u << 31; /* ext */
653    insn |= pre << 24;
654    insn |= w << 23;
655
656    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
657    insn |= (ofs & (0x7f << 3)) << (15 - 3);
658
659    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
660}
661
662static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
663                              TCGReg rd, TCGReg rn, uint64_t aimm)
664{
665    if (aimm > 0xfff) {
666        tcg_debug_assert((aimm & 0xfff) == 0);
667        aimm >>= 12;
668        tcg_debug_assert(aimm <= 0xfff);
669        aimm |= 1 << 12;  /* apply LSL 12 */
670    }
671    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
672}
673
674/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
675   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
676   that feed the DecodeBitMasks pseudo function.  */
677static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
678                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
679{
680    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
681              | rn << 5 | rd);
682}
683
684#define tcg_out_insn_3404  tcg_out_insn_3402
685
686static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
687                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
688{
689    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
690              | rn << 5 | rd);
691}
692
693/* This function is used for the Move (wide immediate) instruction group.
694   Note that SHIFT is a full shift count, not the 2 bit HW field. */
695static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
696                              TCGReg rd, uint16_t half, unsigned shift)
697{
698    tcg_debug_assert((shift & ~0x30) == 0);
699    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
700}
701
702static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
703                              TCGReg rd, int64_t disp)
704{
705    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
706}
707
708/* This function is for both 3.5.2 (Add/Subtract shifted register), for
709   the rare occasion when we actually want to supply a shift amount.  */
710static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
711                                      TCGType ext, TCGReg rd, TCGReg rn,
712                                      TCGReg rm, int imm6)
713{
714    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
715}
716
717/* This function is for 3.5.2 (Add/subtract shifted register),
718   and 3.5.10 (Logical shifted register), for the vast majorty of cases
719   when we don't want to apply a shift.  Thus it can also be used for
720   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
721static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
722                              TCGReg rd, TCGReg rn, TCGReg rm)
723{
724    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
725}
726
727#define tcg_out_insn_3503  tcg_out_insn_3502
728#define tcg_out_insn_3508  tcg_out_insn_3502
729#define tcg_out_insn_3510  tcg_out_insn_3502
730
731static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
732                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
733{
734    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
735              | tcg_cond_to_aarch64[c] << 12);
736}
737
738static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
739                              TCGReg rd, TCGReg rn)
740{
741    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
742}
743
744static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
745                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
746{
747    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
748}
749
750static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
751                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
752{
753    /* Note that bit 11 set means general register input.  Therefore
754       we can handle both register sets with one function.  */
755    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
756              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
757}
758
759static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
760                              TCGReg rd, bool op, int cmode, uint8_t imm8)
761{
762    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
763              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
764}
765
766static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
767                              TCGReg rd, TCGReg rn, unsigned immhb)
768{
769    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
770}
771
772static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
773                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
774{
775    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
776              | (rn & 0x1f) << 5 | (rd & 0x1f));
777}
778
779static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
780                              unsigned size, TCGReg rd, TCGReg rn)
781{
782    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
783}
784
785static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
786                              TCGReg rd, TCGReg rn, unsigned immhb)
787{
788    tcg_out32(s, insn | q << 30 | immhb << 16
789              | (rn & 0x1f) << 5 | (rd & 0x1f));
790}
791
792static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
793                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
794{
795    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
796              | (rn & 0x1f) << 5 | (rd & 0x1f));
797}
798
799static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
800                              unsigned size, TCGReg rd, TCGReg rn)
801{
802    tcg_out32(s, insn | q << 30 | (size << 22)
803              | (rn & 0x1f) << 5 | (rd & 0x1f));
804}
805
806static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
807                              TCGReg rd, TCGReg base, TCGType ext,
808                              TCGReg regoff)
809{
810    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
811    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
812              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
813}
814
815static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
816                              TCGReg rd, TCGReg rn, intptr_t offset)
817{
818    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
823{
824    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
825    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
826              | rn << 5 | (rd & 0x1f));
827}
828
829/* Register to register move using ORR (shifted register with no shift). */
830static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
831{
832    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
833}
834
835/* Register to register move using ADDI (move to/from SP).  */
836static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
837{
838    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
839}
840
841/* This function is used for the Logical (immediate) instruction group.
842   The value of LIMM must satisfy IS_LIMM.  See the comment above about
843   only supporting simplified logical immediates.  */
844static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
845                             TCGReg rd, TCGReg rn, uint64_t limm)
846{
847    unsigned h, l, r, c;
848
849    tcg_debug_assert(is_limm(limm));
850
851    h = clz64(limm);
852    l = ctz64(limm);
853    if (l == 0) {
854        r = 0;                  /* form 0....01....1 */
855        c = ctz64(~limm) - 1;
856        if (h == 0) {
857            r = clz64(~limm);   /* form 1..10..01..1 */
858            c += r;
859        }
860    } else {
861        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
862        c = r - h - 1;
863    }
864    if (ext == TCG_TYPE_I32) {
865        r &= 31;
866        c &= 31;
867    }
868
869    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
870}
871
872static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
873                             TCGReg rd, int64_t v64)
874{
875    bool q = type == TCG_TYPE_V128;
876    int cmode, imm8, i;
877
878    /* Test all bytes equal first.  */
879    if (vece == MO_8) {
880        imm8 = (uint8_t)v64;
881        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
882        return;
883    }
884
885    /*
886     * Test all bytes 0x00 or 0xff second.  This can match cases that
887     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
888     */
889    for (i = imm8 = 0; i < 8; i++) {
890        uint8_t byte = v64 >> (i * 8);
891        if (byte == 0xff) {
892            imm8 |= 1 << i;
893        } else if (byte != 0) {
894            goto fail_bytes;
895        }
896    }
897    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
898    return;
899 fail_bytes:
900
901    /*
902     * Tests for various replications.  For each element width, if we
903     * cannot find an expansion there's no point checking a larger
904     * width because we already know by replication it cannot match.
905     */
906    if (vece == MO_16) {
907        uint16_t v16 = v64;
908
909        if (is_shimm16(v16, &cmode, &imm8)) {
910            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
911            return;
912        }
913        if (is_shimm16(~v16, &cmode, &imm8)) {
914            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
915            return;
916        }
917
918        /*
919         * Otherwise, all remaining constants can be loaded in two insns:
920         * rd = v16 & 0xff, rd |= v16 & 0xff00.
921         */
922        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
923        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
924        return;
925    } else if (vece == MO_32) {
926        uint32_t v32 = v64;
927        uint32_t n32 = ~v32;
928
929        if (is_shimm32(v32, &cmode, &imm8) ||
930            is_soimm32(v32, &cmode, &imm8) ||
931            is_fimm32(v32, &cmode, &imm8)) {
932            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
933            return;
934        }
935        if (is_shimm32(n32, &cmode, &imm8) ||
936            is_soimm32(n32, &cmode, &imm8)) {
937            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
938            return;
939        }
940
941        /*
942         * Restrict the set of constants to those we can load with
943         * two instructions.  Others we load from the pool.
944         */
945        i = is_shimm32_pair(v32, &cmode, &imm8);
946        if (i) {
947            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
948            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
949            return;
950        }
951        i = is_shimm32_pair(n32, &cmode, &imm8);
952        if (i) {
953            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
954            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
955            return;
956        }
957    } else if (is_fimm64(v64, &cmode, &imm8)) {
958        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
959        return;
960    }
961
962    /*
963     * As a last resort, load from the constant pool.  Sadly there
964     * is no LD1R (literal), so store the full 16-byte vector.
965     */
966    if (type == TCG_TYPE_V128) {
967        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
968        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
969    } else {
970        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
971        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
972    }
973}
974
975static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
976                            TCGReg rd, TCGReg rs)
977{
978    int is_q = type - TCG_TYPE_V64;
979    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
980    return true;
981}
982
983static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
984                             TCGReg r, TCGReg base, intptr_t offset)
985{
986    TCGReg temp = TCG_REG_TMP;
987
988    if (offset < -0xffffff || offset > 0xffffff) {
989        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
990        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
991        base = temp;
992    } else {
993        AArch64Insn add_insn = I3401_ADDI;
994
995        if (offset < 0) {
996            add_insn = I3401_SUBI;
997            offset = -offset;
998        }
999        if (offset & 0xfff000) {
1000            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1001            base = temp;
1002        }
1003        if (offset & 0xfff) {
1004            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1005            base = temp;
1006        }
1007    }
1008    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1009    return true;
1010}
1011
1012static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1013                         tcg_target_long value)
1014{
1015    tcg_target_long svalue = value;
1016    tcg_target_long ivalue = ~value;
1017    tcg_target_long t0, t1, t2;
1018    int s0, s1;
1019    AArch64Insn opc;
1020
1021    switch (type) {
1022    case TCG_TYPE_I32:
1023    case TCG_TYPE_I64:
1024        tcg_debug_assert(rd < 32);
1025        break;
1026    default:
1027        g_assert_not_reached();
1028    }
1029
1030    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1031       values within [2**31, 2**32-1], we can create smaller sequences by
1032       interpreting this as a negative 32-bit number, while ensuring that
1033       the high 32 bits are cleared by setting SF=0.  */
1034    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1035        svalue = (int32_t)value;
1036        value = (uint32_t)value;
1037        ivalue = (uint32_t)ivalue;
1038        type = TCG_TYPE_I32;
1039    }
1040
1041    /* Speed things up by handling the common case of small positive
1042       and negative values specially.  */
1043    if ((value & ~0xffffull) == 0) {
1044        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1045        return;
1046    } else if ((ivalue & ~0xffffull) == 0) {
1047        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1048        return;
1049    }
1050
1051    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1052       use the sign-extended value.  That lets us match rotated values such
1053       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1054    if (is_limm(svalue)) {
1055        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1056        return;
1057    }
1058
1059    /* Look for host pointer values within 4G of the PC.  This happens
1060       often when loading pointers to QEMU's own data structures.  */
1061    if (type == TCG_TYPE_I64) {
1062        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1063        tcg_target_long disp = value - src_rx;
1064        if (disp == sextract64(disp, 0, 21)) {
1065            tcg_out_insn(s, 3406, ADR, rd, disp);
1066            return;
1067        }
1068        disp = (value >> 12) - (src_rx >> 12);
1069        if (disp == sextract64(disp, 0, 21)) {
1070            tcg_out_insn(s, 3406, ADRP, rd, disp);
1071            if (value & 0xfff) {
1072                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1073            }
1074            return;
1075        }
1076    }
1077
1078    /* Would it take fewer insns to begin with MOVN?  */
1079    if (ctpop64(value) >= 32) {
1080        t0 = ivalue;
1081        opc = I3405_MOVN;
1082    } else {
1083        t0 = value;
1084        opc = I3405_MOVZ;
1085    }
1086    s0 = ctz64(t0) & (63 & -16);
1087    t1 = t0 & ~(0xffffUL << s0);
1088    s1 = ctz64(t1) & (63 & -16);
1089    t2 = t1 & ~(0xffffUL << s1);
1090    if (t2 == 0) {
1091        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1092        if (t1 != 0) {
1093            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1094        }
1095        return;
1096    }
1097
1098    /* For more than 2 insns, dump it into the constant pool.  */
1099    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1100    tcg_out_insn(s, 3305, LDR, 0, rd);
1101}
1102
1103/* Define something more legible for general use.  */
1104#define tcg_out_ldst_r  tcg_out_insn_3310
1105
1106static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1107                         TCGReg rn, intptr_t offset, int lgsize)
1108{
1109    /* If the offset is naturally aligned and in range, then we can
1110       use the scaled uimm12 encoding */
1111    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1112        uintptr_t scaled_uimm = offset >> lgsize;
1113        if (scaled_uimm <= 0xfff) {
1114            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1115            return;
1116        }
1117    }
1118
1119    /* Small signed offsets can use the unscaled encoding.  */
1120    if (offset >= -256 && offset < 256) {
1121        tcg_out_insn_3312(s, insn, rd, rn, offset);
1122        return;
1123    }
1124
1125    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1126    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1127    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1128}
1129
1130static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1131{
1132    if (ret == arg) {
1133        return true;
1134    }
1135    switch (type) {
1136    case TCG_TYPE_I32:
1137    case TCG_TYPE_I64:
1138        if (ret < 32 && arg < 32) {
1139            tcg_out_movr(s, type, ret, arg);
1140            break;
1141        } else if (ret < 32) {
1142            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1143            break;
1144        } else if (arg < 32) {
1145            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1146            break;
1147        }
1148        /* FALLTHRU */
1149
1150    case TCG_TYPE_V64:
1151        tcg_debug_assert(ret >= 32 && arg >= 32);
1152        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1153        break;
1154    case TCG_TYPE_V128:
1155        tcg_debug_assert(ret >= 32 && arg >= 32);
1156        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1157        break;
1158
1159    default:
1160        g_assert_not_reached();
1161    }
1162    return true;
1163}
1164
1165static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1166                       TCGReg base, intptr_t ofs)
1167{
1168    AArch64Insn insn;
1169    int lgsz;
1170
1171    switch (type) {
1172    case TCG_TYPE_I32:
1173        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1174        lgsz = 2;
1175        break;
1176    case TCG_TYPE_I64:
1177        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1178        lgsz = 3;
1179        break;
1180    case TCG_TYPE_V64:
1181        insn = I3312_LDRVD;
1182        lgsz = 3;
1183        break;
1184    case TCG_TYPE_V128:
1185        insn = I3312_LDRVQ;
1186        lgsz = 4;
1187        break;
1188    default:
1189        g_assert_not_reached();
1190    }
1191    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1192}
1193
1194static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1195                       TCGReg base, intptr_t ofs)
1196{
1197    AArch64Insn insn;
1198    int lgsz;
1199
1200    switch (type) {
1201    case TCG_TYPE_I32:
1202        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1203        lgsz = 2;
1204        break;
1205    case TCG_TYPE_I64:
1206        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1207        lgsz = 3;
1208        break;
1209    case TCG_TYPE_V64:
1210        insn = I3312_STRVD;
1211        lgsz = 3;
1212        break;
1213    case TCG_TYPE_V128:
1214        insn = I3312_STRVQ;
1215        lgsz = 4;
1216        break;
1217    default:
1218        g_assert_not_reached();
1219    }
1220    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1221}
1222
1223static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1224                               TCGReg base, intptr_t ofs)
1225{
1226    if (type <= TCG_TYPE_I64 && val == 0) {
1227        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1228        return true;
1229    }
1230    return false;
1231}
1232
1233static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1234                               TCGReg rn, unsigned int a, unsigned int b)
1235{
1236    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1237}
1238
1239static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1240                                TCGReg rn, unsigned int a, unsigned int b)
1241{
1242    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1243}
1244
1245static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1246                                TCGReg rn, unsigned int a, unsigned int b)
1247{
1248    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1249}
1250
1251static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1252                                TCGReg rn, TCGReg rm, unsigned int a)
1253{
1254    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1255}
1256
1257static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1258                               TCGReg rd, TCGReg rn, unsigned int m)
1259{
1260    int bits = ext ? 64 : 32;
1261    int max = bits - 1;
1262    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1263}
1264
1265static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1266                               TCGReg rd, TCGReg rn, unsigned int m)
1267{
1268    int max = ext ? 63 : 31;
1269    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1270}
1271
1272static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1273                               TCGReg rd, TCGReg rn, unsigned int m)
1274{
1275    int max = ext ? 63 : 31;
1276    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1277}
1278
1279static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1280                                TCGReg rd, TCGReg rn, unsigned int m)
1281{
1282    int max = ext ? 63 : 31;
1283    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1284}
1285
1286static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1287                                TCGReg rd, TCGReg rn, unsigned int m)
1288{
1289    int max = ext ? 63 : 31;
1290    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1291}
1292
1293static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1294                               TCGReg rn, unsigned lsb, unsigned width)
1295{
1296    unsigned size = ext ? 64 : 32;
1297    unsigned a = (size - lsb) & (size - 1);
1298    unsigned b = width - 1;
1299    tcg_out_bfm(s, ext, rd, rn, a, b);
1300}
1301
1302static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1303                        tcg_target_long b, bool const_b)
1304{
1305    if (const_b) {
1306        /* Using CMP or CMN aliases.  */
1307        if (b >= 0) {
1308            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1309        } else {
1310            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1311        }
1312    } else {
1313        /* Using CMP alias SUBS wzr, Wn, Wm */
1314        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1315    }
1316}
1317
1318static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1319{
1320    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1321    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1322    tcg_out_insn(s, 3206, B, offset);
1323}
1324
1325static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1326{
1327    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1328    if (offset == sextract64(offset, 0, 26)) {
1329        tcg_out_insn(s, 3206, B, offset);
1330    } else {
1331        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1332        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1333    }
1334}
1335
1336static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1337{
1338    tcg_out_insn(s, 3207, BLR, reg);
1339}
1340
1341static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1342{
1343    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1344    if (offset == sextract64(offset, 0, 26)) {
1345        tcg_out_insn(s, 3206, BL, offset);
1346    } else {
1347        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1348        tcg_out_callr(s, TCG_REG_TMP);
1349    }
1350}
1351
1352void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1353                              uintptr_t jmp_rw, uintptr_t addr)
1354{
1355    tcg_insn_unit i1, i2;
1356    TCGType rt = TCG_TYPE_I64;
1357    TCGReg  rd = TCG_REG_TMP;
1358    uint64_t pair;
1359
1360    ptrdiff_t offset = addr - jmp_rx;
1361
1362    if (offset == sextract64(offset, 0, 26)) {
1363        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1364        i2 = NOP;
1365    } else {
1366        offset = (addr >> 12) - (jmp_rx >> 12);
1367
1368        /* patch ADRP */
1369        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1370        /* patch ADDI */
1371        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1372    }
1373    pair = (uint64_t)i2 << 32 | i1;
1374    qatomic_set((uint64_t *)jmp_rw, pair);
1375    flush_idcache_range(jmp_rx, jmp_rw, 8);
1376}
1377
1378static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1379{
1380    if (!l->has_value) {
1381        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1382        tcg_out_insn(s, 3206, B, 0);
1383    } else {
1384        tcg_out_goto(s, l->u.value_ptr);
1385    }
1386}
1387
1388static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1389                           TCGArg b, bool b_const, TCGLabel *l)
1390{
1391    intptr_t offset;
1392    bool need_cmp;
1393
1394    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1395        need_cmp = false;
1396    } else {
1397        need_cmp = true;
1398        tcg_out_cmp(s, ext, a, b, b_const);
1399    }
1400
1401    if (!l->has_value) {
1402        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1403        offset = tcg_in32(s) >> 5;
1404    } else {
1405        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1406        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1407    }
1408
1409    if (need_cmp) {
1410        tcg_out_insn(s, 3202, B_C, c, offset);
1411    } else if (c == TCG_COND_EQ) {
1412        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1413    } else {
1414        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1415    }
1416}
1417
1418static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1419                               TCGReg rd, TCGReg rn)
1420{
1421    /* REV, REV16, REV32 */
1422    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1423}
1424
1425static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1426                               TCGReg rd, TCGReg rn)
1427{
1428    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1429    int bits = (8 << s_bits) - 1;
1430    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1431}
1432
1433static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1434                               TCGReg rd, TCGReg rn)
1435{
1436    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1437    int bits = (8 << s_bits) - 1;
1438    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1439}
1440
1441static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1442                            TCGReg rn, int64_t aimm)
1443{
1444    if (aimm >= 0) {
1445        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1446    } else {
1447        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1448    }
1449}
1450
1451static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1452                            TCGReg rh, TCGReg al, TCGReg ah,
1453                            tcg_target_long bl, tcg_target_long bh,
1454                            bool const_bl, bool const_bh, bool sub)
1455{
1456    TCGReg orig_rl = rl;
1457    AArch64Insn insn;
1458
1459    if (rl == ah || (!const_bh && rl == bh)) {
1460        rl = TCG_REG_TMP;
1461    }
1462
1463    if (const_bl) {
1464        if (bl < 0) {
1465            bl = -bl;
1466            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1467        } else {
1468            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1469        }
1470
1471        if (unlikely(al == TCG_REG_XZR)) {
1472            /* ??? We want to allow al to be zero for the benefit of
1473               negation via subtraction.  However, that leaves open the
1474               possibility of adding 0+const in the low part, and the
1475               immediate add instructions encode XSP not XZR.  Don't try
1476               anything more elaborate here than loading another zero.  */
1477            al = TCG_REG_TMP;
1478            tcg_out_movi(s, ext, al, 0);
1479        }
1480        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1481    } else {
1482        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1483    }
1484
1485    insn = I3503_ADC;
1486    if (const_bh) {
1487        /* Note that the only two constants we support are 0 and -1, and
1488           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1489        if ((bh != 0) ^ sub) {
1490            insn = I3503_SBC;
1491        }
1492        bh = TCG_REG_XZR;
1493    } else if (sub) {
1494        insn = I3503_SBC;
1495    }
1496    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1497
1498    tcg_out_mov(s, ext, orig_rl, rl);
1499}
1500
1501static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1502{
1503    static const uint32_t sync[] = {
1504        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1505        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1506        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1507        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1508        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1509    };
1510    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1511}
1512
1513static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1514                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1515{
1516    TCGReg a1 = a0;
1517    if (is_ctz) {
1518        a1 = TCG_REG_TMP;
1519        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1520    }
1521    if (const_b && b == (ext ? 64 : 32)) {
1522        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1523    } else {
1524        AArch64Insn sel = I3506_CSEL;
1525
1526        tcg_out_cmp(s, ext, a0, 0, 1);
1527        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1528
1529        if (const_b) {
1530            if (b == -1) {
1531                b = TCG_REG_XZR;
1532                sel = I3506_CSINV;
1533            } else if (b == 0) {
1534                b = TCG_REG_XZR;
1535            } else {
1536                tcg_out_movi(s, ext, d, b);
1537                b = d;
1538            }
1539        }
1540        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1541    }
1542}
1543
1544#ifdef CONFIG_SOFTMMU
1545#include "../tcg-ldst.c.inc"
1546
1547/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1548 *                                     MemOpIdx oi, uintptr_t ra)
1549 */
1550static void * const qemu_ld_helpers[MO_SIZE + 1] = {
1551    [MO_8]  = helper_ret_ldub_mmu,
1552#ifdef HOST_WORDS_BIGENDIAN
1553    [MO_16] = helper_be_lduw_mmu,
1554    [MO_32] = helper_be_ldul_mmu,
1555    [MO_64] = helper_be_ldq_mmu,
1556#else
1557    [MO_16] = helper_le_lduw_mmu,
1558    [MO_32] = helper_le_ldul_mmu,
1559    [MO_64] = helper_le_ldq_mmu,
1560#endif
1561};
1562
1563/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1564 *                                     uintxx_t val, MemOpIdx oi,
1565 *                                     uintptr_t ra)
1566 */
1567static void * const qemu_st_helpers[MO_SIZE + 1] = {
1568    [MO_8]  = helper_ret_stb_mmu,
1569#ifdef HOST_WORDS_BIGENDIAN
1570    [MO_16] = helper_be_stw_mmu,
1571    [MO_32] = helper_be_stl_mmu,
1572    [MO_64] = helper_be_stq_mmu,
1573#else
1574    [MO_16] = helper_le_stw_mmu,
1575    [MO_32] = helper_le_stl_mmu,
1576    [MO_64] = helper_le_stq_mmu,
1577#endif
1578};
1579
1580static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1581{
1582    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1583    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1584    tcg_out_insn(s, 3406, ADR, rd, offset);
1585}
1586
1587static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1588{
1589    MemOpIdx oi = lb->oi;
1590    MemOp opc = get_memop(oi);
1591    MemOp size = opc & MO_SIZE;
1592
1593    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1594        return false;
1595    }
1596
1597    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1598    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1599    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1600    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1601    tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
1602    if (opc & MO_SIGN) {
1603        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1604    } else {
1605        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1606    }
1607
1608    tcg_out_goto(s, lb->raddr);
1609    return true;
1610}
1611
1612static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1613{
1614    MemOpIdx oi = lb->oi;
1615    MemOp opc = get_memop(oi);
1616    MemOp size = opc & MO_SIZE;
1617
1618    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1619        return false;
1620    }
1621
1622    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1623    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1624    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1625    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1626    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1627    tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
1628    tcg_out_goto(s, lb->raddr);
1629    return true;
1630}
1631
1632static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1633                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1634                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1635{
1636    TCGLabelQemuLdst *label = new_ldst_label(s);
1637
1638    label->is_ld = is_ld;
1639    label->oi = oi;
1640    label->type = ext;
1641    label->datalo_reg = data_reg;
1642    label->addrlo_reg = addr_reg;
1643    label->raddr = tcg_splitwx_to_rx(raddr);
1644    label->label_ptr[0] = label_ptr;
1645}
1646
1647/* We expect to use a 7-bit scaled negative offset from ENV.  */
1648QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1649QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1650
1651/* These offsets are built into the LDP below.  */
1652QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1653QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1654
1655/* Load and compare a TLB entry, emitting the conditional jump to the
1656   slow path for the failure case, which will be patched later when finalizing
1657   the slow path. Generated code returns the host addend in X1,
1658   clobbers X0,X2,X3,TMP. */
1659static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1660                             tcg_insn_unit **label_ptr, int mem_index,
1661                             bool is_read)
1662{
1663    unsigned a_bits = get_alignment_bits(opc);
1664    unsigned s_bits = opc & MO_SIZE;
1665    unsigned a_mask = (1u << a_bits) - 1;
1666    unsigned s_mask = (1u << s_bits) - 1;
1667    TCGReg x3;
1668    TCGType mask_type;
1669    uint64_t compare_mask;
1670
1671    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1672                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1673
1674    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1675    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1676                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1677
1678    /* Extract the TLB index from the address into X0.  */
1679    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1680                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1681                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1682
1683    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1684    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1685
1686    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1687    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1688               ? offsetof(CPUTLBEntry, addr_read)
1689               : offsetof(CPUTLBEntry, addr_write));
1690    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1691               offsetof(CPUTLBEntry, addend));
1692
1693    /* For aligned accesses, we check the first byte and include the alignment
1694       bits within the address.  For unaligned access, we check that we don't
1695       cross pages using the address of the last byte of the access.  */
1696    if (a_bits >= s_bits) {
1697        x3 = addr_reg;
1698    } else {
1699        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1700                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1701        x3 = TCG_REG_X3;
1702    }
1703    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1704
1705    /* Store the page mask part of the address into X3.  */
1706    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1707                     TCG_REG_X3, x3, compare_mask);
1708
1709    /* Perform the address comparison. */
1710    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1711
1712    /* If not equal, we jump to the slow path. */
1713    *label_ptr = s->code_ptr;
1714    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1715}
1716
1717#endif /* CONFIG_SOFTMMU */
1718
1719static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1720                                   TCGReg data_r, TCGReg addr_r,
1721                                   TCGType otype, TCGReg off_r)
1722{
1723    /* Byte swapping is left to middle-end expansion. */
1724    tcg_debug_assert((memop & MO_BSWAP) == 0);
1725
1726    switch (memop & MO_SSIZE) {
1727    case MO_UB:
1728        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1729        break;
1730    case MO_SB:
1731        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1732                       data_r, addr_r, otype, off_r);
1733        break;
1734    case MO_UW:
1735        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1736        break;
1737    case MO_SW:
1738        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1739                       data_r, addr_r, otype, off_r);
1740        break;
1741    case MO_UL:
1742        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1743        break;
1744    case MO_SL:
1745        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1746        break;
1747    case MO_UQ:
1748        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1749        break;
1750    default:
1751        tcg_abort();
1752    }
1753}
1754
1755static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1756                                   TCGReg data_r, TCGReg addr_r,
1757                                   TCGType otype, TCGReg off_r)
1758{
1759    /* Byte swapping is left to middle-end expansion. */
1760    tcg_debug_assert((memop & MO_BSWAP) == 0);
1761
1762    switch (memop & MO_SIZE) {
1763    case MO_8:
1764        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1765        break;
1766    case MO_16:
1767        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1768        break;
1769    case MO_32:
1770        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1771        break;
1772    case MO_64:
1773        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1774        break;
1775    default:
1776        tcg_abort();
1777    }
1778}
1779
1780static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1781                            MemOpIdx oi, TCGType ext)
1782{
1783    MemOp memop = get_memop(oi);
1784    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1785#ifdef CONFIG_SOFTMMU
1786    unsigned mem_index = get_mmuidx(oi);
1787    tcg_insn_unit *label_ptr;
1788
1789    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1790    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1791                           TCG_REG_X1, otype, addr_reg);
1792    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1793                        s->code_ptr, label_ptr);
1794#else /* !CONFIG_SOFTMMU */
1795    if (USE_GUEST_BASE) {
1796        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1797                               TCG_REG_GUEST_BASE, otype, addr_reg);
1798    } else {
1799        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1800                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1801    }
1802#endif /* CONFIG_SOFTMMU */
1803}
1804
1805static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1806                            MemOpIdx oi)
1807{
1808    MemOp memop = get_memop(oi);
1809    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1810#ifdef CONFIG_SOFTMMU
1811    unsigned mem_index = get_mmuidx(oi);
1812    tcg_insn_unit *label_ptr;
1813
1814    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1815    tcg_out_qemu_st_direct(s, memop, data_reg,
1816                           TCG_REG_X1, otype, addr_reg);
1817    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1818                        data_reg, addr_reg, s->code_ptr, label_ptr);
1819#else /* !CONFIG_SOFTMMU */
1820    if (USE_GUEST_BASE) {
1821        tcg_out_qemu_st_direct(s, memop, data_reg,
1822                               TCG_REG_GUEST_BASE, otype, addr_reg);
1823    } else {
1824        tcg_out_qemu_st_direct(s, memop, data_reg,
1825                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1826    }
1827#endif /* CONFIG_SOFTMMU */
1828}
1829
1830static const tcg_insn_unit *tb_ret_addr;
1831
1832static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1833                       const TCGArg args[TCG_MAX_OP_ARGS],
1834                       const int const_args[TCG_MAX_OP_ARGS])
1835{
1836    /* 99% of the time, we can signal the use of extension registers
1837       by looking to see if the opcode handles 64-bit data.  */
1838    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1839
1840    /* Hoist the loads of the most common arguments.  */
1841    TCGArg a0 = args[0];
1842    TCGArg a1 = args[1];
1843    TCGArg a2 = args[2];
1844    int c2 = const_args[2];
1845
1846    /* Some operands are defined with "rZ" constraint, a register or
1847       the zero register.  These need not actually test args[I] == 0.  */
1848#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1849
1850    switch (opc) {
1851    case INDEX_op_exit_tb:
1852        /* Reuse the zeroing that exists for goto_ptr.  */
1853        if (a0 == 0) {
1854            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1855        } else {
1856            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1857            tcg_out_goto_long(s, tb_ret_addr);
1858        }
1859        break;
1860
1861    case INDEX_op_goto_tb:
1862        if (s->tb_jmp_insn_offset != NULL) {
1863            /* TCG_TARGET_HAS_direct_jump */
1864            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1865               write can be used to patch the target address. */
1866            if ((uintptr_t)s->code_ptr & 7) {
1867                tcg_out32(s, NOP);
1868            }
1869            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1870            /* actual branch destination will be patched by
1871               tb_target_set_jmp_target later. */
1872            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1873            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1874        } else {
1875            /* !TCG_TARGET_HAS_direct_jump */
1876            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1877            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1878            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1879        }
1880        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1881        set_jmp_reset_offset(s, a0);
1882        break;
1883
1884    case INDEX_op_goto_ptr:
1885        tcg_out_insn(s, 3207, BR, a0);
1886        break;
1887
1888    case INDEX_op_br:
1889        tcg_out_goto_label(s, arg_label(a0));
1890        break;
1891
1892    case INDEX_op_ld8u_i32:
1893    case INDEX_op_ld8u_i64:
1894        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1895        break;
1896    case INDEX_op_ld8s_i32:
1897        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1898        break;
1899    case INDEX_op_ld8s_i64:
1900        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1901        break;
1902    case INDEX_op_ld16u_i32:
1903    case INDEX_op_ld16u_i64:
1904        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1905        break;
1906    case INDEX_op_ld16s_i32:
1907        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1908        break;
1909    case INDEX_op_ld16s_i64:
1910        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1911        break;
1912    case INDEX_op_ld_i32:
1913    case INDEX_op_ld32u_i64:
1914        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1915        break;
1916    case INDEX_op_ld32s_i64:
1917        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1918        break;
1919    case INDEX_op_ld_i64:
1920        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1921        break;
1922
1923    case INDEX_op_st8_i32:
1924    case INDEX_op_st8_i64:
1925        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1926        break;
1927    case INDEX_op_st16_i32:
1928    case INDEX_op_st16_i64:
1929        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1930        break;
1931    case INDEX_op_st_i32:
1932    case INDEX_op_st32_i64:
1933        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1934        break;
1935    case INDEX_op_st_i64:
1936        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1937        break;
1938
1939    case INDEX_op_add_i32:
1940        a2 = (int32_t)a2;
1941        /* FALLTHRU */
1942    case INDEX_op_add_i64:
1943        if (c2) {
1944            tcg_out_addsubi(s, ext, a0, a1, a2);
1945        } else {
1946            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1947        }
1948        break;
1949
1950    case INDEX_op_sub_i32:
1951        a2 = (int32_t)a2;
1952        /* FALLTHRU */
1953    case INDEX_op_sub_i64:
1954        if (c2) {
1955            tcg_out_addsubi(s, ext, a0, a1, -a2);
1956        } else {
1957            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1958        }
1959        break;
1960
1961    case INDEX_op_neg_i64:
1962    case INDEX_op_neg_i32:
1963        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1964        break;
1965
1966    case INDEX_op_and_i32:
1967        a2 = (int32_t)a2;
1968        /* FALLTHRU */
1969    case INDEX_op_and_i64:
1970        if (c2) {
1971            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1972        } else {
1973            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1974        }
1975        break;
1976
1977    case INDEX_op_andc_i32:
1978        a2 = (int32_t)a2;
1979        /* FALLTHRU */
1980    case INDEX_op_andc_i64:
1981        if (c2) {
1982            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1983        } else {
1984            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1985        }
1986        break;
1987
1988    case INDEX_op_or_i32:
1989        a2 = (int32_t)a2;
1990        /* FALLTHRU */
1991    case INDEX_op_or_i64:
1992        if (c2) {
1993            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1994        } else {
1995            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1996        }
1997        break;
1998
1999    case INDEX_op_orc_i32:
2000        a2 = (int32_t)a2;
2001        /* FALLTHRU */
2002    case INDEX_op_orc_i64:
2003        if (c2) {
2004            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2005        } else {
2006            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2007        }
2008        break;
2009
2010    case INDEX_op_xor_i32:
2011        a2 = (int32_t)a2;
2012        /* FALLTHRU */
2013    case INDEX_op_xor_i64:
2014        if (c2) {
2015            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2016        } else {
2017            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2018        }
2019        break;
2020
2021    case INDEX_op_eqv_i32:
2022        a2 = (int32_t)a2;
2023        /* FALLTHRU */
2024    case INDEX_op_eqv_i64:
2025        if (c2) {
2026            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2027        } else {
2028            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2029        }
2030        break;
2031
2032    case INDEX_op_not_i64:
2033    case INDEX_op_not_i32:
2034        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2035        break;
2036
2037    case INDEX_op_mul_i64:
2038    case INDEX_op_mul_i32:
2039        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2040        break;
2041
2042    case INDEX_op_div_i64:
2043    case INDEX_op_div_i32:
2044        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2045        break;
2046    case INDEX_op_divu_i64:
2047    case INDEX_op_divu_i32:
2048        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2049        break;
2050
2051    case INDEX_op_rem_i64:
2052    case INDEX_op_rem_i32:
2053        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2054        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2055        break;
2056    case INDEX_op_remu_i64:
2057    case INDEX_op_remu_i32:
2058        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2059        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2060        break;
2061
2062    case INDEX_op_shl_i64:
2063    case INDEX_op_shl_i32:
2064        if (c2) {
2065            tcg_out_shl(s, ext, a0, a1, a2);
2066        } else {
2067            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2068        }
2069        break;
2070
2071    case INDEX_op_shr_i64:
2072    case INDEX_op_shr_i32:
2073        if (c2) {
2074            tcg_out_shr(s, ext, a0, a1, a2);
2075        } else {
2076            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2077        }
2078        break;
2079
2080    case INDEX_op_sar_i64:
2081    case INDEX_op_sar_i32:
2082        if (c2) {
2083            tcg_out_sar(s, ext, a0, a1, a2);
2084        } else {
2085            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2086        }
2087        break;
2088
2089    case INDEX_op_rotr_i64:
2090    case INDEX_op_rotr_i32:
2091        if (c2) {
2092            tcg_out_rotr(s, ext, a0, a1, a2);
2093        } else {
2094            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2095        }
2096        break;
2097
2098    case INDEX_op_rotl_i64:
2099    case INDEX_op_rotl_i32:
2100        if (c2) {
2101            tcg_out_rotl(s, ext, a0, a1, a2);
2102        } else {
2103            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2104            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2105        }
2106        break;
2107
2108    case INDEX_op_clz_i64:
2109    case INDEX_op_clz_i32:
2110        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2111        break;
2112    case INDEX_op_ctz_i64:
2113    case INDEX_op_ctz_i32:
2114        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2115        break;
2116
2117    case INDEX_op_brcond_i32:
2118        a1 = (int32_t)a1;
2119        /* FALLTHRU */
2120    case INDEX_op_brcond_i64:
2121        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2122        break;
2123
2124    case INDEX_op_setcond_i32:
2125        a2 = (int32_t)a2;
2126        /* FALLTHRU */
2127    case INDEX_op_setcond_i64:
2128        tcg_out_cmp(s, ext, a1, a2, c2);
2129        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2130        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2131                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2132        break;
2133
2134    case INDEX_op_movcond_i32:
2135        a2 = (int32_t)a2;
2136        /* FALLTHRU */
2137    case INDEX_op_movcond_i64:
2138        tcg_out_cmp(s, ext, a1, a2, c2);
2139        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2140        break;
2141
2142    case INDEX_op_qemu_ld_i32:
2143    case INDEX_op_qemu_ld_i64:
2144        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2145        break;
2146    case INDEX_op_qemu_st_i32:
2147    case INDEX_op_qemu_st_i64:
2148        tcg_out_qemu_st(s, REG0(0), a1, a2);
2149        break;
2150
2151    case INDEX_op_bswap64_i64:
2152        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2153        break;
2154    case INDEX_op_bswap32_i64:
2155        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2156        if (a2 & TCG_BSWAP_OS) {
2157            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
2158        }
2159        break;
2160    case INDEX_op_bswap32_i32:
2161        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2162        break;
2163    case INDEX_op_bswap16_i64:
2164    case INDEX_op_bswap16_i32:
2165        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2166        if (a2 & TCG_BSWAP_OS) {
2167            /* Output must be sign-extended. */
2168            tcg_out_sxt(s, ext, MO_16, a0, a0);
2169        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2170            /* Output must be zero-extended, but input isn't. */
2171            tcg_out_uxt(s, MO_16, a0, a0);
2172        }
2173        break;
2174
2175    case INDEX_op_ext8s_i64:
2176    case INDEX_op_ext8s_i32:
2177        tcg_out_sxt(s, ext, MO_8, a0, a1);
2178        break;
2179    case INDEX_op_ext16s_i64:
2180    case INDEX_op_ext16s_i32:
2181        tcg_out_sxt(s, ext, MO_16, a0, a1);
2182        break;
2183    case INDEX_op_ext_i32_i64:
2184    case INDEX_op_ext32s_i64:
2185        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2186        break;
2187    case INDEX_op_ext8u_i64:
2188    case INDEX_op_ext8u_i32:
2189        tcg_out_uxt(s, MO_8, a0, a1);
2190        break;
2191    case INDEX_op_ext16u_i64:
2192    case INDEX_op_ext16u_i32:
2193        tcg_out_uxt(s, MO_16, a0, a1);
2194        break;
2195    case INDEX_op_extu_i32_i64:
2196    case INDEX_op_ext32u_i64:
2197        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2198        break;
2199
2200    case INDEX_op_deposit_i64:
2201    case INDEX_op_deposit_i32:
2202        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2203        break;
2204
2205    case INDEX_op_extract_i64:
2206    case INDEX_op_extract_i32:
2207        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2208        break;
2209
2210    case INDEX_op_sextract_i64:
2211    case INDEX_op_sextract_i32:
2212        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2213        break;
2214
2215    case INDEX_op_extract2_i64:
2216    case INDEX_op_extract2_i32:
2217        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2218        break;
2219
2220    case INDEX_op_add2_i32:
2221        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2222                        (int32_t)args[4], args[5], const_args[4],
2223                        const_args[5], false);
2224        break;
2225    case INDEX_op_add2_i64:
2226        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2227                        args[5], const_args[4], const_args[5], false);
2228        break;
2229    case INDEX_op_sub2_i32:
2230        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2231                        (int32_t)args[4], args[5], const_args[4],
2232                        const_args[5], true);
2233        break;
2234    case INDEX_op_sub2_i64:
2235        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2236                        args[5], const_args[4], const_args[5], true);
2237        break;
2238
2239    case INDEX_op_muluh_i64:
2240        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2241        break;
2242    case INDEX_op_mulsh_i64:
2243        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2244        break;
2245
2246    case INDEX_op_mb:
2247        tcg_out_mb(s, a0);
2248        break;
2249
2250    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2251    case INDEX_op_mov_i64:
2252    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2253    default:
2254        g_assert_not_reached();
2255    }
2256
2257#undef REG0
2258}
2259
2260static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2261                           unsigned vecl, unsigned vece,
2262                           const TCGArg args[TCG_MAX_OP_ARGS],
2263                           const int const_args[TCG_MAX_OP_ARGS])
2264{
2265    static const AArch64Insn cmp_vec_insn[16] = {
2266        [TCG_COND_EQ] = I3616_CMEQ,
2267        [TCG_COND_GT] = I3616_CMGT,
2268        [TCG_COND_GE] = I3616_CMGE,
2269        [TCG_COND_GTU] = I3616_CMHI,
2270        [TCG_COND_GEU] = I3616_CMHS,
2271    };
2272    static const AArch64Insn cmp_scalar_insn[16] = {
2273        [TCG_COND_EQ] = I3611_CMEQ,
2274        [TCG_COND_GT] = I3611_CMGT,
2275        [TCG_COND_GE] = I3611_CMGE,
2276        [TCG_COND_GTU] = I3611_CMHI,
2277        [TCG_COND_GEU] = I3611_CMHS,
2278    };
2279    static const AArch64Insn cmp0_vec_insn[16] = {
2280        [TCG_COND_EQ] = I3617_CMEQ0,
2281        [TCG_COND_GT] = I3617_CMGT0,
2282        [TCG_COND_GE] = I3617_CMGE0,
2283        [TCG_COND_LT] = I3617_CMLT0,
2284        [TCG_COND_LE] = I3617_CMLE0,
2285    };
2286    static const AArch64Insn cmp0_scalar_insn[16] = {
2287        [TCG_COND_EQ] = I3612_CMEQ0,
2288        [TCG_COND_GT] = I3612_CMGT0,
2289        [TCG_COND_GE] = I3612_CMGE0,
2290        [TCG_COND_LT] = I3612_CMLT0,
2291        [TCG_COND_LE] = I3612_CMLE0,
2292    };
2293
2294    TCGType type = vecl + TCG_TYPE_V64;
2295    unsigned is_q = vecl;
2296    bool is_scalar = !is_q && vece == MO_64;
2297    TCGArg a0, a1, a2, a3;
2298    int cmode, imm8;
2299
2300    a0 = args[0];
2301    a1 = args[1];
2302    a2 = args[2];
2303
2304    switch (opc) {
2305    case INDEX_op_ld_vec:
2306        tcg_out_ld(s, type, a0, a1, a2);
2307        break;
2308    case INDEX_op_st_vec:
2309        tcg_out_st(s, type, a0, a1, a2);
2310        break;
2311    case INDEX_op_dupm_vec:
2312        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2313        break;
2314    case INDEX_op_add_vec:
2315        if (is_scalar) {
2316            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2317        } else {
2318            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2319        }
2320        break;
2321    case INDEX_op_sub_vec:
2322        if (is_scalar) {
2323            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2324        } else {
2325            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2326        }
2327        break;
2328    case INDEX_op_mul_vec:
2329        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2330        break;
2331    case INDEX_op_neg_vec:
2332        if (is_scalar) {
2333            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2334        } else {
2335            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2336        }
2337        break;
2338    case INDEX_op_abs_vec:
2339        if (is_scalar) {
2340            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2341        } else {
2342            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2343        }
2344        break;
2345    case INDEX_op_and_vec:
2346        if (const_args[2]) {
2347            is_shimm1632(~a2, &cmode, &imm8);
2348            if (a0 == a1) {
2349                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2350                return;
2351            }
2352            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2353            a2 = a0;
2354        }
2355        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2356        break;
2357    case INDEX_op_or_vec:
2358        if (const_args[2]) {
2359            is_shimm1632(a2, &cmode, &imm8);
2360            if (a0 == a1) {
2361                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2362                return;
2363            }
2364            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2365            a2 = a0;
2366        }
2367        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2368        break;
2369    case INDEX_op_andc_vec:
2370        if (const_args[2]) {
2371            is_shimm1632(a2, &cmode, &imm8);
2372            if (a0 == a1) {
2373                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2374                return;
2375            }
2376            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2377            a2 = a0;
2378        }
2379        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2380        break;
2381    case INDEX_op_orc_vec:
2382        if (const_args[2]) {
2383            is_shimm1632(~a2, &cmode, &imm8);
2384            if (a0 == a1) {
2385                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2386                return;
2387            }
2388            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2389            a2 = a0;
2390        }
2391        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2392        break;
2393    case INDEX_op_xor_vec:
2394        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2395        break;
2396    case INDEX_op_ssadd_vec:
2397        if (is_scalar) {
2398            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2399        } else {
2400            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2401        }
2402        break;
2403    case INDEX_op_sssub_vec:
2404        if (is_scalar) {
2405            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2406        } else {
2407            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2408        }
2409        break;
2410    case INDEX_op_usadd_vec:
2411        if (is_scalar) {
2412            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2413        } else {
2414            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2415        }
2416        break;
2417    case INDEX_op_ussub_vec:
2418        if (is_scalar) {
2419            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2420        } else {
2421            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2422        }
2423        break;
2424    case INDEX_op_smax_vec:
2425        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2426        break;
2427    case INDEX_op_smin_vec:
2428        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2429        break;
2430    case INDEX_op_umax_vec:
2431        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2432        break;
2433    case INDEX_op_umin_vec:
2434        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2435        break;
2436    case INDEX_op_not_vec:
2437        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2438        break;
2439    case INDEX_op_shli_vec:
2440        if (is_scalar) {
2441            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2442        } else {
2443            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2444        }
2445        break;
2446    case INDEX_op_shri_vec:
2447        if (is_scalar) {
2448            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2449        } else {
2450            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2451        }
2452        break;
2453    case INDEX_op_sari_vec:
2454        if (is_scalar) {
2455            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2456        } else {
2457            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2458        }
2459        break;
2460    case INDEX_op_aa64_sli_vec:
2461        if (is_scalar) {
2462            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2463        } else {
2464            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2465        }
2466        break;
2467    case INDEX_op_shlv_vec:
2468        if (is_scalar) {
2469            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2470        } else {
2471            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2472        }
2473        break;
2474    case INDEX_op_aa64_sshl_vec:
2475        if (is_scalar) {
2476            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2477        } else {
2478            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2479        }
2480        break;
2481    case INDEX_op_cmp_vec:
2482        {
2483            TCGCond cond = args[3];
2484            AArch64Insn insn;
2485
2486            if (cond == TCG_COND_NE) {
2487                if (const_args[2]) {
2488                    if (is_scalar) {
2489                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2490                    } else {
2491                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2492                    }
2493                } else {
2494                    if (is_scalar) {
2495                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2496                    } else {
2497                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2498                    }
2499                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2500                }
2501            } else {
2502                if (const_args[2]) {
2503                    if (is_scalar) {
2504                        insn = cmp0_scalar_insn[cond];
2505                        if (insn) {
2506                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2507                            break;
2508                        }
2509                    } else {
2510                        insn = cmp0_vec_insn[cond];
2511                        if (insn) {
2512                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2513                            break;
2514                        }
2515                    }
2516                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2517                    a2 = TCG_VEC_TMP;
2518                }
2519                if (is_scalar) {
2520                    insn = cmp_scalar_insn[cond];
2521                    if (insn == 0) {
2522                        TCGArg t;
2523                        t = a1, a1 = a2, a2 = t;
2524                        cond = tcg_swap_cond(cond);
2525                        insn = cmp_scalar_insn[cond];
2526                        tcg_debug_assert(insn != 0);
2527                    }
2528                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2529                } else {
2530                    insn = cmp_vec_insn[cond];
2531                    if (insn == 0) {
2532                        TCGArg t;
2533                        t = a1, a1 = a2, a2 = t;
2534                        cond = tcg_swap_cond(cond);
2535                        insn = cmp_vec_insn[cond];
2536                        tcg_debug_assert(insn != 0);
2537                    }
2538                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2539                }
2540            }
2541        }
2542        break;
2543
2544    case INDEX_op_bitsel_vec:
2545        a3 = args[3];
2546        if (a0 == a3) {
2547            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2548        } else if (a0 == a2) {
2549            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2550        } else {
2551            if (a0 != a1) {
2552                tcg_out_mov(s, type, a0, a1);
2553            }
2554            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2555        }
2556        break;
2557
2558    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2559    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2560    default:
2561        g_assert_not_reached();
2562    }
2563}
2564
2565int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2566{
2567    switch (opc) {
2568    case INDEX_op_add_vec:
2569    case INDEX_op_sub_vec:
2570    case INDEX_op_and_vec:
2571    case INDEX_op_or_vec:
2572    case INDEX_op_xor_vec:
2573    case INDEX_op_andc_vec:
2574    case INDEX_op_orc_vec:
2575    case INDEX_op_neg_vec:
2576    case INDEX_op_abs_vec:
2577    case INDEX_op_not_vec:
2578    case INDEX_op_cmp_vec:
2579    case INDEX_op_shli_vec:
2580    case INDEX_op_shri_vec:
2581    case INDEX_op_sari_vec:
2582    case INDEX_op_ssadd_vec:
2583    case INDEX_op_sssub_vec:
2584    case INDEX_op_usadd_vec:
2585    case INDEX_op_ussub_vec:
2586    case INDEX_op_shlv_vec:
2587    case INDEX_op_bitsel_vec:
2588        return 1;
2589    case INDEX_op_rotli_vec:
2590    case INDEX_op_shrv_vec:
2591    case INDEX_op_sarv_vec:
2592    case INDEX_op_rotlv_vec:
2593    case INDEX_op_rotrv_vec:
2594        return -1;
2595    case INDEX_op_mul_vec:
2596    case INDEX_op_smax_vec:
2597    case INDEX_op_smin_vec:
2598    case INDEX_op_umax_vec:
2599    case INDEX_op_umin_vec:
2600        return vece < MO_64;
2601
2602    default:
2603        return 0;
2604    }
2605}
2606
2607void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2608                       TCGArg a0, ...)
2609{
2610    va_list va;
2611    TCGv_vec v0, v1, v2, t1, t2, c1;
2612    TCGArg a2;
2613
2614    va_start(va, a0);
2615    v0 = temp_tcgv_vec(arg_temp(a0));
2616    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2617    a2 = va_arg(va, TCGArg);
2618    va_end(va);
2619
2620    switch (opc) {
2621    case INDEX_op_rotli_vec:
2622        t1 = tcg_temp_new_vec(type);
2623        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2624        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2625                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2626        tcg_temp_free_vec(t1);
2627        break;
2628
2629    case INDEX_op_shrv_vec:
2630    case INDEX_op_sarv_vec:
2631        /* Right shifts are negative left shifts for AArch64.  */
2632        v2 = temp_tcgv_vec(arg_temp(a2));
2633        t1 = tcg_temp_new_vec(type);
2634        tcg_gen_neg_vec(vece, t1, v2);
2635        opc = (opc == INDEX_op_shrv_vec
2636               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2637        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2638                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2639        tcg_temp_free_vec(t1);
2640        break;
2641
2642    case INDEX_op_rotlv_vec:
2643        v2 = temp_tcgv_vec(arg_temp(a2));
2644        t1 = tcg_temp_new_vec(type);
2645        c1 = tcg_constant_vec(type, vece, 8 << vece);
2646        tcg_gen_sub_vec(vece, t1, v2, c1);
2647        /* Right shifts are negative left shifts for AArch64.  */
2648        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2649                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2650        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2651                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2652        tcg_gen_or_vec(vece, v0, v0, t1);
2653        tcg_temp_free_vec(t1);
2654        break;
2655
2656    case INDEX_op_rotrv_vec:
2657        v2 = temp_tcgv_vec(arg_temp(a2));
2658        t1 = tcg_temp_new_vec(type);
2659        t2 = tcg_temp_new_vec(type);
2660        c1 = tcg_constant_vec(type, vece, 8 << vece);
2661        tcg_gen_neg_vec(vece, t1, v2);
2662        tcg_gen_sub_vec(vece, t2, c1, v2);
2663        /* Right shifts are negative left shifts for AArch64.  */
2664        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2665                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2666        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2667                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2668        tcg_gen_or_vec(vece, v0, t1, t2);
2669        tcg_temp_free_vec(t1);
2670        tcg_temp_free_vec(t2);
2671        break;
2672
2673    default:
2674        g_assert_not_reached();
2675    }
2676}
2677
2678static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2679{
2680    switch (op) {
2681    case INDEX_op_goto_ptr:
2682        return C_O0_I1(r);
2683
2684    case INDEX_op_ld8u_i32:
2685    case INDEX_op_ld8s_i32:
2686    case INDEX_op_ld16u_i32:
2687    case INDEX_op_ld16s_i32:
2688    case INDEX_op_ld_i32:
2689    case INDEX_op_ld8u_i64:
2690    case INDEX_op_ld8s_i64:
2691    case INDEX_op_ld16u_i64:
2692    case INDEX_op_ld16s_i64:
2693    case INDEX_op_ld32u_i64:
2694    case INDEX_op_ld32s_i64:
2695    case INDEX_op_ld_i64:
2696    case INDEX_op_neg_i32:
2697    case INDEX_op_neg_i64:
2698    case INDEX_op_not_i32:
2699    case INDEX_op_not_i64:
2700    case INDEX_op_bswap16_i32:
2701    case INDEX_op_bswap32_i32:
2702    case INDEX_op_bswap16_i64:
2703    case INDEX_op_bswap32_i64:
2704    case INDEX_op_bswap64_i64:
2705    case INDEX_op_ext8s_i32:
2706    case INDEX_op_ext16s_i32:
2707    case INDEX_op_ext8u_i32:
2708    case INDEX_op_ext16u_i32:
2709    case INDEX_op_ext8s_i64:
2710    case INDEX_op_ext16s_i64:
2711    case INDEX_op_ext32s_i64:
2712    case INDEX_op_ext8u_i64:
2713    case INDEX_op_ext16u_i64:
2714    case INDEX_op_ext32u_i64:
2715    case INDEX_op_ext_i32_i64:
2716    case INDEX_op_extu_i32_i64:
2717    case INDEX_op_extract_i32:
2718    case INDEX_op_extract_i64:
2719    case INDEX_op_sextract_i32:
2720    case INDEX_op_sextract_i64:
2721        return C_O1_I1(r, r);
2722
2723    case INDEX_op_st8_i32:
2724    case INDEX_op_st16_i32:
2725    case INDEX_op_st_i32:
2726    case INDEX_op_st8_i64:
2727    case INDEX_op_st16_i64:
2728    case INDEX_op_st32_i64:
2729    case INDEX_op_st_i64:
2730        return C_O0_I2(rZ, r);
2731
2732    case INDEX_op_add_i32:
2733    case INDEX_op_add_i64:
2734    case INDEX_op_sub_i32:
2735    case INDEX_op_sub_i64:
2736    case INDEX_op_setcond_i32:
2737    case INDEX_op_setcond_i64:
2738        return C_O1_I2(r, r, rA);
2739
2740    case INDEX_op_mul_i32:
2741    case INDEX_op_mul_i64:
2742    case INDEX_op_div_i32:
2743    case INDEX_op_div_i64:
2744    case INDEX_op_divu_i32:
2745    case INDEX_op_divu_i64:
2746    case INDEX_op_rem_i32:
2747    case INDEX_op_rem_i64:
2748    case INDEX_op_remu_i32:
2749    case INDEX_op_remu_i64:
2750    case INDEX_op_muluh_i64:
2751    case INDEX_op_mulsh_i64:
2752        return C_O1_I2(r, r, r);
2753
2754    case INDEX_op_and_i32:
2755    case INDEX_op_and_i64:
2756    case INDEX_op_or_i32:
2757    case INDEX_op_or_i64:
2758    case INDEX_op_xor_i32:
2759    case INDEX_op_xor_i64:
2760    case INDEX_op_andc_i32:
2761    case INDEX_op_andc_i64:
2762    case INDEX_op_orc_i32:
2763    case INDEX_op_orc_i64:
2764    case INDEX_op_eqv_i32:
2765    case INDEX_op_eqv_i64:
2766        return C_O1_I2(r, r, rL);
2767
2768    case INDEX_op_shl_i32:
2769    case INDEX_op_shr_i32:
2770    case INDEX_op_sar_i32:
2771    case INDEX_op_rotl_i32:
2772    case INDEX_op_rotr_i32:
2773    case INDEX_op_shl_i64:
2774    case INDEX_op_shr_i64:
2775    case INDEX_op_sar_i64:
2776    case INDEX_op_rotl_i64:
2777    case INDEX_op_rotr_i64:
2778        return C_O1_I2(r, r, ri);
2779
2780    case INDEX_op_clz_i32:
2781    case INDEX_op_ctz_i32:
2782    case INDEX_op_clz_i64:
2783    case INDEX_op_ctz_i64:
2784        return C_O1_I2(r, r, rAL);
2785
2786    case INDEX_op_brcond_i32:
2787    case INDEX_op_brcond_i64:
2788        return C_O0_I2(r, rA);
2789
2790    case INDEX_op_movcond_i32:
2791    case INDEX_op_movcond_i64:
2792        return C_O1_I4(r, r, rA, rZ, rZ);
2793
2794    case INDEX_op_qemu_ld_i32:
2795    case INDEX_op_qemu_ld_i64:
2796        return C_O1_I1(r, l);
2797    case INDEX_op_qemu_st_i32:
2798    case INDEX_op_qemu_st_i64:
2799        return C_O0_I2(lZ, l);
2800
2801    case INDEX_op_deposit_i32:
2802    case INDEX_op_deposit_i64:
2803        return C_O1_I2(r, 0, rZ);
2804
2805    case INDEX_op_extract2_i32:
2806    case INDEX_op_extract2_i64:
2807        return C_O1_I2(r, rZ, rZ);
2808
2809    case INDEX_op_add2_i32:
2810    case INDEX_op_add2_i64:
2811    case INDEX_op_sub2_i32:
2812    case INDEX_op_sub2_i64:
2813        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2814
2815    case INDEX_op_add_vec:
2816    case INDEX_op_sub_vec:
2817    case INDEX_op_mul_vec:
2818    case INDEX_op_xor_vec:
2819    case INDEX_op_ssadd_vec:
2820    case INDEX_op_sssub_vec:
2821    case INDEX_op_usadd_vec:
2822    case INDEX_op_ussub_vec:
2823    case INDEX_op_smax_vec:
2824    case INDEX_op_smin_vec:
2825    case INDEX_op_umax_vec:
2826    case INDEX_op_umin_vec:
2827    case INDEX_op_shlv_vec:
2828    case INDEX_op_shrv_vec:
2829    case INDEX_op_sarv_vec:
2830    case INDEX_op_aa64_sshl_vec:
2831        return C_O1_I2(w, w, w);
2832    case INDEX_op_not_vec:
2833    case INDEX_op_neg_vec:
2834    case INDEX_op_abs_vec:
2835    case INDEX_op_shli_vec:
2836    case INDEX_op_shri_vec:
2837    case INDEX_op_sari_vec:
2838        return C_O1_I1(w, w);
2839    case INDEX_op_ld_vec:
2840    case INDEX_op_dupm_vec:
2841        return C_O1_I1(w, r);
2842    case INDEX_op_st_vec:
2843        return C_O0_I2(w, r);
2844    case INDEX_op_dup_vec:
2845        return C_O1_I1(w, wr);
2846    case INDEX_op_or_vec:
2847    case INDEX_op_andc_vec:
2848        return C_O1_I2(w, w, wO);
2849    case INDEX_op_and_vec:
2850    case INDEX_op_orc_vec:
2851        return C_O1_I2(w, w, wN);
2852    case INDEX_op_cmp_vec:
2853        return C_O1_I2(w, w, wZ);
2854    case INDEX_op_bitsel_vec:
2855        return C_O1_I3(w, w, w, w);
2856    case INDEX_op_aa64_sli_vec:
2857        return C_O1_I2(w, 0, w);
2858
2859    default:
2860        g_assert_not_reached();
2861    }
2862}
2863
2864static void tcg_target_init(TCGContext *s)
2865{
2866    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2867    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2868    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2869    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2870
2871    tcg_target_call_clobber_regs = -1ull;
2872    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2873    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2874    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2875    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2876    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2877    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2878    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2879    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2880    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2881    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2882    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2883    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2884    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2885    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2886    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2887    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2888    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2889    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2890    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2891
2892    s->reserved_regs = 0;
2893    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2894    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2895    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2896    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2897    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2898}
2899
2900/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2901#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2902
2903#define FRAME_SIZE \
2904    ((PUSH_SIZE \
2905      + TCG_STATIC_CALL_ARGS_SIZE \
2906      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2907      + TCG_TARGET_STACK_ALIGN - 1) \
2908     & ~(TCG_TARGET_STACK_ALIGN - 1))
2909
2910/* We're expecting a 2 byte uleb128 encoded value.  */
2911QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2912
2913/* We're expecting to use a single ADDI insn.  */
2914QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2915
2916static void tcg_target_qemu_prologue(TCGContext *s)
2917{
2918    TCGReg r;
2919
2920    /* Push (FP, LR) and allocate space for all saved registers.  */
2921    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2922                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2923
2924    /* Set up frame pointer for canonical unwinding.  */
2925    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2926
2927    /* Store callee-preserved regs x19..x28.  */
2928    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2929        int ofs = (r - TCG_REG_X19 + 2) * 8;
2930        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2931    }
2932
2933    /* Make stack space for TCG locals.  */
2934    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2935                 FRAME_SIZE - PUSH_SIZE);
2936
2937    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2938    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2939                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2940
2941#if !defined(CONFIG_SOFTMMU)
2942    if (USE_GUEST_BASE) {
2943        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2944        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2945    }
2946#endif
2947
2948    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2949    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2950
2951    /*
2952     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2953     * and fall through to the rest of the epilogue.
2954     */
2955    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2956    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2957
2958    /* TB epilogue */
2959    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2960
2961    /* Remove TCG locals stack space.  */
2962    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2963                 FRAME_SIZE - PUSH_SIZE);
2964
2965    /* Restore registers x19..x28.  */
2966    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2967        int ofs = (r - TCG_REG_X19 + 2) * 8;
2968        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2969    }
2970
2971    /* Pop (FP, LR), restore SP to previous frame.  */
2972    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2973                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2974    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2975}
2976
2977static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2978{
2979    int i;
2980    for (i = 0; i < count; ++i) {
2981        p[i] = NOP;
2982    }
2983}
2984
2985typedef struct {
2986    DebugFrameHeader h;
2987    uint8_t fde_def_cfa[4];
2988    uint8_t fde_reg_ofs[24];
2989} DebugFrame;
2990
2991#define ELF_HOST_MACHINE EM_AARCH64
2992
2993static const DebugFrame debug_frame = {
2994    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2995    .h.cie.id = -1,
2996    .h.cie.version = 1,
2997    .h.cie.code_align = 1,
2998    .h.cie.data_align = 0x78,             /* sleb128 -8 */
2999    .h.cie.return_column = TCG_REG_LR,
3000
3001    /* Total FDE size does not include the "len" member.  */
3002    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3003
3004    .fde_def_cfa = {
3005        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3006        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3007        (FRAME_SIZE >> 7)
3008    },
3009    .fde_reg_ofs = {
3010        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3011        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3012        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3013        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3014        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3015        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3016        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3017        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3018        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3019        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3020        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3021        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3022    }
3023};
3024
3025void tcg_register_jit(const void *buf, size_t buf_size)
3026{
3027    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3028}
3029