xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision b14df228)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, only built for CONFIG_DEBUG_TCG.
 * The first 32 entries name the general registers (slot 29 is shown as
 * "fp" and slot 31 as "sp"); the next 32 entries name the vector
 * registers v0..v31.  Every vector slot uses its plain "vN" name: v29
 * is an ordinary vector register and must not be labelled "fp".
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43    TCG_REG_X16, TCG_REG_X17,
44
45    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
46    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
47
48    /* X18 reserved by system */
49    /* X19 reserved for AREG0 */
50    /* X29 reserved as fp */
51    /* X30 reserved as temporary */
52
53    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
54    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
55    /* V8 - V15 are call-saved, and skipped.  */
56    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
57    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
58    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
59    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
60};
61
62static const int tcg_target_call_iarg_regs[8] = {
63    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
64    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
65};
66static const int tcg_target_call_oarg_regs[1] = {
67    TCG_REG_X0
68};
69
/*
 * Scratch registers for the backend's own use.  X30 is the general
 * temporary (it is absent from the allocation order above for that
 * reason); V31 is the vector temporary.
 */
#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31
72
#ifndef CONFIG_SOFTMMU
/* Note that XZR cannot be encoded in the address base register slot,
   as that actually encodes SP.  So if we need to zero-extend the guest
   address, via the address index register slot, we need to load even
   a zero guest base into a register.  Hence a guest base register is
   used whenever guest_base is non-zero or the guest address is 32-bit;
   X28 is the register set aside for it.  */
#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
/*
 * Target-specific constant constraint bits, tested by
 * tcg_target_const_match():
 *   AIMM - value or its negation satisfies is_aimm()
 *   LIMM - value satisfies is_limm()
 *   ZERO - value is 0
 *   MONE - value is -1
 *   ORRI - value is a replicated 32-bit pattern accepted by is_shimm1632()
 *   ANDI - as ORRI, but tested on the complement of the value
 * ORRI and ANDI must not both be set in one constraint.
 */
#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

/* Register-set masks: general registers occupy bits 0..31,
   vector registers bits 32..63.  */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Registers usable as guest load/store operands.  With CONFIG_SOFTMMU,
   X0..X3 are excluded.
   NOTE(review): presumably because the slow path uses them for helper
   call arguments -- the reason is not visible here; confirm.  */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
140
/*
 * Match a constant valid for addition: a 12-bit value, either unshifted
 * (bits [11:0]) or shifted left by 12 (bits [23:12]).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;
    const uint64_t high12 = low12 << 12;

    if ((val & ~low12) == 0) {
        return true;
    }
    return (val & ~high12) == 0;
}
146
/*
 * Match a constant valid for logical operations.
 *
 * This is a simplified view of the logical immediates that ignores the
 * replication that can happen across the field.  Accepted bit patterns
 * are a single contiguous run of ones,
 *     0....01....1
 *     0..01..10..0
 * and the inverses of those.  All-zeros and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    /* Work on the form with the msb clear; invert if it is set. */
    uint64_t run = (val >> 63) ? ~val : val;
    uint64_t lowest_bit, carried;

    if (run == 0) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of
     * ones, leaving a single bit set exactly when the run was the only
     * thing present.
     */
    lowest_bit = run & -run;
    carried = run + lowest_bit;
    return (carried & (carried - 1)) == 0;
}
167
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. only one
 * of its two bytes is non-zero.  On success store the byte in *imm8 and
 * the matching cmode (0x8 for the low byte, 0xa for the high byte) in
 * *cmode; on failure neither output is written.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    unsigned lo = v16 & 0xff;
    unsigned hi = v16 >> 8;

    if (hi == 0) {
        *cmode = 0x8;
        *imm8 = lo;
        return true;
    }
    if (lo == 0) {
        *cmode = 0xa;
        *imm8 = hi;
        return true;
    }
    return false;
}
182
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. at most
 * one of its four bytes is non-zero.  On success store that byte in
 * *imm8 and the cmode for its position (0x0, 0x2, 0x4 or 0x6, lowest
 * byte first) in *cmode; on failure neither output is written.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int byte;

    for (byte = 0; byte < 4; byte++) {
        unsigned shift = byte * 8;

        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = byte * 2;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
205
/*
 * Return true if v32 is a valid 32-bit "shifting ones" immediate: one
 * byte followed by all-ones below it.
 *   0x0000XXff -> cmode 0xc, imm8 = XX
 *   0x00XXffff -> cmode 0xd, imm8 = XX
 * On failure neither output is written.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 >> 16) == 0 && (v32 & 0xff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    if ((v32 >> 24) == 0 && (v32 & 0xffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
220
/*
 * Return true if v32 is a valid float32 immediate: bits [18:0] must be
 * zero and bits [30:25] must be either 0x20 or 0x1f.  On success *cmode
 * is 0xf and *imm8 is built from bit 31, bit 25 and bits [24:19]; on
 * failure neither output is written.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t exp_top = extract32(v32, 25, 6);

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    if (exp_top != 0x20 && exp_top != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
235
/*
 * Return true if v64 is a valid float64 immediate: bits [47:0] must be
 * zero and bits [62:54] must be either 0x100 or 0x0ff.  On success
 * *cmode is 0xf and *imm8 is built from bit 63, bit 54 and bits [53:48];
 * on failure neither output is written.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t exp_top = extract64(v64, 54, 9);

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    if (exp_top != 0x100 && exp_top != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
250
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 *
 * Bytes 3, 2 and 1 are tried in that order: the byte is masked out and
 * the remainder must be loadable by a single MOVI (shifted or
 * shifting-ones form), whose parameters are placed in (cmode, imm8).
 * The return value is the cmode for the ORR (6, 4 or 2); the ORR imm8
 * can be had via extraction from v32.  Returns 0 if no split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode = 6;

    while (orr_cmode > 0) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8) || is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        orr_cmode -= 2;
    }
    return 0;
}
270
/*
 * Return true if v32 is a valid 16-bit or 32-bit shifted immediate.
 * When both 16-bit halves of v32 are equal it is tested as a 16-bit
 * element, otherwise as a 32-bit element.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (uint16_t)(v32 >> 16) == (uint16_t)v32;

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
280
281static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
282{
283    if (ct & TCG_CT_CONST) {
284        return 1;
285    }
286    if (type == TCG_TYPE_I32) {
287        val = (int32_t)val;
288    }
289    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
293        return 1;
294    }
295    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
296        return 1;
297    }
298    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
299        return 1;
300    }
301
302    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
303    case 0:
304        break;
305    case TCG_CT_CONST_ANDI:
306        val = ~val;
307        /* fallthru */
308    case TCG_CT_CONST_ORRI:
309        if (val == deposit64(val, 32, 32, val)) {
310            int cmode, imm8;
311            return is_shimm1632(val, &cmode, &imm8);
312        }
313        break;
314    default:
315        /* Both bits should not be set for the same insn.  */
316        g_assert_not_reached();
317    }
318
319    return 0;
320}
321
/* AArch64 condition codes; each value is the 4-bit field OR'ed into the
   conditional instructions emitted below.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS: unsigned higher or same */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS: unsigned lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};
342
343static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
344    [TCG_COND_EQ] = COND_EQ,
345    [TCG_COND_NE] = COND_NE,
346    [TCG_COND_LT] = COND_LT,
347    [TCG_COND_GE] = COND_GE,
348    [TCG_COND_LE] = COND_LE,
349    [TCG_COND_GT] = COND_GT,
350    /* unsigned */
351    [TCG_COND_LTU] = COND_LO,
352    [TCG_COND_GTU] = COND_HI,
353    [TCG_COND_GEU] = COND_HS,
354    [TCG_COND_LEU] = COND_LS,
355};
356
/* Load/store kind; shifted to bit 22 when building the I3312_* opcodes
   of AArch64Insn.  */
typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;
363
/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.

   Each constant is the base opcode with all operand fields zero; the
   matching tcg_out_insn_<format>() function ORs the operands in.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.
       The AArch64LdstType goes at bit 22 and the MemOp size at bit 30.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    /* Vector register forms: S (32-bit), D (64-bit) and Q (128-bit).  */
    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    /* Bits OR'ed into an I3312_* opcode to turn it into the 3.3.10
       or 3.3.13 form, respectively.  */
    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).
       Named I3502S so that it is emitted via tcg_out_insn_3502S().  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.
       NOTE(review): DMB_LD and DMB_ST are not complete opcodes; they are
       presumably modifier bits OR'ed into DMB_ISH by a caller outside
       this view -- confirm.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;
605
606static inline uint32_t tcg_in32(TCGContext *s)
607{
608    uint32_t v = *(uint32_t *)s->code_ptr;
609    return v;
610}
611
/* Emit an opcode with "type-checking" of the format.
   tcg_out_insn(s, FMT, OP, args...) pastes its arguments together into
   the call tcg_out_insn_FMT(s, I<FMT>_<OP>, args...), so naming an
   opcode that does not belong to format FMT fails to compile because
   no such enumerator exists.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
615
616static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
617                              TCGReg rt, TCGReg rn, unsigned size)
618{
619    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
620}
621
622static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
623                              int imm19, TCGReg rt)
624{
625    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
626}
627
628static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
629                              TCGReg rt, int imm19)
630{
631    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
632}
633
634static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
635                              TCGCond c, int imm19)
636{
637    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
638}
639
640static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
641{
642    tcg_out32(s, insn | (imm26 & 0x03ffffff));
643}
644
645static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
646{
647    tcg_out32(s, insn | rn << 5);
648}
649
650static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
651                              TCGReg r1, TCGReg r2, TCGReg rn,
652                              tcg_target_long ofs, bool pre, bool w)
653{
654    insn |= 1u << 31; /* ext */
655    insn |= pre << 24;
656    insn |= w << 23;
657
658    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
659    insn |= (ofs & (0x7f << 3)) << (15 - 3);
660
661    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
662}
663
664static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
665                              TCGReg rd, TCGReg rn, uint64_t aimm)
666{
667    if (aimm > 0xfff) {
668        tcg_debug_assert((aimm & 0xfff) == 0);
669        aimm >>= 12;
670        tcg_debug_assert(aimm <= 0xfff);
671        aimm |= 1 << 12;  /* apply LSL 12 */
672    }
673    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
674}
675
676/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
677   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
678   that feed the DecodeBitMasks pseudo function.  */
679static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
680                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
681{
682    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
683              | rn << 5 | rd);
684}
685
686#define tcg_out_insn_3404  tcg_out_insn_3402
687
688static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
689                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
690{
691    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
692              | rn << 5 | rd);
693}
694
695/* This function is used for the Move (wide immediate) instruction group.
696   Note that SHIFT is a full shift count, not the 2 bit HW field. */
697static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
698                              TCGReg rd, uint16_t half, unsigned shift)
699{
700    tcg_debug_assert((shift & ~0x30) == 0);
701    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
702}
703
704static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
705                              TCGReg rd, int64_t disp)
706{
707    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
708}
709
710/* This function is for both 3.5.2 (Add/Subtract shifted register), for
711   the rare occasion when we actually want to supply a shift amount.  */
712static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
713                                      TCGType ext, TCGReg rd, TCGReg rn,
714                                      TCGReg rm, int imm6)
715{
716    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
717}
718
719/* This function is for 3.5.2 (Add/subtract shifted register),
720   and 3.5.10 (Logical shifted register), for the vast majorty of cases
721   when we don't want to apply a shift.  Thus it can also be used for
722   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
723static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
724                              TCGReg rd, TCGReg rn, TCGReg rm)
725{
726    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
727}
728
729#define tcg_out_insn_3503  tcg_out_insn_3502
730#define tcg_out_insn_3508  tcg_out_insn_3502
731#define tcg_out_insn_3510  tcg_out_insn_3502
732
733static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
734                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
735{
736    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
737              | tcg_cond_to_aarch64[c] << 12);
738}
739
740static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
741                              TCGReg rd, TCGReg rn)
742{
743    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
744}
745
746static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
747                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
748{
749    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
750}
751
752static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
753                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
754{
755    /* Note that bit 11 set means general register input.  Therefore
756       we can handle both register sets with one function.  */
757    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
758              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
759}
760
761static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
762                              TCGReg rd, bool op, int cmode, uint8_t imm8)
763{
764    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
765              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
766}
767
768static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
769                              TCGReg rd, TCGReg rn, unsigned immhb)
770{
771    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
772}
773
774static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
775                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
776{
777    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
778              | (rn & 0x1f) << 5 | (rd & 0x1f));
779}
780
781static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
782                              unsigned size, TCGReg rd, TCGReg rn)
783{
784    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
785}
786
787static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
788                              TCGReg rd, TCGReg rn, unsigned immhb)
789{
790    tcg_out32(s, insn | q << 30 | immhb << 16
791              | (rn & 0x1f) << 5 | (rd & 0x1f));
792}
793
794static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
795                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
796{
797    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
798              | (rn & 0x1f) << 5 | (rd & 0x1f));
799}
800
801static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
802                              unsigned size, TCGReg rd, TCGReg rn)
803{
804    tcg_out32(s, insn | q << 30 | (size << 22)
805              | (rn & 0x1f) << 5 | (rd & 0x1f));
806}
807
808static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
809                              TCGReg rd, TCGReg base, TCGType ext,
810                              TCGReg regoff)
811{
812    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
813    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
814              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
815}
816
817static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
818                              TCGReg rd, TCGReg rn, intptr_t offset)
819{
820    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
825{
826    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
827    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
828              | rn << 5 | (rd & 0x1f));
829}
830
/* Register to register move using ORR (shifted register with no shift).
   Emits ORR rd, XZR, rm at the width selected by EXT.  */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}
836
/* Register to register move using ADDI (move to/from SP).
   Emits ADD rd, rn, #0 at the width selected by EXT.  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}
842
843/* This function is used for the Logical (immediate) instruction group.
844   The value of LIMM must satisfy IS_LIMM.  See the comment above about
845   only supporting simplified logical immediates.  */
846static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
847                             TCGReg rd, TCGReg rn, uint64_t limm)
848{
849    unsigned h, l, r, c;
850
851    tcg_debug_assert(is_limm(limm));
852
853    h = clz64(limm);
854    l = ctz64(limm);
855    if (l == 0) {
856        r = 0;                  /* form 0....01....1 */
857        c = ctz64(~limm) - 1;
858        if (h == 0) {
859            r = clz64(~limm);   /* form 1..10..01..1 */
860            c += r;
861        }
862    } else {
863        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
864        c = r - h - 1;
865    }
866    if (ext == TCG_TYPE_I32) {
867        r &= 31;
868        c &= 31;
869    }
870
871    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
872}
873
/*
 * Load the constant V64 into vector register RD.
 *
 * TYPE selects the 64-bit or 128-bit vector form (Q is set for
 * TCG_TYPE_V128) and VECE is the element size (MO_8 .. MO_64).
 * NOTE(review): V64 is presumably already replicated to 64 bits by the
 * caller -- the replication comment below relies on that; confirm.
 *
 * The encodings are tried from cheapest to most expensive: a single
 * MOVI/MVNI, then a two-instruction MOVI+ORR or MVNI+BIC pair, and
 * finally a load from the constant pool.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            /* Bit i of imm8 records that byte i is all-ones.  */
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Try the bitwise complement via MVNI.  */
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Same tests on the complement, emitted with MVNI.  */
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        /* A non-zero result I is reused both as the cmode of the second
           insn and (times 4) as the bit position of its 8-bit field.  */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        /* Reached for element sizes other than MO_8/MO_16/MO_32.  */
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
976
977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978                            TCGReg rd, TCGReg rs)
979{
980    int is_q = type - TCG_TYPE_V64;
981    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982    return true;
983}
984
985static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
986                             TCGReg r, TCGReg base, intptr_t offset)
987{
988    TCGReg temp = TCG_REG_TMP;
989
990    if (offset < -0xffffff || offset > 0xffffff) {
991        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
992        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
993        base = temp;
994    } else {
995        AArch64Insn add_insn = I3401_ADDI;
996
997        if (offset < 0) {
998            add_insn = I3401_SUBI;
999            offset = -offset;
1000        }
1001        if (offset & 0xfff000) {
1002            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1003            base = temp;
1004        }
1005        if (offset & 0xfff) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1007            base = temp;
1008        }
1009    }
1010    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1011    return true;
1012}
1013
/*
 * Load the constant VALUE into general register RD (RD must be < 32).
 *
 * TYPE must be TCG_TYPE_I32 or TCG_TYPE_I64.  The following encodings
 * are tried in order, and the first that fits is emitted:
 *   1. a single MOVZ or MOVN;
 *   2. a logical immediate, as ORR from XZR;
 *   3. (64-bit only) a pc-relative ADR, or ADRP plus optional ADD;
 *   4. MOVZ/MOVN followed by at most one MOVK;
 *   5. a load from the constant pool.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        /* Byte displacement fits in 21 signed bits: a single ADR.  */
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        /* Otherwise try a 21-bit displacement counted in 4KiB pages.  */
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            /* Add back the offset within the page, if any.  */
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* S0/S1 are the bit positions (multiples of 16) of the two lowest
       non-zero 16-bit chunks of T0; T1/T2 are T0 with those chunks
       successively cleared.  T2 == 0 means at most two chunks are set.  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            /* MOVK always takes its chunk from the un-inverted VALUE.  */
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1104
/* Define something more legible for general use: tcg_out_ldst_r is an
   alias for the format-3310 emitter, which this file uses for loads and
   stores addressed as base register plus offset register.  */
#define tcg_out_ldst_r  tcg_out_insn_3310
1107
1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109                         TCGReg rn, intptr_t offset, int lgsize)
1110{
1111    /* If the offset is naturally aligned and in range, then we can
1112       use the scaled uimm12 encoding */
1113    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114        uintptr_t scaled_uimm = offset >> lgsize;
1115        if (scaled_uimm <= 0xfff) {
1116            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117            return;
1118        }
1119    }
1120
1121    /* Small signed offsets can use the unscaled encoding.  */
1122    if (offset >= -256 && offset < 256) {
1123        tcg_out_insn_3312(s, insn, rd, rn, offset);
1124        return;
1125    }
1126
1127    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1128    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130}
1131
1132static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1133{
1134    if (ret == arg) {
1135        return true;
1136    }
1137    switch (type) {
1138    case TCG_TYPE_I32:
1139    case TCG_TYPE_I64:
1140        if (ret < 32 && arg < 32) {
1141            tcg_out_movr(s, type, ret, arg);
1142            break;
1143        } else if (ret < 32) {
1144            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1145            break;
1146        } else if (arg < 32) {
1147            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1148            break;
1149        }
1150        /* FALLTHRU */
1151
1152    case TCG_TYPE_V64:
1153        tcg_debug_assert(ret >= 32 && arg >= 32);
1154        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1155        break;
1156    case TCG_TYPE_V128:
1157        tcg_debug_assert(ret >= 32 && arg >= 32);
1158        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1159        break;
1160
1161    default:
1162        g_assert_not_reached();
1163    }
1164    return true;
1165}
1166
1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168                       TCGReg base, intptr_t ofs)
1169{
1170    AArch64Insn insn;
1171    int lgsz;
1172
1173    switch (type) {
1174    case TCG_TYPE_I32:
1175        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176        lgsz = 2;
1177        break;
1178    case TCG_TYPE_I64:
1179        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180        lgsz = 3;
1181        break;
1182    case TCG_TYPE_V64:
1183        insn = I3312_LDRVD;
1184        lgsz = 3;
1185        break;
1186    case TCG_TYPE_V128:
1187        insn = I3312_LDRVQ;
1188        lgsz = 4;
1189        break;
1190    default:
1191        g_assert_not_reached();
1192    }
1193    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194}
1195
1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197                       TCGReg base, intptr_t ofs)
1198{
1199    AArch64Insn insn;
1200    int lgsz;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205        lgsz = 2;
1206        break;
1207    case TCG_TYPE_I64:
1208        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V64:
1212        insn = I3312_STRVD;
1213        lgsz = 3;
1214        break;
1215    case TCG_TYPE_V128:
1216        insn = I3312_STRVQ;
1217        lgsz = 4;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223}
1224
1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226                               TCGReg base, intptr_t ofs)
1227{
1228    if (type <= TCG_TYPE_I64 && val == 0) {
1229        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230        return true;
1231    }
1232    return false;
1233}
1234
/* Emit a BFM instruction with destination RD, source RN and the two
   immediate fields A and B.  EXT is supplied for both the size argument
   and the following argument of the format-3402 emitter.  */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}
1240
/* Emit a UBFM instruction with destination RD, source RN and the two
   immediate fields A and B.  EXT is supplied for both the size argument
   and the following argument of the format-3402 emitter.  */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}
1246
/* Emit an SBFM instruction with destination RD, source RN and the two
   immediate fields A and B.  EXT is supplied for both the size argument
   and the following argument of the format-3402 emitter.  */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}
1252
/* Emit an EXTR instruction with destination RD, sources RN and RM, and
   the immediate A.  The rotate helpers in this file call it with
   RN == RM.  */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
1258
1259static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260                               TCGReg rd, TCGReg rn, unsigned int m)
1261{
1262    int bits = ext ? 64 : 32;
1263    int max = bits - 1;
1264    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1265}
1266
1267static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268                               TCGReg rd, TCGReg rn, unsigned int m)
1269{
1270    int max = ext ? 63 : 31;
1271    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272}
1273
1274static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275                               TCGReg rd, TCGReg rn, unsigned int m)
1276{
1277    int max = ext ? 63 : 31;
1278    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279}
1280
1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282                                TCGReg rd, TCGReg rn, unsigned int m)
1283{
1284    int max = ext ? 63 : 31;
1285    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286}
1287
1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289                                TCGReg rd, TCGReg rn, unsigned int m)
1290{
1291    int max = ext ? 63 : 31;
1292    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293}
1294
1295static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1296                               TCGReg rn, unsigned lsb, unsigned width)
1297{
1298    unsigned size = ext ? 64 : 32;
1299    unsigned a = (size - lsb) & (size - 1);
1300    unsigned b = width - 1;
1301    tcg_out_bfm(s, ext, rd, rn, a, b);
1302}
1303
1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305                        tcg_target_long b, bool const_b)
1306{
1307    if (const_b) {
1308        /* Using CMP or CMN aliases.  */
1309        if (b >= 0) {
1310            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311        } else {
1312            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313        }
1314    } else {
1315        /* Using CMP alias SUBS wzr, Wn, Wm */
1316        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317    }
1318}
1319
1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321{
1322    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324    tcg_out_insn(s, 3206, B, offset);
1325}
1326
1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328{
1329    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330    if (offset == sextract64(offset, 0, 26)) {
1331        tcg_out_insn(s, 3206, B, offset);
1332    } else {
1333        /* Choose X9 as a call-clobbered non-LR temporary. */
1334        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1335        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1336    }
1337}
1338
/* Emit an indirect call (BLR) through register REG.  */
static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
{
    tcg_out_insn(s, 3207, BLR, reg);
}
1343
1344static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1345{
1346    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1347    if (offset == sextract64(offset, 0, 26)) {
1348        tcg_out_insn(s, 3206, BL, offset);
1349    } else {
1350        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1351        tcg_out_callr(s, TCG_REG_TMP);
1352    }
1353}
1354
1355void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1356                              uintptr_t jmp_rw, uintptr_t addr)
1357{
1358    tcg_insn_unit i1, i2;
1359    TCGType rt = TCG_TYPE_I64;
1360    TCGReg  rd = TCG_REG_TMP;
1361    uint64_t pair;
1362
1363    ptrdiff_t offset = addr - jmp_rx;
1364
1365    if (offset == sextract64(offset, 0, 26)) {
1366        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1367        i2 = NOP;
1368    } else {
1369        offset = (addr >> 12) - (jmp_rx >> 12);
1370
1371        /* patch ADRP */
1372        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1373        /* patch ADDI */
1374        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1375    }
1376    pair = (uint64_t)i2 << 32 | i1;
1377    qatomic_set((uint64_t *)jmp_rw, pair);
1378    flush_idcache_range(jmp_rx, jmp_rw, 8);
1379}
1380
1381static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1382{
1383    if (!l->has_value) {
1384        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1385        tcg_out_insn(s, 3206, B, 0);
1386    } else {
1387        tcg_out_goto(s, l->u.value_ptr);
1388    }
1389}
1390
1391static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1392                           TCGArg b, bool b_const, TCGLabel *l)
1393{
1394    intptr_t offset;
1395    bool need_cmp;
1396
1397    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1398        need_cmp = false;
1399    } else {
1400        need_cmp = true;
1401        tcg_out_cmp(s, ext, a, b, b_const);
1402    }
1403
1404    if (!l->has_value) {
1405        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1406        offset = tcg_in32(s) >> 5;
1407    } else {
1408        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1409        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1410    }
1411
1412    if (need_cmp) {
1413        tcg_out_insn(s, 3202, B_C, c, offset);
1414    } else if (c == TCG_COND_EQ) {
1415        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1416    } else {
1417        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1418    }
1419}
1420
/* Emit a byte-reversal instruction from RN into RD.  S_BITS is OR-ed
   into the opcode at bit 10 to select among the variants listed below;
   EXT selects the 32-bit or 64-bit register form.  */
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}
1427
1428static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1429                               TCGReg rd, TCGReg rn)
1430{
1431    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1432    int bits = (8 << s_bits) - 1;
1433    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1434}
1435
1436static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1437                               TCGReg rd, TCGReg rn)
1438{
1439    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1440    int bits = (8 << s_bits) - 1;
1441    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1442}
1443
1444static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1445                            TCGReg rn, int64_t aimm)
1446{
1447    if (aimm >= 0) {
1448        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1449    } else {
1450        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1451    }
1452}
1453
/*
 * Emit a double-word add or subtract: (RH:RL) = (AH:AL) +/- (BH:BL).
 *
 * The low half is computed with a flag-setting add/subtract and the
 * high half with ADC/SBC.  BL and BH are immediates when CONST_BL and
 * CONST_BH are set, otherwise register numbers.  SUB selects
 * subtraction.  TCG_REG_TMP may be clobbered.
 */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* If writing RL would clobber an input of the high-half operation,
       compute the low half into the temp and copy it at the end.  */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        /* A negative immediate is handled by negating it and swapping
           the add and subtract opcodes.  */
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        /* Either way the register operand becomes the zero register.  */
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    /* Move the low result into place (emits nothing if not redirected).  */
    tcg_out_mov(s, ext, orig_rl, rl);
}
1503
/* Emit a DMB barrier chosen from the TCG_MO_* ordering bits in A0.
   The table defaults every combination to a full load+store barrier;
   the later, more specific entries override that default with a
   weaker barrier.  */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    /* Only the ordering bits of A0 index the table.  */
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1515
/*
 * Emit count-leading-zeros of A0 into D, or count-trailing-zeros when
 * IS_CTZ is set (implemented as RBIT followed by CLZ).
 *
 * B is the value to produce when A0 is zero: an immediate when CONST_B
 * is set, otherwise a register.  TCG_REG_TMP is clobbered.
 */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        /* Bit-reverse the input so that CLZ counts trailing zeros.  */
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* The requested zero-input result is the operand width, so a
           bare CLZ is emitted with no select.  */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Set the flags from the original input, then count into TMP.  */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                /* Use CSINV on the zero register for the -1 case.  */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* A0 has already been consumed above, so D can hold
                   the constant even if D and A0 are the same register.  */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        /* D = (A0 != 0) ? count in TMP : alternative B.  */
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
1546
1547static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1548{
1549    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1550    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1551    tcg_out_insn(s, 3406, ADR, rd, offset);
1552}
1553
1554#ifdef CONFIG_SOFTMMU
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     MemOpIdx oi, uintptr_t ra)
 *
 * Slow-path load helpers, indexed by access size (memop & MO_SIZE).
 * The multi-byte entries use the helpers matching the host byte order.
 */
static void * const qemu_ld_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_ldub_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_lduw_mmu,
    [MO_32] = helper_be_ldul_mmu,
    [MO_64] = helper_be_ldq_mmu,
#else
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
#endif
};
1570
/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, MemOpIdx oi,
 *                                     uintptr_t ra)
 *
 * Slow-path store helpers, indexed by access size (memop & MO_SIZE).
 * The multi-byte entries use the helpers matching the host byte order.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] = {
    [MO_8]  = helper_ret_stb_mmu,
#if HOST_BIG_ENDIAN
    [MO_16] = helper_be_stw_mmu,
    [MO_32] = helper_be_stl_mmu,
    [MO_64] = helper_be_stq_mmu,
#else
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
#endif
};
1587
1588static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1589{
1590    MemOpIdx oi = lb->oi;
1591    MemOp opc = get_memop(oi);
1592    MemOp size = opc & MO_SIZE;
1593
1594    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1595        return false;
1596    }
1597
1598    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1599    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1600    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1601    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1602    tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE]);
1603    if (opc & MO_SIGN) {
1604        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1605    } else {
1606        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1607    }
1608
1609    tcg_out_goto(s, lb->raddr);
1610    return true;
1611}
1612
1613static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1614{
1615    MemOpIdx oi = lb->oi;
1616    MemOp opc = get_memop(oi);
1617    MemOp size = opc & MO_SIZE;
1618
1619    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1620        return false;
1621    }
1622
1623    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1624    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1625    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1626    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1627    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1628    tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE]);
1629    tcg_out_goto(s, lb->raddr);
1630    return true;
1631}
1632
1633static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1634                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1635                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1636{
1637    TCGLabelQemuLdst *label = new_ldst_label(s);
1638
1639    label->is_ld = is_ld;
1640    label->oi = oi;
1641    label->type = ext;
1642    label->datalo_reg = data_reg;
1643    label->addrlo_reg = addr_reg;
1644    label->raddr = tcg_splitwx_to_rx(raddr);
1645    label->label_ptr[0] = label_ptr;
1646}
1647
/* We expect to use a 7-bit scaled negative offset from ENV: the LDP in
   tcg_out_tlb_read addresses the {mask,table} pair at
   TLB_MASK_TABLE_OFS(mem_index) relative to TCG_AREG0, so the offset
   is checked to lie within [-512, 0].  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);

/* These offsets are built into the LDP below: a single LDP fetches
   mask into the first register and table into the second, which
   requires the two fields to be adjacent in this order.  */
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1655
/* Load and compare a TLB entry, emitting the conditional jump to the
   slow path for the failure case, which will be patched later when finalizing
   the slow path. Generated code returns the host addend in X1,
   clobbers X0,X2,X3,TMP.

   ADDR_REG holds the guest address, OPC is the memory operation (its
   size and alignment are used), MEM_INDEX selects the TLB and IS_READ
   chooses between the addr_read and addr_write comparators.  The
   location of the emitted branch is returned through LABEL_PTR.  */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
                             tcg_insn_unit **label_ptr, int mem_index,
                             bool is_read)
{
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1u << a_bits) - 1;
    unsigned s_mask = (1u << s_bits) - 1;
    TCGReg x3;
    TCGType mask_type;
    uint64_t compare_mask;

    /* The index extraction needs the 64-bit form only if the shifted
       mask can exceed 32 bits.  */
    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into X0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_X0, TCG_REG_X0, addr_reg,
                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);

    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
               ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
               offsetof(CPUTLBEntry, addend));

    /* For aligned accesses, we check the first byte and include the alignment
       bits within the address.  For unaligned access, we check that we don't
       cross pages using the address of the last byte of the access.  */
    if (a_bits >= s_bits) {
        x3 = addr_reg;
    } else {
        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, addr_reg, s_mask - a_mask);
        x3 = TCG_REG_X3;
    }
    /* Keep the page bits plus the alignment bits, so that a misaligned
       address fails the comparison below.  */
    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

    /* Store the page mask part of the address into X3.  */
    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
                     TCG_REG_X3, x3, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);

    /* If not equal, we jump to the slow path. */
    *label_ptr = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
}
1717
1718#else
1719static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
1720                                   unsigned a_bits)
1721{
1722    unsigned a_mask = (1 << a_bits) - 1;
1723    TCGLabelQemuLdst *label = new_ldst_label(s);
1724
1725    label->is_ld = is_ld;
1726    label->addrlo_reg = addr_reg;
1727
1728    /* tst addr, #mask */
1729    tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1730
1731    label->label_ptr[0] = s->code_ptr;
1732
1733    /* b.ne slow_path */
1734    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1735
1736    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
1737}
1738
1739static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1740{
1741    if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1742        return false;
1743    }
1744
1745    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
1746    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1747
1748    /* "Tail call" to the helper, with the return address back inline. */
1749    tcg_out_adr(s, TCG_REG_LR, l->raddr);
1750    tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
1751                                        : helper_unaligned_st));
1752    return true;
1753}
1754
/* Without CONFIG_SOFTMMU, the only load slow path is the
   alignment-failure path.  */
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1759
/* Without CONFIG_SOFTMMU, the only store slow path is the
   alignment-failure path.  */
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    return tcg_out_fail_alignment(s, l);
}
1764#endif /* CONFIG_SOFTMMU */
1765
1766static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1767                                   TCGReg data_r, TCGReg addr_r,
1768                                   TCGType otype, TCGReg off_r)
1769{
1770    switch (memop & MO_SSIZE) {
1771    case MO_UB:
1772        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1773        break;
1774    case MO_SB:
1775        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1776                       data_r, addr_r, otype, off_r);
1777        break;
1778    case MO_UW:
1779        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1780        break;
1781    case MO_SW:
1782        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1783                       data_r, addr_r, otype, off_r);
1784        break;
1785    case MO_UL:
1786        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1787        break;
1788    case MO_SL:
1789        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1790        break;
1791    case MO_UQ:
1792        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1793        break;
1794    default:
1795        tcg_abort();
1796    }
1797}
1798
1799static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1800                                   TCGReg data_r, TCGReg addr_r,
1801                                   TCGType otype, TCGReg off_r)
1802{
1803    switch (memop & MO_SIZE) {
1804    case MO_8:
1805        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1806        break;
1807    case MO_16:
1808        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1809        break;
1810    case MO_32:
1811        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1812        break;
1813    case MO_64:
1814        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1815        break;
1816    default:
1817        tcg_abort();
1818    }
1819}
1820
1821static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1822                            MemOpIdx oi, TCGType ext)
1823{
1824    MemOp memop = get_memop(oi);
1825    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1826
1827    /* Byte swapping is left to middle-end expansion. */
1828    tcg_debug_assert((memop & MO_BSWAP) == 0);
1829
1830#ifdef CONFIG_SOFTMMU
1831    unsigned mem_index = get_mmuidx(oi);
1832    tcg_insn_unit *label_ptr;
1833
1834    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1835    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1836                           TCG_REG_X1, otype, addr_reg);
1837    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1838                        s->code_ptr, label_ptr);
1839#else /* !CONFIG_SOFTMMU */
1840    unsigned a_bits = get_alignment_bits(memop);
1841    if (a_bits) {
1842        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1843    }
1844    if (USE_GUEST_BASE) {
1845        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1846                               TCG_REG_GUEST_BASE, otype, addr_reg);
1847    } else {
1848        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1849                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1850    }
1851#endif /* CONFIG_SOFTMMU */
1852}
1853
1854static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1855                            MemOpIdx oi)
1856{
1857    MemOp memop = get_memop(oi);
1858    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1859
1860    /* Byte swapping is left to middle-end expansion. */
1861    tcg_debug_assert((memop & MO_BSWAP) == 0);
1862
1863#ifdef CONFIG_SOFTMMU
1864    unsigned mem_index = get_mmuidx(oi);
1865    tcg_insn_unit *label_ptr;
1866
1867    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1868    tcg_out_qemu_st_direct(s, memop, data_reg,
1869                           TCG_REG_X1, otype, addr_reg);
1870    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1871                        data_reg, addr_reg, s->code_ptr, label_ptr);
1872#else /* !CONFIG_SOFTMMU */
1873    unsigned a_bits = get_alignment_bits(memop);
1874    if (a_bits) {
1875        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1876    }
1877    if (USE_GUEST_BASE) {
1878        tcg_out_qemu_st_direct(s, memop, data_reg,
1879                               TCG_REG_GUEST_BASE, otype, addr_reg);
1880    } else {
1881        tcg_out_qemu_st_direct(s, memop, data_reg,
1882                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1883    }
1884#endif /* CONFIG_SOFTMMU */
1885}
1886
/*
 * Code address that INDEX_op_exit_tb branches to after loading a non-zero
 * return value into X0.  It is assigned outside this part of the file
 * (presumably while the prologue/epilogue is generated -- confirm).
 */
1887static const tcg_insn_unit *tb_ret_addr;
1888
/*
 * Emit host code for one integer or control-flow TCG opcode.
 *
 * @s:          code generation context
 * @opc:        opcode to emit
 * @args:       operand values (register numbers, constants, labels, flags)
 * @const_args: const_args[i] is non-zero when args[i] holds a constant
 *              rather than a register number
 *
 * The mov and call opcodes, and any opcode not listed in the switch, end
 * in g_assert_not_reached().
 */
1889static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1890                       const TCGArg args[TCG_MAX_OP_ARGS],
1891                       const int const_args[TCG_MAX_OP_ARGS])
1892{
1893    /* 99% of the time, we can signal the use of extension registers
1894       by looking to see if the opcode handles 64-bit data.  */
1895    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1896
1897    /* Hoist the loads of the most common arguments.  */
1898    TCGArg a0 = args[0];
1899    TCGArg a1 = args[1];
1900    TCGArg a2 = args[2];
1901    int c2 = const_args[2];
1902
1903    /* Some operands are defined with "rZ" constraint, a register or
1904       the zero register.  These need not actually test args[I] == 0.  */
1905#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1906
1907    switch (opc) {
1908    case INDEX_op_exit_tb:
1909        /* Reuse the zeroing that exists for goto_ptr.  */
1910        if (a0 == 0) {
1911            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1912        } else {
1913            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1914            tcg_out_goto_long(s, tb_ret_addr);
1915        }
1916        break;
1917
1918    case INDEX_op_goto_tb:
1919        if (s->tb_jmp_insn_offset != NULL) {
1920            /* TCG_TARGET_HAS_direct_jump */
1921            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1922               write can be used to patch the target address. */
1923            if ((uintptr_t)s->code_ptr & 7) {
1924                tcg_out32(s, NOP);
1925            }
1926            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1927            /* actual branch destination will be patched by
1928               tb_target_set_jmp_target later. */
1929            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1930            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1931        } else {
1932            /* !TCG_TARGET_HAS_direct_jump */
1933            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
                /* The pc-relative byte distance is shifted right by 2 before
                   it is handed to the literal LDR as its offset operand. */
1934            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1935            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1936        }
            /* Both variants leave the destination in TCG_REG_TMP. */
1937        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1938        set_jmp_reset_offset(s, a0);
1939        break;
1940
1941    case INDEX_op_goto_ptr:
1942        tcg_out_insn(s, 3207, BR, a0);
1943        break;
1944
1945    case INDEX_op_br:
1946        tcg_out_goto_label(s, arg_label(a0));
1947        break;
1948
        /*
         * Host loads and stores.  The last argument of tcg_out_ldst() grows
         * with the access size (0 for bytes up to 3 for 64-bit accesses);
         * presumably it is log2 of the size in bytes -- confirm against
         * tcg_out_ldst.
         */
1949    case INDEX_op_ld8u_i32:
1950    case INDEX_op_ld8u_i64:
1951        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1952        break;
1953    case INDEX_op_ld8s_i32:
1954        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1955        break;
1956    case INDEX_op_ld8s_i64:
1957        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1958        break;
1959    case INDEX_op_ld16u_i32:
1960    case INDEX_op_ld16u_i64:
1961        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1962        break;
1963    case INDEX_op_ld16s_i32:
1964        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1965        break;
1966    case INDEX_op_ld16s_i64:
1967        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1968        break;
1969    case INDEX_op_ld_i32:
1970    case INDEX_op_ld32u_i64:
1971        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1972        break;
1973    case INDEX_op_ld32s_i64:
1974        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1975        break;
1976    case INDEX_op_ld_i64:
1977        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1978        break;
1979
        /* For stores, REG0(0) lets a constant data operand use XZR. */
1980    case INDEX_op_st8_i32:
1981    case INDEX_op_st8_i64:
1982        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1983        break;
1984    case INDEX_op_st16_i32:
1985    case INDEX_op_st16_i64:
1986        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1987        break;
1988    case INDEX_op_st_i32:
1989    case INDEX_op_st32_i64:
1990        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1991        break;
1992    case INDEX_op_st_i64:
1993        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1994        break;
1995
        /*
         * Pattern used by the _i32 arithmetic and logic cases below: the
         * operand is narrowed to 32 bits (and sign-extended back into the
         * TCGArg) before falling through to the code shared with _i64.
         */
1996    case INDEX_op_add_i32:
1997        a2 = (int32_t)a2;
1998        /* FALLTHRU */
1999    case INDEX_op_add_i64:
2000        if (c2) {
2001            tcg_out_addsubi(s, ext, a0, a1, a2);
2002        } else {
2003            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2004        }
2005        break;
2006
2007    case INDEX_op_sub_i32:
2008        a2 = (int32_t)a2;
2009        /* FALLTHRU */
2010    case INDEX_op_sub_i64:
2011        if (c2) {
                /* Subtracting a constant is emitted as adding its negation. */
2012            tcg_out_addsubi(s, ext, a0, a1, -a2);
2013        } else {
2014            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2015        }
2016        break;
2017
2018    case INDEX_op_neg_i64:
2019    case INDEX_op_neg_i32:
            /* Negation is a subtraction with XZR as the first source. */
2020        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2021        break;
2022
2023    case INDEX_op_and_i32:
2024        a2 = (int32_t)a2;
2025        /* FALLTHRU */
2026    case INDEX_op_and_i64:
2027        if (c2) {
2028            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2029        } else {
2030            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2031        }
2032        break;
2033
2034    case INDEX_op_andc_i32:
2035        a2 = (int32_t)a2;
2036        /* FALLTHRU */
2037    case INDEX_op_andc_i64:
2038        if (c2) {
                /* With a constant, the complement is folded into the
                   immediate (~a2); the orc and eqv cases do the same. */
2039            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2040        } else {
2041            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2042        }
2043        break;
2044
2045    case INDEX_op_or_i32:
2046        a2 = (int32_t)a2;
2047        /* FALLTHRU */
2048    case INDEX_op_or_i64:
2049        if (c2) {
2050            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2051        } else {
2052            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_orc_i32:
2057        a2 = (int32_t)a2;
2058        /* FALLTHRU */
2059    case INDEX_op_orc_i64:
2060        if (c2) {
2061            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2062        } else {
2063            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2064        }
2065        break;
2066
2067    case INDEX_op_xor_i32:
2068        a2 = (int32_t)a2;
2069        /* FALLTHRU */
2070    case INDEX_op_xor_i64:
2071        if (c2) {
2072            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2073        } else {
2074            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2075        }
2076        break;
2077
2078    case INDEX_op_eqv_i32:
2079        a2 = (int32_t)a2;
2080        /* FALLTHRU */
2081    case INDEX_op_eqv_i64:
2082        if (c2) {
2083            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2084        } else {
2085            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2086        }
2087        break;
2088
2089    case INDEX_op_not_i64:
2090    case INDEX_op_not_i32:
            /* Bitwise NOT is the or-with-complement insn fed with XZR. */
2091        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2092        break;
2093
2094    case INDEX_op_mul_i64:
2095    case INDEX_op_mul_i32:
            /* Plain multiply: MADD with XZR as the final operand. */
2096        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2097        break;
2098
2099    case INDEX_op_div_i64:
2100    case INDEX_op_div_i32:
2101        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2102        break;
2103    case INDEX_op_divu_i64:
2104    case INDEX_op_divu_i32:
2105        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2106        break;
2107
        /*
         * Remainders take two instructions: the quotient is computed into
         * TCG_REG_TMP and MSUB then combines it with a2 and a1 (presumably
         * a0 = a1 - TMP * a2 -- confirm against the MSUB definition).
         */
2108    case INDEX_op_rem_i64:
2109    case INDEX_op_rem_i32:
2110        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2111        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2112        break;
2113    case INDEX_op_remu_i64:
2114    case INDEX_op_remu_i32:
2115        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2116        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2117        break;
2118
        /* Shifts and rotates: constant counts go through the tcg_out_*
           helpers, register counts use the variable-shift instructions. */
2119    case INDEX_op_shl_i64:
2120    case INDEX_op_shl_i32:
2121        if (c2) {
2122            tcg_out_shl(s, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_shr_i64:
2129    case INDEX_op_shr_i32:
2130        if (c2) {
2131            tcg_out_shr(s, ext, a0, a1, a2);
2132        } else {
2133            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2134        }
2135        break;
2136
2137    case INDEX_op_sar_i64:
2138    case INDEX_op_sar_i32:
2139        if (c2) {
2140            tcg_out_sar(s, ext, a0, a1, a2);
2141        } else {
2142            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2143        }
2144        break;
2145
2146    case INDEX_op_rotr_i64:
2147    case INDEX_op_rotr_i32:
2148        if (c2) {
2149            tcg_out_rotr(s, ext, a0, a1, a2);
2150        } else {
2151            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2152        }
2153        break;
2154
2155    case INDEX_op_rotl_i64:
2156    case INDEX_op_rotl_i32:
2157        if (c2) {
2158            tcg_out_rotl(s, ext, a0, a1, a2);
2159        } else {
                /* Rotate left by a register count: negate the count into
                   TCG_REG_TMP, then rotate right by that amount. */
2160            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2161            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2162        }
2163        break;
2164
        /* The final argument of tcg_out_cltz() is false for clz and true
           for ctz. */
2165    case INDEX_op_clz_i64:
2166    case INDEX_op_clz_i32:
2167        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2168        break;
2169    case INDEX_op_ctz_i64:
2170    case INDEX_op_ctz_i32:
2171        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2172        break;
2173
2174    case INDEX_op_brcond_i32:
2175        a1 = (int32_t)a1;
2176        /* FALLTHRU */
2177    case INDEX_op_brcond_i64:
            /* Here a2 is passed ahead of a0/a1 and args[3] is the label;
               presumably a2 is the condition -- confirm in tcg_out_brcond. */
2178        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2179        break;
2180
2181    case INDEX_op_setcond_i32:
2182        a2 = (int32_t)a2;
2183        /* FALLTHRU */
2184    case INDEX_op_setcond_i64:
2185        tcg_out_cmp(s, ext, a1, a2, c2);
2186        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2187        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2188                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2189        break;
2190
2191    case INDEX_op_movcond_i32:
2192        a2 = (int32_t)a2;
2193        /* FALLTHRU */
2194    case INDEX_op_movcond_i64:
            /* Compare a1 with a2, then select between operands 3 and 4
               (either may be XZR) under the condition in args[5]. */
2195        tcg_out_cmp(s, ext, a1, a2, c2);
2196        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2197        break;
2198
2199    case INDEX_op_qemu_ld_i32:
2200    case INDEX_op_qemu_ld_i64:
2201        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2202        break;
2203    case INDEX_op_qemu_st_i32:
2204    case INDEX_op_qemu_st_i64:
2205        tcg_out_qemu_st(s, REG0(0), a1, a2);
2206        break;
2207
2208    case INDEX_op_bswap64_i64:
2209        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2210        break;
2211    case INDEX_op_bswap32_i64:
            /* 32-bit byte reverse; a 64-bit sign extension follows only
               when the flags in a2 contain TCG_BSWAP_OS. */
2212        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2213        if (a2 & TCG_BSWAP_OS) {
2214            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a0);
2215        }
2216        break;
2217    case INDEX_op_bswap32_i32:
2218        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2219        break;
2220    case INDEX_op_bswap16_i64:
2221    case INDEX_op_bswap16_i32:
2222        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2223        if (a2 & TCG_BSWAP_OS) {
2224            /* Output must be sign-extended. */
2225            tcg_out_sxt(s, ext, MO_16, a0, a0);
2226        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2227            /* Output must be zero-extended, but input isn't. */
2228            tcg_out_uxt(s, MO_16, a0, a0);
2229        }
2230        break;
2231
2232    case INDEX_op_ext8s_i64:
2233    case INDEX_op_ext8s_i32:
2234        tcg_out_sxt(s, ext, MO_8, a0, a1);
2235        break;
2236    case INDEX_op_ext16s_i64:
2237    case INDEX_op_ext16s_i32:
2238        tcg_out_sxt(s, ext, MO_16, a0, a1);
2239        break;
2240    case INDEX_op_ext_i32_i64:
2241    case INDEX_op_ext32s_i64:
2242        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2243        break;
2244    case INDEX_op_ext8u_i64:
2245    case INDEX_op_ext8u_i32:
2246        tcg_out_uxt(s, MO_8, a0, a1);
2247        break;
2248    case INDEX_op_ext16u_i64:
2249    case INDEX_op_ext16u_i32:
2250        tcg_out_uxt(s, MO_16, a0, a1);
2251        break;
2252    case INDEX_op_extu_i32_i64:
2253    case INDEX_op_ext32u_i64:
            /* Zero extension from 32 bits is emitted as a 32-bit register
               move; presumably this relies on 32-bit register writes
               clearing the upper half -- confirm against the ISA. */
2254        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2255        break;
2256
2257    case INDEX_op_deposit_i64:
2258    case INDEX_op_deposit_i32:
2259        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2260        break;
2261
        /* For (s)extract the bitfield bounds passed on are a2 and
           a2 + args[3] - 1 (presumably a2 is the field position and
           args[3] its length). */
2262    case INDEX_op_extract_i64:
2263    case INDEX_op_extract_i32:
2264        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2265        break;
2266
2267    case INDEX_op_sextract_i64:
2268    case INDEX_op_sextract_i32:
2269        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2270        break;
2271
2272    case INDEX_op_extract2_i64:
2273    case INDEX_op_extract2_i32:
            /* Note that operand 2 is passed before operand 1. */
2274        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2275        break;
2276
        /* Double-word add/subtract: the last argument of tcg_out_addsub2()
           is false for add2 and true for sub2; the _i32 forms narrow the
           constant candidate args[4] to 32 bits first. */
2277    case INDEX_op_add2_i32:
2278        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2279                        (int32_t)args[4], args[5], const_args[4],
2280                        const_args[5], false);
2281        break;
2282    case INDEX_op_add2_i64:
2283        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2284                        args[5], const_args[4], const_args[5], false);
2285        break;
2286    case INDEX_op_sub2_i32:
2287        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2288                        (int32_t)args[4], args[5], const_args[4],
2289                        const_args[5], true);
2290        break;
2291    case INDEX_op_sub2_i64:
2292        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2293                        args[5], const_args[4], const_args[5], true);
2294        break;
2295
2296    case INDEX_op_muluh_i64:
2297        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2298        break;
2299    case INDEX_op_mulsh_i64:
2300        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2301        break;
2302
2303    case INDEX_op_mb:
2304        tcg_out_mb(s, a0);
2305        break;
2306
2307    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2308    case INDEX_op_mov_i64:
2309    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2310    default:
2311        g_assert_not_reached();
2312    }
2313
2314#undef REG0
2315}
2316
/*
 * Emit host code for one vector TCG opcode.
 *
 * @s:          code generation context
 * @opc:        vector opcode to emit
 * @vecl:       added to TCG_TYPE_V64 to form the vector type, and used
 *              directly as the is_q flag of the vector encodings
 * @vece:       element size selector (compared against MO_64 below)
 * @args:       operand values (register numbers, constants, conditions)
 * @const_args: const_args[i] is non-zero when args[i] holds a constant
 *
 * When is_q is clear and the element size is MO_64 the scalar instruction
 * formats (3609/3611/3612) are used in place of the vector ones
 * (3614/3616/3617) wherever a scalar form is listed.
 *
 * mov_vec, dup_vec and any opcode not listed end in g_assert_not_reached().
 */
2317static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2318                           unsigned vecl, unsigned vece,
2319                           const TCGArg args[TCG_MAX_OP_ARGS],
2320                           const int const_args[TCG_MAX_OP_ARGS])
2321{
        /*
         * Comparison opcode tables indexed by TCGCond.  Conditions without
         * an initializer stay 0, which the cmp_vec code below treats as
         * "no instruction for this condition".
         */
2322    static const AArch64Insn cmp_vec_insn[16] = {
2323        [TCG_COND_EQ] = I3616_CMEQ,
2324        [TCG_COND_GT] = I3616_CMGT,
2325        [TCG_COND_GE] = I3616_CMGE,
2326        [TCG_COND_GTU] = I3616_CMHI,
2327        [TCG_COND_GEU] = I3616_CMHS,
2328    };
2329    static const AArch64Insn cmp_scalar_insn[16] = {
2330        [TCG_COND_EQ] = I3611_CMEQ,
2331        [TCG_COND_GT] = I3611_CMGT,
2332        [TCG_COND_GE] = I3611_CMGE,
2333        [TCG_COND_GTU] = I3611_CMHI,
2334        [TCG_COND_GEU] = I3611_CMHS,
2335    };
        /* Compare-against-zero forms, used when operand 2 is a constant. */
2336    static const AArch64Insn cmp0_vec_insn[16] = {
2337        [TCG_COND_EQ] = I3617_CMEQ0,
2338        [TCG_COND_GT] = I3617_CMGT0,
2339        [TCG_COND_GE] = I3617_CMGE0,
2340        [TCG_COND_LT] = I3617_CMLT0,
2341        [TCG_COND_LE] = I3617_CMLE0,
2342    };
2343    static const AArch64Insn cmp0_scalar_insn[16] = {
2344        [TCG_COND_EQ] = I3612_CMEQ0,
2345        [TCG_COND_GT] = I3612_CMGT0,
2346        [TCG_COND_GE] = I3612_CMGE0,
2347        [TCG_COND_LT] = I3612_CMLT0,
2348        [TCG_COND_LE] = I3612_CMLE0,
2349    };
2350
2351    TCGType type = vecl + TCG_TYPE_V64;
2352    unsigned is_q = vecl;
2353    bool is_scalar = !is_q && vece == MO_64;
2354    TCGArg a0, a1, a2, a3;
2355    int cmode, imm8;
2356
2357    a0 = args[0];
2358    a1 = args[1];
2359    a2 = args[2];
2360
2361    switch (opc) {
2362    case INDEX_op_ld_vec:
2363        tcg_out_ld(s, type, a0, a1, a2);
2364        break;
2365    case INDEX_op_st_vec:
2366        tcg_out_st(s, type, a0, a1, a2);
2367        break;
2368    case INDEX_op_dupm_vec:
2369        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2370        break;
2371    case INDEX_op_add_vec:
2372        if (is_scalar) {
2373            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2374        } else {
2375            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2376        }
2377        break;
2378    case INDEX_op_sub_vec:
2379        if (is_scalar) {
2380            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2381        } else {
2382            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2383        }
2384        break;
2385    case INDEX_op_mul_vec:
2386        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2387        break;
2388    case INDEX_op_neg_vec:
2389        if (is_scalar) {
2390            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2391        } else {
2392            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2393        }
2394        break;
2395    case INDEX_op_abs_vec:
2396        if (is_scalar) {
2397            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2398        } else {
2399            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2400        }
2401        break;
        /*
         * and/or/andc/orc with a constant operand share one scheme: the
         * constant (complemented for and/orc) is decomposed by
         * is_shimm1632(), whose return value is not checked here.  If the
         * destination is also the first source, the immediate form of the
         * instruction is applied in place and the function returns.
         * Otherwise the immediate is first materialized into a0 with
         * MOVI/MVNI and the register form is emitted with a2 = a0.
         */
2402    case INDEX_op_and_vec:
2403        if (const_args[2]) {
2404            is_shimm1632(~a2, &cmode, &imm8);
2405            if (a0 == a1) {
2406                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2407                return;
2408            }
2409            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2410            a2 = a0;
2411        }
2412        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2413        break;
2414    case INDEX_op_or_vec:
2415        if (const_args[2]) {
2416            is_shimm1632(a2, &cmode, &imm8);
2417            if (a0 == a1) {
2418                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2419                return;
2420            }
2421            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2422            a2 = a0;
2423        }
2424        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2425        break;
2426    case INDEX_op_andc_vec:
2427        if (const_args[2]) {
2428            is_shimm1632(a2, &cmode, &imm8);
2429            if (a0 == a1) {
2430                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2431                return;
2432            }
2433            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2434            a2 = a0;
2435        }
2436        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2437        break;
2438    case INDEX_op_orc_vec:
2439        if (const_args[2]) {
2440            is_shimm1632(~a2, &cmode, &imm8);
2441            if (a0 == a1) {
2442                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2443                return;
2444            }
2445            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2446            a2 = a0;
2447        }
2448        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2449        break;
2450    case INDEX_op_xor_vec:
2451        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2452        break;
2453    case INDEX_op_ssadd_vec:
2454        if (is_scalar) {
2455            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2456        } else {
2457            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2458        }
2459        break;
2460    case INDEX_op_sssub_vec:
2461        if (is_scalar) {
2462            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2463        } else {
2464            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2465        }
2466        break;
2467    case INDEX_op_usadd_vec:
2468        if (is_scalar) {
2469            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2470        } else {
2471            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2472        }
2473        break;
2474    case INDEX_op_ussub_vec:
2475        if (is_scalar) {
2476            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2477        } else {
2478            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2479        }
2480        break;
2481    case INDEX_op_smax_vec:
2482        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2483        break;
2484    case INDEX_op_smin_vec:
2485        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2486        break;
2487    case INDEX_op_umax_vec:
2488        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2489        break;
2490    case INDEX_op_umin_vec:
2491        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2492        break;
2493    case INDEX_op_not_vec:
2494        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2495        break;
        /*
         * Immediate shifts pass a biased value rather than the raw count:
         * left shifts (SHL, SLI) pass count + (8 << vece), right shifts
         * (USHR, SSHR) pass (16 << vece) - count.  (8 << vece) is
         * presumably the element width in bits -- confirm against MemOp.
         */
2496    case INDEX_op_shli_vec:
2497        if (is_scalar) {
2498            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2499        } else {
2500            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2501        }
2502        break;
2503    case INDEX_op_shri_vec:
2504        if (is_scalar) {
2505            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2506        } else {
2507            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2508        }
2509        break;
2510    case INDEX_op_sari_vec:
2511        if (is_scalar) {
2512            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2513        } else {
2514            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2515        }
2516        break;
2517    case INDEX_op_aa64_sli_vec:
            /* Only a0, a2 and args[3] are used; a1 is not referenced,
               presumably because the operand constraints tie it to the
               output a0 -- confirm in tcg_target_op_def. */
2518        if (is_scalar) {
2519            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2520        } else {
2521            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2522        }
2523        break;
2524    case INDEX_op_shlv_vec:
2525        if (is_scalar) {
2526            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2527        } else {
2528            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2529        }
2530        break;
2531    case INDEX_op_aa64_sshl_vec:
2532        if (is_scalar) {
2533            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2534        } else {
2535            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2536        }
2537        break;
2538    case INDEX_op_cmp_vec:
2539        {
2540            TCGCond cond = args[3];
2541            AArch64Insn insn;
2542
2543            if (cond == TCG_COND_NE) {
2544                if (const_args[2]) {
                        /* Constant operand: test a1 against itself with
                           CMTST (the constant is handled as zero, as in
                           the dupi of 0 further down). */
2545                    if (is_scalar) {
2546                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2547                    } else {
2548                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2549                    }
2550                } else {
                        /* Register operand: compare for equality, then
                           invert the result. */
2551                    if (is_scalar) {
2552                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2553                    } else {
2554                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2555                    }
2556                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2557                }
2558            } else {
2559                if (const_args[2]) {
                        /* Prefer a compare-against-zero instruction; these
                           breaks leave the whole cmp_vec case. */
2560                    if (is_scalar) {
2561                        insn = cmp0_scalar_insn[cond];
2562                        if (insn) {
2563                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2564                            break;
2565                        }
2566                    } else {
2567                        insn = cmp0_vec_insn[cond];
2568                        if (insn) {
2569                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2570                            break;
2571                        }
2572                    }
                        /* No zero form for this condition: put zero into
                           TCG_VEC_TMP and use the register compare. */
2573                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2574                    a2 = TCG_VEC_TMP;
2575                }
                    /* If the table has no entry for cond, swap the operands
                       and use the swapped condition instead. */
2576                if (is_scalar) {
2577                    insn = cmp_scalar_insn[cond];
2578                    if (insn == 0) {
2579                        TCGArg t;
2580                        t = a1, a1 = a2, a2 = t;
2581                        cond = tcg_swap_cond(cond);
2582                        insn = cmp_scalar_insn[cond];
2583                        tcg_debug_assert(insn != 0);
2584                    }
2585                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2586                } else {
2587                    insn = cmp_vec_insn[cond];
2588                    if (insn == 0) {
2589                        TCGArg t;
2590                        t = a1, a1 = a2, a2 = t;
2591                        cond = tcg_swap_cond(cond);
2592                        insn = cmp_vec_insn[cond];
2593                        tcg_debug_assert(insn != 0);
2594                    }
2595                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2596                }
2597            }
2598        }
2599        break;
2600
2601    case INDEX_op_bitsel_vec:
            /* Choose the select instruction by which input already sits
               in the destination: BIT when a0 == a3, BIF when a0 == a2;
               otherwise a1 is copied into a0 (if needed) and BSL is used. */
2602        a3 = args[3];
2603        if (a0 == a3) {
2604            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2605        } else if (a0 == a2) {
2606            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2607        } else {
2608            if (a0 != a1) {
2609                tcg_out_mov(s, type, a0, a1);
2610            }
2611            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2612        }
2613        break;
2614
2615    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2616    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2617    default:
2618        g_assert_not_reached();
2619    }
2620}
2621
2622int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2623{
2624    switch (opc) {
2625    case INDEX_op_add_vec:
2626    case INDEX_op_sub_vec:
2627    case INDEX_op_and_vec:
2628    case INDEX_op_or_vec:
2629    case INDEX_op_xor_vec:
2630    case INDEX_op_andc_vec:
2631    case INDEX_op_orc_vec:
2632    case INDEX_op_neg_vec:
2633    case INDEX_op_abs_vec:
2634    case INDEX_op_not_vec:
2635    case INDEX_op_cmp_vec:
2636    case INDEX_op_shli_vec:
2637    case INDEX_op_shri_vec:
2638    case INDEX_op_sari_vec:
2639    case INDEX_op_ssadd_vec:
2640    case INDEX_op_sssub_vec:
2641    case INDEX_op_usadd_vec:
2642    case INDEX_op_ussub_vec:
2643    case INDEX_op_shlv_vec:
2644    case INDEX_op_bitsel_vec:
2645        return 1;
2646    case INDEX_op_rotli_vec:
2647    case INDEX_op_shrv_vec:
2648    case INDEX_op_sarv_vec:
2649    case INDEX_op_rotlv_vec:
2650    case INDEX_op_rotrv_vec:
2651        return -1;
2652    case INDEX_op_mul_vec:
2653    case INDEX_op_smax_vec:
2654    case INDEX_op_smin_vec:
2655    case INDEX_op_umax_vec:
2656    case INDEX_op_umin_vec:
2657        return vece < MO_64;
2658
2659    default:
2660        return 0;
2661    }
2662}
2663
2664void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2665                       TCGArg a0, ...)
2666{
2667    va_list va;
2668    TCGv_vec v0, v1, v2, t1, t2, c1;
2669    TCGArg a2;
2670
2671    va_start(va, a0);
2672    v0 = temp_tcgv_vec(arg_temp(a0));
2673    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2674    a2 = va_arg(va, TCGArg);
2675    va_end(va);
2676
2677    switch (opc) {
2678    case INDEX_op_rotli_vec:
2679        t1 = tcg_temp_new_vec(type);
2680        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2681        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2682                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2683        tcg_temp_free_vec(t1);
2684        break;
2685
2686    case INDEX_op_shrv_vec:
2687    case INDEX_op_sarv_vec:
2688        /* Right shifts are negative left shifts for AArch64.  */
2689        v2 = temp_tcgv_vec(arg_temp(a2));
2690        t1 = tcg_temp_new_vec(type);
2691        tcg_gen_neg_vec(vece, t1, v2);
2692        opc = (opc == INDEX_op_shrv_vec
2693               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2694        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2695                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2696        tcg_temp_free_vec(t1);
2697        break;
2698
2699    case INDEX_op_rotlv_vec:
2700        v2 = temp_tcgv_vec(arg_temp(a2));
2701        t1 = tcg_temp_new_vec(type);
2702        c1 = tcg_constant_vec(type, vece, 8 << vece);
2703        tcg_gen_sub_vec(vece, t1, v2, c1);
2704        /* Right shifts are negative left shifts for AArch64.  */
2705        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2706                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2707        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2708                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2709        tcg_gen_or_vec(vece, v0, v0, t1);
2710        tcg_temp_free_vec(t1);
2711        break;
2712
2713    case INDEX_op_rotrv_vec:
2714        v2 = temp_tcgv_vec(arg_temp(a2));
2715        t1 = tcg_temp_new_vec(type);
2716        t2 = tcg_temp_new_vec(type);
2717        c1 = tcg_constant_vec(type, vece, 8 << vece);
2718        tcg_gen_neg_vec(vece, t1, v2);
2719        tcg_gen_sub_vec(vece, t2, c1, v2);
2720        /* Right shifts are negative left shifts for AArch64.  */
2721        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2722                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2723        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2724                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2725        tcg_gen_or_vec(vece, v0, t1, t2);
2726        tcg_temp_free_vec(t1);
2727        tcg_temp_free_vec(t2);
2728        break;
2729
2730    default:
2731        g_assert_not_reached();
2732    }
2733}
2734
2735static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2736{
2737    switch (op) {
2738    case INDEX_op_goto_ptr:
2739        return C_O0_I1(r);
2740
2741    case INDEX_op_ld8u_i32:
2742    case INDEX_op_ld8s_i32:
2743    case INDEX_op_ld16u_i32:
2744    case INDEX_op_ld16s_i32:
2745    case INDEX_op_ld_i32:
2746    case INDEX_op_ld8u_i64:
2747    case INDEX_op_ld8s_i64:
2748    case INDEX_op_ld16u_i64:
2749    case INDEX_op_ld16s_i64:
2750    case INDEX_op_ld32u_i64:
2751    case INDEX_op_ld32s_i64:
2752    case INDEX_op_ld_i64:
2753    case INDEX_op_neg_i32:
2754    case INDEX_op_neg_i64:
2755    case INDEX_op_not_i32:
2756    case INDEX_op_not_i64:
2757    case INDEX_op_bswap16_i32:
2758    case INDEX_op_bswap32_i32:
2759    case INDEX_op_bswap16_i64:
2760    case INDEX_op_bswap32_i64:
2761    case INDEX_op_bswap64_i64:
2762    case INDEX_op_ext8s_i32:
2763    case INDEX_op_ext16s_i32:
2764    case INDEX_op_ext8u_i32:
2765    case INDEX_op_ext16u_i32:
2766    case INDEX_op_ext8s_i64:
2767    case INDEX_op_ext16s_i64:
2768    case INDEX_op_ext32s_i64:
2769    case INDEX_op_ext8u_i64:
2770    case INDEX_op_ext16u_i64:
2771    case INDEX_op_ext32u_i64:
2772    case INDEX_op_ext_i32_i64:
2773    case INDEX_op_extu_i32_i64:
2774    case INDEX_op_extract_i32:
2775    case INDEX_op_extract_i64:
2776    case INDEX_op_sextract_i32:
2777    case INDEX_op_sextract_i64:
2778        return C_O1_I1(r, r);
2779
2780    case INDEX_op_st8_i32:
2781    case INDEX_op_st16_i32:
2782    case INDEX_op_st_i32:
2783    case INDEX_op_st8_i64:
2784    case INDEX_op_st16_i64:
2785    case INDEX_op_st32_i64:
2786    case INDEX_op_st_i64:
2787        return C_O0_I2(rZ, r);
2788
2789    case INDEX_op_add_i32:
2790    case INDEX_op_add_i64:
2791    case INDEX_op_sub_i32:
2792    case INDEX_op_sub_i64:
2793    case INDEX_op_setcond_i32:
2794    case INDEX_op_setcond_i64:
2795        return C_O1_I2(r, r, rA);
2796
2797    case INDEX_op_mul_i32:
2798    case INDEX_op_mul_i64:
2799    case INDEX_op_div_i32:
2800    case INDEX_op_div_i64:
2801    case INDEX_op_divu_i32:
2802    case INDEX_op_divu_i64:
2803    case INDEX_op_rem_i32:
2804    case INDEX_op_rem_i64:
2805    case INDEX_op_remu_i32:
2806    case INDEX_op_remu_i64:
2807    case INDEX_op_muluh_i64:
2808    case INDEX_op_mulsh_i64:
2809        return C_O1_I2(r, r, r);
2810
2811    case INDEX_op_and_i32:
2812    case INDEX_op_and_i64:
2813    case INDEX_op_or_i32:
2814    case INDEX_op_or_i64:
2815    case INDEX_op_xor_i32:
2816    case INDEX_op_xor_i64:
2817    case INDEX_op_andc_i32:
2818    case INDEX_op_andc_i64:
2819    case INDEX_op_orc_i32:
2820    case INDEX_op_orc_i64:
2821    case INDEX_op_eqv_i32:
2822    case INDEX_op_eqv_i64:
2823        return C_O1_I2(r, r, rL);
2824
2825    case INDEX_op_shl_i32:
2826    case INDEX_op_shr_i32:
2827    case INDEX_op_sar_i32:
2828    case INDEX_op_rotl_i32:
2829    case INDEX_op_rotr_i32:
2830    case INDEX_op_shl_i64:
2831    case INDEX_op_shr_i64:
2832    case INDEX_op_sar_i64:
2833    case INDEX_op_rotl_i64:
2834    case INDEX_op_rotr_i64:
2835        return C_O1_I2(r, r, ri);
2836
2837    case INDEX_op_clz_i32:
2838    case INDEX_op_ctz_i32:
2839    case INDEX_op_clz_i64:
2840    case INDEX_op_ctz_i64:
2841        return C_O1_I2(r, r, rAL);
2842
2843    case INDEX_op_brcond_i32:
2844    case INDEX_op_brcond_i64:
2845        return C_O0_I2(r, rA);
2846
2847    case INDEX_op_movcond_i32:
2848    case INDEX_op_movcond_i64:
2849        return C_O1_I4(r, r, rA, rZ, rZ);
2850
2851    case INDEX_op_qemu_ld_i32:
2852    case INDEX_op_qemu_ld_i64:
2853        return C_O1_I1(r, l);
2854    case INDEX_op_qemu_st_i32:
2855    case INDEX_op_qemu_st_i64:
2856        return C_O0_I2(lZ, l);
2857
2858    case INDEX_op_deposit_i32:
2859    case INDEX_op_deposit_i64:
2860        return C_O1_I2(r, 0, rZ);
2861
2862    case INDEX_op_extract2_i32:
2863    case INDEX_op_extract2_i64:
2864        return C_O1_I2(r, rZ, rZ);
2865
2866    case INDEX_op_add2_i32:
2867    case INDEX_op_add2_i64:
2868    case INDEX_op_sub2_i32:
2869    case INDEX_op_sub2_i64:
2870        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2871
2872    case INDEX_op_add_vec:
2873    case INDEX_op_sub_vec:
2874    case INDEX_op_mul_vec:
2875    case INDEX_op_xor_vec:
2876    case INDEX_op_ssadd_vec:
2877    case INDEX_op_sssub_vec:
2878    case INDEX_op_usadd_vec:
2879    case INDEX_op_ussub_vec:
2880    case INDEX_op_smax_vec:
2881    case INDEX_op_smin_vec:
2882    case INDEX_op_umax_vec:
2883    case INDEX_op_umin_vec:
2884    case INDEX_op_shlv_vec:
2885    case INDEX_op_shrv_vec:
2886    case INDEX_op_sarv_vec:
2887    case INDEX_op_aa64_sshl_vec:
2888        return C_O1_I2(w, w, w);
2889    case INDEX_op_not_vec:
2890    case INDEX_op_neg_vec:
2891    case INDEX_op_abs_vec:
2892    case INDEX_op_shli_vec:
2893    case INDEX_op_shri_vec:
2894    case INDEX_op_sari_vec:
2895        return C_O1_I1(w, w);
2896    case INDEX_op_ld_vec:
2897    case INDEX_op_dupm_vec:
2898        return C_O1_I1(w, r);
2899    case INDEX_op_st_vec:
2900        return C_O0_I2(w, r);
2901    case INDEX_op_dup_vec:
2902        return C_O1_I1(w, wr);
2903    case INDEX_op_or_vec:
2904    case INDEX_op_andc_vec:
2905        return C_O1_I2(w, w, wO);
2906    case INDEX_op_and_vec:
2907    case INDEX_op_orc_vec:
2908        return C_O1_I2(w, w, wN);
2909    case INDEX_op_cmp_vec:
2910        return C_O1_I2(w, w, wZ);
2911    case INDEX_op_bitsel_vec:
2912        return C_O1_I3(w, w, w, w);
2913    case INDEX_op_aa64_sli_vec:
2914        return C_O1_I2(w, 0, w);
2915
2916    default:
2917        g_assert_not_reached();
2918    }
2919}
2920
2921static void tcg_target_init(TCGContext *s)
2922{
2923    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2924    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2925    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2926    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2927
2928    tcg_target_call_clobber_regs = -1ull;
2929    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2930    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2931    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2937    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2938    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2939    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2940    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2941    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2942    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2943    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2944    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2945    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2946    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2947    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2948
2949    s->reserved_regs = 0;
2950    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2951    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2952    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2953    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2954    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2955}
2956
2957/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2958#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2959
2960#define FRAME_SIZE \
2961    ((PUSH_SIZE \
2962      + TCG_STATIC_CALL_ARGS_SIZE \
2963      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2964      + TCG_TARGET_STACK_ALIGN - 1) \
2965     & ~(TCG_TARGET_STACK_ALIGN - 1))
2966
2967/* We're expecting a 2 byte uleb128 encoded value.  */
2968QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2969
2970/* We're expecting to use a single ADDI insn.  */
2971QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2972
2973static void tcg_target_qemu_prologue(TCGContext *s)
2974{
2975    TCGReg r;
2976
2977    /* Push (FP, LR) and allocate space for all saved registers.  */
2978    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2979                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2980
2981    /* Set up frame pointer for canonical unwinding.  */
2982    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2983
2984    /* Store callee-preserved regs x19..x28.  */
2985    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2986        int ofs = (r - TCG_REG_X19 + 2) * 8;
2987        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2988    }
2989
2990    /* Make stack space for TCG locals.  */
2991    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2992                 FRAME_SIZE - PUSH_SIZE);
2993
2994    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2995    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2996                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2997
2998#if !defined(CONFIG_SOFTMMU)
2999    if (USE_GUEST_BASE) {
3000        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3001        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3002    }
3003#endif
3004
3005    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3006    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3007
3008    /*
3009     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3010     * and fall through to the rest of the epilogue.
3011     */
3012    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3013    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3014
3015    /* TB epilogue */
3016    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3017
3018    /* Remove TCG locals stack space.  */
3019    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3020                 FRAME_SIZE - PUSH_SIZE);
3021
3022    /* Restore registers x19..x28.  */
3023    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3024        int ofs = (r - TCG_REG_X19 + 2) * 8;
3025        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3026    }
3027
3028    /* Pop (FP, LR), restore SP to previous frame.  */
3029    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3030                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3031    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3032}
3033
3034static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3035{
3036    int i;
3037    for (i = 0; i < count; ++i) {
3038        p[i] = NOP;
3039    }
3040}
3041
3042typedef struct {
3043    DebugFrameHeader h;
3044    uint8_t fde_def_cfa[4];
3045    uint8_t fde_reg_ofs[24];
3046} DebugFrame;
3047
3048#define ELF_HOST_MACHINE EM_AARCH64
3049
3050static const DebugFrame debug_frame = {
3051    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3052    .h.cie.id = -1,
3053    .h.cie.version = 1,
3054    .h.cie.code_align = 1,
3055    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3056    .h.cie.return_column = TCG_REG_LR,
3057
3058    /* Total FDE size does not include the "len" member.  */
3059    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3060
3061    .fde_def_cfa = {
3062        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3063        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3064        (FRAME_SIZE >> 7)
3065    },
3066    .fde_reg_ofs = {
3067        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3068        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3069        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3070        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3071        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3072        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3073        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3074        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3075        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3076        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3077        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3078        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3079    }
3080};
3081
3082void tcg_register_jit(const void *buf, size_t buf_size)
3083{
3084    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3085}
3086