/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "qemu/bitops.h"
#ifdef __linux__
#include <asm/hwcap.h>
#endif
#ifdef CONFIG_DARWIN
#include <sys/sysctl.h>
#endif

/* We're going to reuse TCGType when setting the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
    TCG_REG_X16, TCG_REG_X17,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

bool have_lse;
bool have_lse2;

#define TCG_REG_TMP TCG_REG_X30
#define TCG_VEC_TMP TCG_REG_V31

#ifndef CONFIG_SOFTMMU
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}
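
/*
 * Field placement recap: B and BL carry a 26-bit word offset in insn
 * bits [25:0], while the conditional-branch and load-literal forms
 * carry a 19-bit word offset in bits [23:5]; hence the deposit32
 * positions used above.
 */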

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
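
/*
 * Worked example for the test above: val = 0x0ff0 (form 0..01..10..0)
 * has lowest set bit 0x10; adding it yields 0x1000, a power of 2, so
 * the pattern matches.  val = 0x0f0f has two separate runs of ones:
 * adding the lowest bit yields 0x0f10, which is not a power of 2, so
 * the test fails.
 */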

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
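
/*
 * The (cmode, imm8) pairs produced here and below are the AdvSIMD
 * "modified immediate" encodings: e.g. v16 = 0x1200 yields cmode 0xa,
 * imm8 0x12, i.e. MOVI Vd.8h, #0x12, lsl #8.
 */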

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
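
/*
 * This matches the FMOV (vector, immediate) expansion: the low 19
 * fraction bits must be zero and exponent bits [30:25] must be
 * 0b100000 or 0b011111; imm8 then packs the sign, one exponent bit,
 * and the remaining 6 significant bits.
 */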

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
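
/*
 * Worked example: v32 = 0x12000034.  At i = 6 the loop masks out the
 * byte in bits [31:24], leaving 0x34, which is_shimm32 matches with
 * cmode 0x0; the caller then emits MOVI #0x34 followed by
 * ORR #0x12, lsl #24 (cmode 0x6 == i).
 */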

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }
    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* alias: unsigned greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* alias: unsigned less than */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at a pc-relative offset.  */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
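
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ...) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ...); pairing an opcode with an
 * emitter of the wrong format fails to compile.
 */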

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
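
/*
 * The LDP/STP offset is a signed 7-bit immediate scaled by the access
 * size, 8 bytes here since we always set sf above; hence the assert
 * that ofs lies in [-0x200, 0x200) and is 8-byte aligned.
 */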

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
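
/*
 * Worked example: limm = 0x0ff0 gives h = 52, l = 4, thus r = 60 and
 * c = r - h - 1 = 7: an 8-bit run of ones (imms = 7) rotated right by
 * immr = 60, and indeed ror64(0xff, 60) == 0x0ff0.
 */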

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
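
/*
 * E.g. value = 0x123400005678, assuming the ADR/ADRP checks above do
 * not hit, is emitted as
 *     movz rd, #0x5678
 *     movk rd, #0x1234, lsl #32
 * while anything requiring three or more moves is loaded from the pool.
 */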

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding.  */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
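
/*
 * E.g. depositing 8 bits at lsb 8 of a 32-bit value gives a = 24 and
 * b = 7, i.e. BFM wd, wn, #24, #7, which is the alias BFI wd, wn, #8, #8.
 */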

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        /* Using CMP or CMN aliases.  */
        if (b >= 0) {
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    } else {
        /* Using CMP alias SUBS wzr, Wn, Wm */
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, B, offset);
    } else {
        /* Choose X9 as a call-clobbered non-LR temporary. */
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
    }
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    intptr_t offset;
    bool need_cmp;

    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
        need_cmp = false;
    } else {
        need_cmp = true;
        tcg_out_cmp(s, ext, a, b, b_const);
    }

    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        offset = tcg_in32(s) >> 5;
    } else {
        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
        tcg_debug_assert(offset == sextract64(offset, 0, 19));
    }

    if (need_cmp) {
        tcg_out_insn(s, 3202, B_C, c, offset);
    } else if (c == TCG_COND_EQ) {
        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
    } else {
        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
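
/*
 * Each table entry ORs DMB_LD and/or DMB_ST into the CRm field of the
 * base DMB encoding, selecting DMB ISHLD for load orderings, DMB ISHST
 * for store-store, and the full DMB ISH for everything else.
 */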

static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);

        if (const_b) {
            if (b == -1) {
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
    }
}
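
/*
 * AArch64 has no CTZ instruction, so the is_ctz case above computes
 * ctz(x) as clz(rbit(x)); the CSEL then substitutes the requested
 * value for a zero input when b is not simply the register width.
 */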
1586
1587typedef struct {
1588    TCGReg base;
1589    TCGReg index;
1590    TCGType index_ext;
1591    TCGAtomAlign aa;
1592} HostAddress;
1593
1594bool tcg_target_has_memory_bswap(MemOp memop)
1595{
1596    return false;
1597}
1598
1599static const TCGLdstHelperParam ldst_helper_param = {
1600    .ntmp = 1, .tmp = { TCG_REG_TMP }
1601};
1602
1603static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1604{
1605    MemOp opc = get_memop(lb->oi);
1606
1607    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1608        return false;
1609    }
1610
1611    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1612    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1613    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1614    tcg_out_goto(s, lb->raddr);
1615    return true;
1616}
1617
1618static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1619{
1620    MemOp opc = get_memop(lb->oi);
1621
1622    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1623        return false;
1624    }
1625
1626    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1627    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1628    tcg_out_goto(s, lb->raddr);
1629    return true;
1630}
1631
1632/*
1633 * For softmmu, perform the TLB load and compare.
1634 * For useronly, perform any required alignment tests.
1635 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1636 * is required and fill in @h with the host address for the fast path.
1637 */
1638static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1639                                           TCGReg addr_reg, MemOpIdx oi,
1640                                           bool is_ld)
1641{
1642    TCGType addr_type = s->addr_type;
1643    TCGLabelQemuLdst *ldst = NULL;
1644    MemOp opc = get_memop(oi);
1645    unsigned a_mask;
1646
1647    h->aa = atom_and_align_for_opc(s, opc,
1648                                   have_lse2 ? MO_ATOM_WITHIN16
1649                                             : MO_ATOM_IFALIGN,
1650                                   false);
1651    a_mask = (1 << h->aa.align) - 1;
1652
1653#ifdef CONFIG_SOFTMMU
1654    unsigned s_bits = opc & MO_SIZE;
1655    unsigned s_mask = (1u << s_bits) - 1;
1656    unsigned mem_index = get_mmuidx(oi);
1657    TCGReg x3;
1658    TCGType mask_type;
1659    uint64_t compare_mask;
1660
1661    ldst = new_ldst_label(s);
1662    ldst->is_ld = is_ld;
1663    ldst->oi = oi;
1664    ldst->addrlo_reg = addr_reg;
1665
1666    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1667                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1668
1669    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1670    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1671    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1672    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1673    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1674    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1675                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1676
1677    /* Extract the TLB index from the address into X0.  */
1678    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1679                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1680                 s->page_bits - CPU_TLB_ENTRY_BITS);
1681
1682    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1683    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1684
1685    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1686    tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
1687               is_ld ? offsetof(CPUTLBEntry, addr_read)
1688                     : offsetof(CPUTLBEntry, addr_write));
1689    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1690               offsetof(CPUTLBEntry, addend));
1691
1692    /*
1693     * For aligned accesses, we check the first byte and include the alignment
1694     * bits within the address.  For unaligned access, we check that we don't
1695     * cross pages using the address of the last byte of the access.
1696     */
1697    if (a_mask >= s_mask) {
1698        x3 = addr_reg;
1699    } else {
1700        tcg_out_insn(s, 3401, ADDI, addr_type,
1701                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1702        x3 = TCG_REG_X3;
1703    }
1704    compare_mask = (uint64_t)s->page_mask | a_mask;
1705
1706    /* Store the page mask part of the address into X3.  */
1707    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
1708
1709    /* Perform the address comparison. */
1710    tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
1711
1712    /* If not equal, we jump to the slow path. */
1713    ldst->label_ptr[0] = s->code_ptr;
1714    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1715
1716    h->base = TCG_REG_X1;
1717    h->index = addr_reg;
1718    h->index_ext = addr_type;
1719#else
1720    if (a_mask) {
1721        ldst = new_ldst_label(s);
1722
1723        ldst->is_ld = is_ld;
1724        ldst->oi = oi;
1725        ldst->addrlo_reg = addr_reg;
1726
1727        /* tst addr, #mask */
1728        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1729
1730        /* b.ne slow_path */
1731        ldst->label_ptr[0] = s->code_ptr;
1732        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1733    }
1734
1735    if (guest_base || addr_type == TCG_TYPE_I32) {
1736        h->base = TCG_REG_GUEST_BASE;
1737        h->index = addr_reg;
1738        h->index_ext = addr_type;
1739    } else {
1740        h->base = addr_reg;
1741        h->index = TCG_REG_XZR;
1742        h->index_ext = TCG_TYPE_I64;
1743    }
1744#endif
1745
1746    return ldst;
1747}
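
/*
 * Editorial sketch of the softmmu fast path emitted above, for an
 * access whose alignment covers its size (env lives in x19, offsets
 * symbolic):
 *
 *     ldp   x0, x1, [x19, #tlb_mask_table_ofs]  ; f[mmu_idx].{mask,table}
 *     and   x0, x0, x_addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *     add   x1, x1, x0                          ; &CPUTLBEntry
 *     ldr   x0, [x1, #addr_read]                ; comparator (or addr_write)
 *     ldr   x1, [x1, #addend]
 *     and   x3, x_addr, #(page_mask | a_mask)
 *     cmp   x0, x3
 *     b.ne  slow_path
 *
 * For an under-aligned access an ADDI of (s_mask - a_mask) first forms
 * the last-byte address, so a page-crossing access also takes the slow
 * path.  The fast-path host address is then x1 + x_addr.
 */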
1748
1749static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1750                                   TCGReg data_r, HostAddress h)
1751{
1752    switch (memop & MO_SSIZE) {
1753    case MO_UB:
1754        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1755        break;
1756    case MO_SB:
1757        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1758                       data_r, h.base, h.index_ext, h.index);
1759        break;
1760    case MO_UW:
1761        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1762        break;
1763    case MO_SW:
1764        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1765                       data_r, h.base, h.index_ext, h.index);
1766        break;
1767    case MO_UL:
1768        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1769        break;
1770    case MO_SL:
1771        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_UQ:
1774        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1775        break;
1776    default:
1777        g_assert_not_reached();
1778    }
1779}
1780
1781static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1782                                   TCGReg data_r, HostAddress h)
1783{
1784    switch (memop & MO_SIZE) {
1785    case MO_8:
1786        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1787        break;
1788    case MO_16:
1789        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1790        break;
1791    case MO_32:
1792        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1793        break;
1794    case MO_64:
1795        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1796        break;
1797    default:
1798        g_assert_not_reached();
1799    }
1800}
1801
1802static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1803                            MemOpIdx oi, TCGType data_type)
1804{
1805    TCGLabelQemuLdst *ldst;
1806    HostAddress h;
1807
1808    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1809    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1810
1811    if (ldst) {
1812        ldst->type = data_type;
1813        ldst->datalo_reg = data_reg;
1814        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1815    }
1816}
1817
1818static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1819                            MemOpIdx oi, TCGType data_type)
1820{
1821    TCGLabelQemuLdst *ldst;
1822    HostAddress h;
1823
1824    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1825    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1826
1827    if (ldst) {
1828        ldst->type = data_type;
1829        ldst->datalo_reg = data_reg;
1830        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1831    }
1832}
1833
1834static const tcg_insn_unit *tb_ret_addr;
1835
1836static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1837{
1838    /* Reuse the zeroing that exists for goto_ptr.  */
1839    if (a0 == 0) {
1840        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1841    } else {
1842        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1843        tcg_out_goto_long(s, tb_ret_addr);
1844    }
1845}
1846
1847static void tcg_out_goto_tb(TCGContext *s, int which)
1848{
1849    /*
1850     * Direct branch, or indirect address load, will be patched
1851     * by tb_target_set_jmp_target.  Assert indirect load offset
1852     * in range early, regardless of direct branch distance.
1853     */
1854    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1855    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1856
1857    set_jmp_insn_offset(s, which);
1858    tcg_out32(s, I3206_B);
1859    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1860    set_jmp_reset_offset(s, which);
1861}
1862
1863void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1864                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1865{
1866    uintptr_t d_addr = tb->jmp_target_addr[n];
1867    ptrdiff_t d_offset = d_addr - jmp_rx;
1868    tcg_insn_unit insn;
1869
1870    /* Either a direct branch, or a load for an indirect branch. */
1871    if (d_offset == sextract64(d_offset, 0, 28)) {
1872        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1873    } else {
1874        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1875        ptrdiff_t i_offset = i_addr - jmp_rx;
1876
1877        /* Note that we asserted this in range in tcg_out_goto_tb. */
1878        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
1879    }
1880    qatomic_set((uint32_t *)jmp_rw, insn);
1881    flush_idcache_range(jmp_rx, jmp_rw, 4);
1882}
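
/*
 * Editorial sketch: together with tcg_out_goto_tb() above, each jump
 * site toggles between two forms, patched with a single atomic insn
 * store plus icache flush:
 *
 *     b     <target>                 ; direct, BR below never reached
 *     br    x30
 * vs.
 *     ldr   x30, <jmp_target_addr>   ; literal load, range asserted
 *     br    x30                      ; in tcg_out_goto_tb()
 */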
1883
1884static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1885                       const TCGArg args[TCG_MAX_OP_ARGS],
1886                       const int const_args[TCG_MAX_OP_ARGS])
1887{
1888    /* 99% of the time, we can signal the use of extension registers
1889       by looking to see if the opcode handles 64-bit data.  */
1890    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1891
1892    /* Hoist the loads of the most common arguments.  */
1893    TCGArg a0 = args[0];
1894    TCGArg a1 = args[1];
1895    TCGArg a2 = args[2];
1896    int c2 = const_args[2];
1897
1898    /* Some operands are defined with "rZ" constraint, a register or
1899       the zero register.  A constant "rZ" operand is necessarily zero,
       so REG0 need not actually test args[I] == 0.  */
1900#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1901
1902    switch (opc) {
1903    case INDEX_op_goto_ptr:
1904        tcg_out_insn(s, 3207, BR, a0);
1905        break;
1906
1907    case INDEX_op_br:
1908        tcg_out_goto_label(s, arg_label(a0));
1909        break;
1910
1911    case INDEX_op_ld8u_i32:
1912    case INDEX_op_ld8u_i64:
1913        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1914        break;
1915    case INDEX_op_ld8s_i32:
1916        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1917        break;
1918    case INDEX_op_ld8s_i64:
1919        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1920        break;
1921    case INDEX_op_ld16u_i32:
1922    case INDEX_op_ld16u_i64:
1923        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1924        break;
1925    case INDEX_op_ld16s_i32:
1926        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1927        break;
1928    case INDEX_op_ld16s_i64:
1929        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1930        break;
1931    case INDEX_op_ld_i32:
1932    case INDEX_op_ld32u_i64:
1933        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1934        break;
1935    case INDEX_op_ld32s_i64:
1936        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1937        break;
1938    case INDEX_op_ld_i64:
1939        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1940        break;
1941
1942    case INDEX_op_st8_i32:
1943    case INDEX_op_st8_i64:
1944        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1945        break;
1946    case INDEX_op_st16_i32:
1947    case INDEX_op_st16_i64:
1948        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1949        break;
1950    case INDEX_op_st_i32:
1951    case INDEX_op_st32_i64:
1952        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1953        break;
1954    case INDEX_op_st_i64:
1955        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1956        break;
1957
1958    case INDEX_op_add_i32:
1959        a2 = (int32_t)a2;
1960        /* FALLTHRU */
1961    case INDEX_op_add_i64:
1962        if (c2) {
1963            tcg_out_addsubi(s, ext, a0, a1, a2);
1964        } else {
1965            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1966        }
1967        break;
1968
1969    case INDEX_op_sub_i32:
1970        a2 = (int32_t)a2;
1971        /* FALLTHRU */
1972    case INDEX_op_sub_i64:
1973        if (c2) {
1974            tcg_out_addsubi(s, ext, a0, a1, -a2);
1975        } else {
1976            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1977        }
1978        break;
1979
1980    case INDEX_op_neg_i64:
1981    case INDEX_op_neg_i32:
1982        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1983        break;
1984
1985    case INDEX_op_and_i32:
1986        a2 = (int32_t)a2;
1987        /* FALLTHRU */
1988    case INDEX_op_and_i64:
1989        if (c2) {
1990            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1991        } else {
1992            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1993        }
1994        break;
1995
1996    case INDEX_op_andc_i32:
1997        a2 = (int32_t)a2;
1998        /* FALLTHRU */
1999    case INDEX_op_andc_i64:
2000        if (c2) {
2001            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2002        } else {
2003            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2004        }
2005        break;
2006
2007    case INDEX_op_or_i32:
2008        a2 = (int32_t)a2;
2009        /* FALLTHRU */
2010    case INDEX_op_or_i64:
2011        if (c2) {
2012            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2013        } else {
2014            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2015        }
2016        break;
2017
2018    case INDEX_op_orc_i32:
2019        a2 = (int32_t)a2;
2020        /* FALLTHRU */
2021    case INDEX_op_orc_i64:
2022        if (c2) {
2023            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2024        } else {
2025            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2026        }
2027        break;
2028
2029    case INDEX_op_xor_i32:
2030        a2 = (int32_t)a2;
2031        /* FALLTHRU */
2032    case INDEX_op_xor_i64:
2033        if (c2) {
2034            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2035        } else {
2036            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2037        }
2038        break;
2039
2040    case INDEX_op_eqv_i32:
2041        a2 = (int32_t)a2;
2042        /* FALLTHRU */
2043    case INDEX_op_eqv_i64:
2044        if (c2) {
2045            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2046        } else {
2047            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2048        }
2049        break;
2050
2051    case INDEX_op_not_i64:
2052    case INDEX_op_not_i32:
2053        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2054        break;
2055
2056    case INDEX_op_mul_i64:
2057    case INDEX_op_mul_i32:
2058        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2059        break;
2060
2061    case INDEX_op_div_i64:
2062    case INDEX_op_div_i32:
2063        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2064        break;
2065    case INDEX_op_divu_i64:
2066    case INDEX_op_divu_i32:
2067        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2068        break;
2069
2070    case INDEX_op_rem_i64:
2071    case INDEX_op_rem_i32:
2072        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2073        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2074        break;
2075    case INDEX_op_remu_i64:
2076    case INDEX_op_remu_i32:
2077        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2078        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2079        break;
2080
2081    case INDEX_op_shl_i64:
2082    case INDEX_op_shl_i32:
2083        if (c2) {
2084            tcg_out_shl(s, ext, a0, a1, a2);
2085        } else {
2086            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2087        }
2088        break;
2089
2090    case INDEX_op_shr_i64:
2091    case INDEX_op_shr_i32:
2092        if (c2) {
2093            tcg_out_shr(s, ext, a0, a1, a2);
2094        } else {
2095            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2096        }
2097        break;
2098
2099    case INDEX_op_sar_i64:
2100    case INDEX_op_sar_i32:
2101        if (c2) {
2102            tcg_out_sar(s, ext, a0, a1, a2);
2103        } else {
2104            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2105        }
2106        break;
2107
2108    case INDEX_op_rotr_i64:
2109    case INDEX_op_rotr_i32:
2110        if (c2) {
2111            tcg_out_rotr(s, ext, a0, a1, a2);
2112        } else {
2113            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2114        }
2115        break;
2116
2117    case INDEX_op_rotl_i64:
2118    case INDEX_op_rotl_i32:
2119        if (c2) {
2120            tcg_out_rotl(s, ext, a0, a1, a2);
2121        } else {
2122            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2123            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2124        }
2125        break;
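
    /*
     * AArch64 has no rotate-left insn, so a variable rotl above is
     * emitted as a negate plus RORV, e.g. (sketch) for rotl_i64:
     *     neg  w_tmp, w_count  ; 32-bit negate suffices, since RORV
     *     rorv xd, xn, x_tmp   ; only consumes the low count bits
     */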
2126
2127    case INDEX_op_clz_i64:
2128    case INDEX_op_clz_i32:
2129        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2130        break;
2131    case INDEX_op_ctz_i64:
2132    case INDEX_op_ctz_i32:
2133        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2134        break;
2135
2136    case INDEX_op_brcond_i32:
2137        a1 = (int32_t)a1;
2138        /* FALLTHRU */
2139    case INDEX_op_brcond_i64:
2140        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2141        break;
2142
2143    case INDEX_op_setcond_i32:
2144        a2 = (int32_t)a2;
2145        /* FALLTHRU */
2146    case INDEX_op_setcond_i64:
2147        tcg_out_cmp(s, ext, a1, a2, c2);
2148        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2149        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2150                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2151        break;
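
    /*
     * Sketch: setcond_i64 d, a, b, TCG_COND_EQ thus becomes
     *     cmp   xa, xb
     *     csinc wd, wzr, wzr, ne    ; i.e. "cset wd, eq"
     * with the 32-bit write implicitly zero-extending to 64 bits.
     */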
2152
2153    case INDEX_op_movcond_i32:
2154        a2 = (int32_t)a2;
2155        /* FALLTHRU */
2156    case INDEX_op_movcond_i64:
2157        tcg_out_cmp(s, ext, a1, a2, c2);
2158        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2159        break;
2160
2161    case INDEX_op_qemu_ld_a32_i32:
2162    case INDEX_op_qemu_ld_a64_i32:
2163    case INDEX_op_qemu_ld_a32_i64:
2164    case INDEX_op_qemu_ld_a64_i64:
2165        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2166        break;
2167    case INDEX_op_qemu_st_a32_i32:
2168    case INDEX_op_qemu_st_a64_i32:
2169    case INDEX_op_qemu_st_a32_i64:
2170    case INDEX_op_qemu_st_a64_i64:
2171        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2172        break;
2173
2174    case INDEX_op_bswap64_i64:
2175        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2176        break;
2177    case INDEX_op_bswap32_i64:
2178        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2179        if (a2 & TCG_BSWAP_OS) {
2180            tcg_out_ext32s(s, a0, a0);
2181        }
2182        break;
2183    case INDEX_op_bswap32_i32:
2184        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2185        break;
2186    case INDEX_op_bswap16_i64:
2187    case INDEX_op_bswap16_i32:
2188        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2189        if (a2 & TCG_BSWAP_OS) {
2190            /* Output must be sign-extended. */
2191            tcg_out_ext16s(s, ext, a0, a0);
2192        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2193            /* Output must be zero-extended, but input isn't. */
2194            tcg_out_ext16u(s, a0, a0);
2195        }
2196        break;
2197
2198    case INDEX_op_deposit_i64:
2199    case INDEX_op_deposit_i32:
2200        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2201        break;
2202
2203    case INDEX_op_extract_i64:
2204    case INDEX_op_extract_i32:
2205        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2206        break;
2207
2208    case INDEX_op_sextract_i64:
2209    case INDEX_op_sextract_i32:
2210        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2211        break;
2212
2213    case INDEX_op_extract2_i64:
2214    case INDEX_op_extract2_i32:
2215        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2216        break;
2217
2218    case INDEX_op_add2_i32:
2219        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2220                        (int32_t)args[4], args[5], const_args[4],
2221                        const_args[5], false);
2222        break;
2223    case INDEX_op_add2_i64:
2224        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2225                        args[5], const_args[4], const_args[5], false);
2226        break;
2227    case INDEX_op_sub2_i32:
2228        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2229                        (int32_t)args[4], args[5], const_args[4],
2230                        const_args[5], true);
2231        break;
2232    case INDEX_op_sub2_i64:
2233        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2234                        args[5], const_args[4], const_args[5], true);
2235        break;
2236
2237    case INDEX_op_muluh_i64:
2238        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2239        break;
2240    case INDEX_op_mulsh_i64:
2241        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2242        break;
2243
2244    case INDEX_op_mb:
2245        tcg_out_mb(s, a0);
2246        break;
2247
2248    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2249    case INDEX_op_mov_i64:
2250    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2251    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2252    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2253    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2254    case INDEX_op_ext8s_i64:
2255    case INDEX_op_ext8u_i32:
2256    case INDEX_op_ext8u_i64:
2257    case INDEX_op_ext16s_i64:
2258    case INDEX_op_ext16s_i32:
2259    case INDEX_op_ext16u_i64:
2260    case INDEX_op_ext16u_i32:
2261    case INDEX_op_ext32s_i64:
2262    case INDEX_op_ext32u_i64:
2263    case INDEX_op_ext_i32_i64:
2264    case INDEX_op_extu_i32_i64:
2265    case INDEX_op_extrl_i64_i32:
2266    default:
2267        g_assert_not_reached();
2268    }
2269
2270#undef REG0
2271}
2272
2273static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2274                           unsigned vecl, unsigned vece,
2275                           const TCGArg args[TCG_MAX_OP_ARGS],
2276                           const int const_args[TCG_MAX_OP_ARGS])
2277{
2278    static const AArch64Insn cmp_vec_insn[16] = {
2279        [TCG_COND_EQ] = I3616_CMEQ,
2280        [TCG_COND_GT] = I3616_CMGT,
2281        [TCG_COND_GE] = I3616_CMGE,
2282        [TCG_COND_GTU] = I3616_CMHI,
2283        [TCG_COND_GEU] = I3616_CMHS,
2284    };
2285    static const AArch64Insn cmp_scalar_insn[16] = {
2286        [TCG_COND_EQ] = I3611_CMEQ,
2287        [TCG_COND_GT] = I3611_CMGT,
2288        [TCG_COND_GE] = I3611_CMGE,
2289        [TCG_COND_GTU] = I3611_CMHI,
2290        [TCG_COND_GEU] = I3611_CMHS,
2291    };
2292    static const AArch64Insn cmp0_vec_insn[16] = {
2293        [TCG_COND_EQ] = I3617_CMEQ0,
2294        [TCG_COND_GT] = I3617_CMGT0,
2295        [TCG_COND_GE] = I3617_CMGE0,
2296        [TCG_COND_LT] = I3617_CMLT0,
2297        [TCG_COND_LE] = I3617_CMLE0,
2298    };
2299    static const AArch64Insn cmp0_scalar_insn[16] = {
2300        [TCG_COND_EQ] = I3612_CMEQ0,
2301        [TCG_COND_GT] = I3612_CMGT0,
2302        [TCG_COND_GE] = I3612_CMGE0,
2303        [TCG_COND_LT] = I3612_CMLT0,
2304        [TCG_COND_LE] = I3612_CMLE0,
2305    };
2306
2307    TCGType type = vecl + TCG_TYPE_V64;
2308    unsigned is_q = vecl;
2309    bool is_scalar = !is_q && vece == MO_64;
2310    TCGArg a0, a1, a2, a3;
2311    int cmode, imm8;
2312
2313    a0 = args[0];
2314    a1 = args[1];
2315    a2 = args[2];
2316
2317    switch (opc) {
2318    case INDEX_op_ld_vec:
2319        tcg_out_ld(s, type, a0, a1, a2);
2320        break;
2321    case INDEX_op_st_vec:
2322        tcg_out_st(s, type, a0, a1, a2);
2323        break;
2324    case INDEX_op_dupm_vec:
2325        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2326        break;
2327    case INDEX_op_add_vec:
2328        if (is_scalar) {
2329            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2330        } else {
2331            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2332        }
2333        break;
2334    case INDEX_op_sub_vec:
2335        if (is_scalar) {
2336            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2337        } else {
2338            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2339        }
2340        break;
2341    case INDEX_op_mul_vec:
2342        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2343        break;
2344    case INDEX_op_neg_vec:
2345        if (is_scalar) {
2346            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2347        } else {
2348            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2349        }
2350        break;
2351    case INDEX_op_abs_vec:
2352        if (is_scalar) {
2353            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2354        } else {
2355            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2356        }
2357        break;
2358    case INDEX_op_and_vec:
2359        if (const_args[2]) {
2360            is_shimm1632(~a2, &cmode, &imm8);
2361            if (a0 == a1) {
2362                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2363                return;
2364            }
2365            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2366            a2 = a0;
2367        }
2368        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2369        break;
2370    case INDEX_op_or_vec:
2371        if (const_args[2]) {
2372            is_shimm1632(a2, &cmode, &imm8);
2373            if (a0 == a1) {
2374                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2375                return;
2376            }
2377            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2378            a2 = a0;
2379        }
2380        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2381        break;
2382    case INDEX_op_andc_vec:
2383        if (const_args[2]) {
2384            is_shimm1632(a2, &cmode, &imm8);
2385            if (a0 == a1) {
2386                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2387                return;
2388            }
2389            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2390            a2 = a0;
2391        }
2392        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2393        break;
2394    case INDEX_op_orc_vec:
2395        if (const_args[2]) {
2396            is_shimm1632(~a2, &cmode, &imm8);
2397            if (a0 == a1) {
2398                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2399                return;
2400            }
2401            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2402            a2 = a0;
2403        }
2404        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2405        break;
2406    case INDEX_op_xor_vec:
2407        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2408        break;
2409    case INDEX_op_ssadd_vec:
2410        if (is_scalar) {
2411            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2412        } else {
2413            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2414        }
2415        break;
2416    case INDEX_op_sssub_vec:
2417        if (is_scalar) {
2418            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2419        } else {
2420            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2421        }
2422        break;
2423    case INDEX_op_usadd_vec:
2424        if (is_scalar) {
2425            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2426        } else {
2427            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2428        }
2429        break;
2430    case INDEX_op_ussub_vec:
2431        if (is_scalar) {
2432            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2433        } else {
2434            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2435        }
2436        break;
2437    case INDEX_op_smax_vec:
2438        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2439        break;
2440    case INDEX_op_smin_vec:
2441        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2442        break;
2443    case INDEX_op_umax_vec:
2444        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2445        break;
2446    case INDEX_op_umin_vec:
2447        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2448        break;
2449    case INDEX_op_not_vec:
2450        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2451        break;
2452    case INDEX_op_shli_vec:
2453        if (is_scalar) {
2454            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2455        } else {
2456            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2457        }
2458        break;
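
    /*
     * The "a2 + (8 << vece)" above folds the element size into the
     * immh:immb field, which encodes lane width and shift jointly.
     * Worked example: vece = MO_16, shift 3 gives 16 + 3 = 0b0010011,
     * i.e. immh = 0010 selecting 16-bit lanes with a shift of 3.  The
     * right shifts below instead encode (2 * width) - shift.
     */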
2459    case INDEX_op_shri_vec:
2460        if (is_scalar) {
2461            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2462        } else {
2463            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2464        }
2465        break;
2466    case INDEX_op_sari_vec:
2467        if (is_scalar) {
2468            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2469        } else {
2470            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2471        }
2472        break;
2473    case INDEX_op_aa64_sli_vec:
2474        if (is_scalar) {
2475            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2476        } else {
2477            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2478        }
2479        break;
2480    case INDEX_op_shlv_vec:
2481        if (is_scalar) {
2482            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2483        } else {
2484            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2485        }
2486        break;
2487    case INDEX_op_aa64_sshl_vec:
2488        if (is_scalar) {
2489            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2490        } else {
2491            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2492        }
2493        break;
2494    case INDEX_op_cmp_vec:
2495        {
2496            TCGCond cond = args[3];
2497            AArch64Insn insn;
2498
2499            if (cond == TCG_COND_NE) {
2500                if (const_args[2]) {
2501                    if (is_scalar) {
2502                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2503                    } else {
2504                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2505                    }
2506                } else {
2507                    if (is_scalar) {
2508                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2509                    } else {
2510                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2511                    }
2512                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2513                }
2514            } else {
2515                if (const_args[2]) {
2516                    if (is_scalar) {
2517                        insn = cmp0_scalar_insn[cond];
2518                        if (insn) {
2519                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2520                            break;
2521                        }
2522                    } else {
2523                        insn = cmp0_vec_insn[cond];
2524                        if (insn) {
2525                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2526                            break;
2527                        }
2528                    }
2529                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2530                    a2 = TCG_VEC_TMP;
2531                }
2532                if (is_scalar) {
2533                    insn = cmp_scalar_insn[cond];
2534                    if (insn == 0) {
2535                        TCGArg t;
2536                        t = a1, a1 = a2, a2 = t;
2537                        cond = tcg_swap_cond(cond);
2538                        insn = cmp_scalar_insn[cond];
2539                        tcg_debug_assert(insn != 0);
2540                    }
2541                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2542                } else {
2543                    insn = cmp_vec_insn[cond];
2544                    if (insn == 0) {
2545                        TCGArg t;
2546                        t = a1, a1 = a2, a2 = t;
2547                        cond = tcg_swap_cond(cond);
2548                        insn = cmp_vec_insn[cond];
2549                        tcg_debug_assert(insn != 0);
2550                    }
2551                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2552                }
2553            }
2554        }
2555        break;
2556
2557    case INDEX_op_bitsel_vec:
2558        a3 = args[3];
2559        if (a0 == a3) {
2560            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2561        } else if (a0 == a2) {
2562            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2563        } else {
2564            if (a0 != a1) {
2565                tcg_out_mov(s, type, a0, a1);
2566            }
2567            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2568        }
2569        break;
2570
2571    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2572    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2573    default:
2574        g_assert_not_reached();
2575    }
2576}
2577
2578int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2579{
2580    switch (opc) {
2581    case INDEX_op_add_vec:
2582    case INDEX_op_sub_vec:
2583    case INDEX_op_and_vec:
2584    case INDEX_op_or_vec:
2585    case INDEX_op_xor_vec:
2586    case INDEX_op_andc_vec:
2587    case INDEX_op_orc_vec:
2588    case INDEX_op_neg_vec:
2589    case INDEX_op_abs_vec:
2590    case INDEX_op_not_vec:
2591    case INDEX_op_cmp_vec:
2592    case INDEX_op_shli_vec:
2593    case INDEX_op_shri_vec:
2594    case INDEX_op_sari_vec:
2595    case INDEX_op_ssadd_vec:
2596    case INDEX_op_sssub_vec:
2597    case INDEX_op_usadd_vec:
2598    case INDEX_op_ussub_vec:
2599    case INDEX_op_shlv_vec:
2600    case INDEX_op_bitsel_vec:
2601        return 1;
2602    case INDEX_op_rotli_vec:
2603    case INDEX_op_shrv_vec:
2604    case INDEX_op_sarv_vec:
2605    case INDEX_op_rotlv_vec:
2606    case INDEX_op_rotrv_vec:
2607        return -1;
2608    case INDEX_op_mul_vec:
2609    case INDEX_op_smax_vec:
2610    case INDEX_op_smin_vec:
2611    case INDEX_op_umax_vec:
2612    case INDEX_op_umin_vec:
2613        return vece < MO_64;
2614
2615    default:
2616        return 0;
2617    }
2618}
2619
2620void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2621                       TCGArg a0, ...)
2622{
2623    va_list va;
2624    TCGv_vec v0, v1, v2, t1, t2, c1;
2625    TCGArg a2;
2626
2627    va_start(va, a0);
2628    v0 = temp_tcgv_vec(arg_temp(a0));
2629    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2630    a2 = va_arg(va, TCGArg);
2631    va_end(va);
2632
2633    switch (opc) {
2634    case INDEX_op_rotli_vec:
2635        t1 = tcg_temp_new_vec(type);
2636        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2637        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2638                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2639        tcg_temp_free_vec(t1);
2640        break;
2641
2642    case INDEX_op_shrv_vec:
2643    case INDEX_op_sarv_vec:
2644        /* Right shifts are negative left shifts for AArch64.  */
2645        v2 = temp_tcgv_vec(arg_temp(a2));
2646        t1 = tcg_temp_new_vec(type);
2647        tcg_gen_neg_vec(vece, t1, v2);
2648        opc = (opc == INDEX_op_shrv_vec
2649               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2650        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2651                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2652        tcg_temp_free_vec(t1);
2653        break;
2654
2655    case INDEX_op_rotlv_vec:
2656        v2 = temp_tcgv_vec(arg_temp(a2));
2657        t1 = tcg_temp_new_vec(type);
2658        c1 = tcg_constant_vec(type, vece, 8 << vece);
2659        tcg_gen_sub_vec(vece, t1, v2, c1);
2660        /* Right shifts are negative left shifts for AArch64.  */
2661        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2662                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2663        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2664                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2665        tcg_gen_or_vec(vece, v0, v0, t1);
2666        tcg_temp_free_vec(t1);
2667        break;
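
    /*
     * Sketch of the rotlv expansion above, with c1 = element width:
     *     t1 = shlv(v1, v2 - c1)    ; negative count, so USHL shifts
     *                               ; right by (width - v2)
     *     v0 = shlv(v1, v2)
     *     v0 = v0 | t1
     * i.e. the usual (x << n) | (x >> (width - n)) rotate.
     */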
2668
2669    case INDEX_op_rotrv_vec:
2670        v2 = temp_tcgv_vec(arg_temp(a2));
2671        t1 = tcg_temp_new_vec(type);
2672        t2 = tcg_temp_new_vec(type);
2673        c1 = tcg_constant_vec(type, vece, 8 << vece);
2674        tcg_gen_neg_vec(vece, t1, v2);
2675        tcg_gen_sub_vec(vece, t2, c1, v2);
2676        /* Right shifts are negative left shifts for AArch64.  */
2677        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2678                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2679        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2680                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2681        tcg_gen_or_vec(vece, v0, t1, t2);
2682        tcg_temp_free_vec(t1);
2683        tcg_temp_free_vec(t2);
2684        break;
2685
2686    default:
2687        g_assert_not_reached();
2688    }
2689}
2690
2691static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2692{
2693    switch (op) {
2694    case INDEX_op_goto_ptr:
2695        return C_O0_I1(r);
2696
2697    case INDEX_op_ld8u_i32:
2698    case INDEX_op_ld8s_i32:
2699    case INDEX_op_ld16u_i32:
2700    case INDEX_op_ld16s_i32:
2701    case INDEX_op_ld_i32:
2702    case INDEX_op_ld8u_i64:
2703    case INDEX_op_ld8s_i64:
2704    case INDEX_op_ld16u_i64:
2705    case INDEX_op_ld16s_i64:
2706    case INDEX_op_ld32u_i64:
2707    case INDEX_op_ld32s_i64:
2708    case INDEX_op_ld_i64:
2709    case INDEX_op_neg_i32:
2710    case INDEX_op_neg_i64:
2711    case INDEX_op_not_i32:
2712    case INDEX_op_not_i64:
2713    case INDEX_op_bswap16_i32:
2714    case INDEX_op_bswap32_i32:
2715    case INDEX_op_bswap16_i64:
2716    case INDEX_op_bswap32_i64:
2717    case INDEX_op_bswap64_i64:
2718    case INDEX_op_ext8s_i32:
2719    case INDEX_op_ext16s_i32:
2720    case INDEX_op_ext8u_i32:
2721    case INDEX_op_ext16u_i32:
2722    case INDEX_op_ext8s_i64:
2723    case INDEX_op_ext16s_i64:
2724    case INDEX_op_ext32s_i64:
2725    case INDEX_op_ext8u_i64:
2726    case INDEX_op_ext16u_i64:
2727    case INDEX_op_ext32u_i64:
2728    case INDEX_op_ext_i32_i64:
2729    case INDEX_op_extu_i32_i64:
2730    case INDEX_op_extract_i32:
2731    case INDEX_op_extract_i64:
2732    case INDEX_op_sextract_i32:
2733    case INDEX_op_sextract_i64:
2734        return C_O1_I1(r, r);
2735
2736    case INDEX_op_st8_i32:
2737    case INDEX_op_st16_i32:
2738    case INDEX_op_st_i32:
2739    case INDEX_op_st8_i64:
2740    case INDEX_op_st16_i64:
2741    case INDEX_op_st32_i64:
2742    case INDEX_op_st_i64:
2743        return C_O0_I2(rZ, r);
2744
2745    case INDEX_op_add_i32:
2746    case INDEX_op_add_i64:
2747    case INDEX_op_sub_i32:
2748    case INDEX_op_sub_i64:
2749    case INDEX_op_setcond_i32:
2750    case INDEX_op_setcond_i64:
2751        return C_O1_I2(r, r, rA);
2752
2753    case INDEX_op_mul_i32:
2754    case INDEX_op_mul_i64:
2755    case INDEX_op_div_i32:
2756    case INDEX_op_div_i64:
2757    case INDEX_op_divu_i32:
2758    case INDEX_op_divu_i64:
2759    case INDEX_op_rem_i32:
2760    case INDEX_op_rem_i64:
2761    case INDEX_op_remu_i32:
2762    case INDEX_op_remu_i64:
2763    case INDEX_op_muluh_i64:
2764    case INDEX_op_mulsh_i64:
2765        return C_O1_I2(r, r, r);
2766
2767    case INDEX_op_and_i32:
2768    case INDEX_op_and_i64:
2769    case INDEX_op_or_i32:
2770    case INDEX_op_or_i64:
2771    case INDEX_op_xor_i32:
2772    case INDEX_op_xor_i64:
2773    case INDEX_op_andc_i32:
2774    case INDEX_op_andc_i64:
2775    case INDEX_op_orc_i32:
2776    case INDEX_op_orc_i64:
2777    case INDEX_op_eqv_i32:
2778    case INDEX_op_eqv_i64:
2779        return C_O1_I2(r, r, rL);
2780
2781    case INDEX_op_shl_i32:
2782    case INDEX_op_shr_i32:
2783    case INDEX_op_sar_i32:
2784    case INDEX_op_rotl_i32:
2785    case INDEX_op_rotr_i32:
2786    case INDEX_op_shl_i64:
2787    case INDEX_op_shr_i64:
2788    case INDEX_op_sar_i64:
2789    case INDEX_op_rotl_i64:
2790    case INDEX_op_rotr_i64:
2791        return C_O1_I2(r, r, ri);
2792
2793    case INDEX_op_clz_i32:
2794    case INDEX_op_ctz_i32:
2795    case INDEX_op_clz_i64:
2796    case INDEX_op_ctz_i64:
2797        return C_O1_I2(r, r, rAL);
2798
2799    case INDEX_op_brcond_i32:
2800    case INDEX_op_brcond_i64:
2801        return C_O0_I2(r, rA);
2802
2803    case INDEX_op_movcond_i32:
2804    case INDEX_op_movcond_i64:
2805        return C_O1_I4(r, r, rA, rZ, rZ);
2806
2807    case INDEX_op_qemu_ld_a32_i32:
2808    case INDEX_op_qemu_ld_a64_i32:
2809    case INDEX_op_qemu_ld_a32_i64:
2810    case INDEX_op_qemu_ld_a64_i64:
2811        return C_O1_I1(r, l);
2812    case INDEX_op_qemu_st_a32_i32:
2813    case INDEX_op_qemu_st_a64_i32:
2814    case INDEX_op_qemu_st_a32_i64:
2815    case INDEX_op_qemu_st_a64_i64:
2816        return C_O0_I2(lZ, l);
2817
2818    case INDEX_op_deposit_i32:
2819    case INDEX_op_deposit_i64:
2820        return C_O1_I2(r, 0, rZ);
2821
2822    case INDEX_op_extract2_i32:
2823    case INDEX_op_extract2_i64:
2824        return C_O1_I2(r, rZ, rZ);
2825
2826    case INDEX_op_add2_i32:
2827    case INDEX_op_add2_i64:
2828    case INDEX_op_sub2_i32:
2829    case INDEX_op_sub2_i64:
2830        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2831
2832    case INDEX_op_add_vec:
2833    case INDEX_op_sub_vec:
2834    case INDEX_op_mul_vec:
2835    case INDEX_op_xor_vec:
2836    case INDEX_op_ssadd_vec:
2837    case INDEX_op_sssub_vec:
2838    case INDEX_op_usadd_vec:
2839    case INDEX_op_ussub_vec:
2840    case INDEX_op_smax_vec:
2841    case INDEX_op_smin_vec:
2842    case INDEX_op_umax_vec:
2843    case INDEX_op_umin_vec:
2844    case INDEX_op_shlv_vec:
2845    case INDEX_op_shrv_vec:
2846    case INDEX_op_sarv_vec:
2847    case INDEX_op_aa64_sshl_vec:
2848        return C_O1_I2(w, w, w);
2849    case INDEX_op_not_vec:
2850    case INDEX_op_neg_vec:
2851    case INDEX_op_abs_vec:
2852    case INDEX_op_shli_vec:
2853    case INDEX_op_shri_vec:
2854    case INDEX_op_sari_vec:
2855        return C_O1_I1(w, w);
2856    case INDEX_op_ld_vec:
2857    case INDEX_op_dupm_vec:
2858        return C_O1_I1(w, r);
2859    case INDEX_op_st_vec:
2860        return C_O0_I2(w, r);
2861    case INDEX_op_dup_vec:
2862        return C_O1_I1(w, wr);
2863    case INDEX_op_or_vec:
2864    case INDEX_op_andc_vec:
2865        return C_O1_I2(w, w, wO);
2866    case INDEX_op_and_vec:
2867    case INDEX_op_orc_vec:
2868        return C_O1_I2(w, w, wN);
2869    case INDEX_op_cmp_vec:
2870        return C_O1_I2(w, w, wZ);
2871    case INDEX_op_bitsel_vec:
2872        return C_O1_I3(w, w, w, w);
2873    case INDEX_op_aa64_sli_vec:
2874        return C_O1_I2(w, 0, w);
2875
2876    default:
2877        g_assert_not_reached();
2878    }
2879}
2880
2881#ifdef CONFIG_DARWIN
2882static bool sysctl_for_bool(const char *name)
2883{
2884    int val = 0;
2885    size_t len = sizeof(val);
2886
2887    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
2888        return val != 0;
2889    }
2890
2891    /*
2892     * We might in the future ask for properties not present in older kernels,
2893     * but we're only asking about static properties, all of which should be
2894     * 'int'.  So we shouldn't see ENOMEM (val too small), or any of the other
2895     * more exotic errors.
2896     */
2897    assert(errno == ENOENT);
2898    return false;
2899}
2900#endif
2901
2902static void tcg_target_init(TCGContext *s)
2903{
2904#ifdef __linux__
2905    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2906    have_lse = hwcap & HWCAP_ATOMICS;
2907    have_lse2 = hwcap & HWCAP_USCAT;
2908#endif
2909#ifdef CONFIG_DARWIN
2910    have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
2911    have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
2912#endif
2913
2914    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2915    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2916    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2917    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2918
2919    tcg_target_call_clobber_regs = -1ull;
2920    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2921    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2922    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2923    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2924    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2925    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2926    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2927    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2928    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2929    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2930    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2931    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2937    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2938    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2939
2940    s->reserved_regs = 0;
2941    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2942    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2943    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2944    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2945    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2946}
2947
2948/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2949#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2950
2951#define FRAME_SIZE \
2952    ((PUSH_SIZE \
2953      + TCG_STATIC_CALL_ARGS_SIZE \
2954      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2955      + TCG_TARGET_STACK_ALIGN - 1) \
2956     & ~(TCG_TARGET_STACK_ALIGN - 1))
2957
2958/* We're expecting a 2-byte uleb128 encoded value.  */
2959QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2960
2961/* We're expecting to use a single ADDI insn.  */
2962QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
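
/*
 * Worked example, assuming the current core values (not guaranteed):
 * PUSH_SIZE = 96, TCG_STATIC_CALL_ARGS_SIZE = 128, and
 * CPU_TEMP_BUF_NLONGS * sizeof(long) = 128 * 8 = 1024.  Rounding
 * 96 + 128 + 1024 = 1248 up to 16-byte stack alignment leaves
 * FRAME_SIZE = 1248, well under both limits asserted above.
 */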
2963
2964static void tcg_target_qemu_prologue(TCGContext *s)
2965{
2966    TCGReg r;
2967
2968    /* Push (FP, LR) and allocate space for all saved registers.  */
2969    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2970                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2971
2972    /* Set up frame pointer for canonical unwinding.  */
2973    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2974
2975    /* Store callee-preserved regs x19..x28.  */
2976    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2977        int ofs = (r - TCG_REG_X19 + 2) * 8;
2978        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2979    }
2980
2981    /* Make stack space for TCG locals.  */
2982    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2983                 FRAME_SIZE - PUSH_SIZE);
2984
2985    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2986    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2987                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2988
2989#if !defined(CONFIG_SOFTMMU)
2990    /*
2991     * Note that XZR cannot be encoded in the address base register slot,
2992     * as that actually encodes SP.  Depending on the guest, we may need
2993     * to zero-extend the guest address via the address index register slot,
2994     * therefore we need to load even a zero guest base into a register.
2995     */
2996    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2997    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2998#endif
2999
3000    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3001    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3002
3003    /*
3004     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3005     * and fall through to the rest of the epilogue.
3006     */
3007    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3008    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3009
3010    /* TB epilogue */
3011    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3012
3013    /* Remove TCG locals stack space.  */
3014    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3015                 FRAME_SIZE - PUSH_SIZE);
3016
3017    /* Restore registers x19..x28.  */
3018    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3019        int ofs = (r - TCG_REG_X19 + 2) * 8;
3020        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3021    }
3022
3023    /* Pop (FP, LR), restore SP to previous frame.  */
3024    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3025                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3026    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3027}
3028
3029static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3030{
3031    int i;
3032    for (i = 0; i < count; ++i) {
3033        p[i] = NOP;
3034    }
3035}
3036
3037typedef struct {
3038    DebugFrameHeader h;
3039    uint8_t fde_def_cfa[4];
3040    uint8_t fde_reg_ofs[24];
3041} DebugFrame;
3042
3043#define ELF_HOST_MACHINE EM_AARCH64
3044
3045static const DebugFrame debug_frame = {
3046    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3047    .h.cie.id = -1,
3048    .h.cie.version = 1,
3049    .h.cie.code_align = 1,
3050    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3051    .h.cie.return_column = TCG_REG_LR,
3052
3053    /* Total FDE size does not include the "len" member.  */
3054    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3055
3056    .fde_def_cfa = {
3057        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3058        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3059        (FRAME_SIZE >> 7)
3060    },
3061    .fde_reg_ofs = {
3062        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3063        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3064        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3065        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3066        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3067        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3068        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3069        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3070        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3071        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3072        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3073        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3074    }
3075};
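
/*
 * Sketch of the uleb128 encoding above, assuming FRAME_SIZE = 1248 as
 * in the worked example near its definition: 1248 & 0x7f = 0x60, so
 * the two bytes are 0xe0 (0x60 with the continuation bit set) followed
 * by 1248 >> 7 = 0x09.
 */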
3076
3077void tcg_register_jit(const void *buf, size_t buf_size)
3078{
3079    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3080}
3081