xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 89aafcf2)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable names for the target registers, indexed by register number:
 * the general registers come first, followed by the vector registers.
 * Only built when TCG debugging is configured.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    /* v29 is an ordinary vector register; only x29 is the frame pointer. */
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
/* Scratch registers; none of these appear in the allocation order. */
#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

/* Without CONFIG_SOFTMMU, X28 is the register named for guest_base. */
#if !defined(CONFIG_SOFTMMU)
#define TCG_REG_GUEST_BASE TCG_REG_X28
#endif
83
84static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
85{
86    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
87    ptrdiff_t offset = target - src_rx;
88
89    if (offset == sextract64(offset, 0, 26)) {
90        /* read instruction, mask away previous PC_REL26 parameter contents,
91           set the proper offset, then write back the instruction. */
92        *src_rw = deposit32(*src_rw, 0, 26, offset);
93        return true;
94    }
95    return false;
96}
97
98static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
99{
100    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
101    ptrdiff_t offset = target - src_rx;
102
103    if (offset == sextract64(offset, 0, 19)) {
104        *src_rw = deposit32(*src_rw, 5, 19, offset);
105        return true;
106    }
107    return false;
108}
109
110static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
111                        intptr_t value, intptr_t addend)
112{
113    tcg_debug_assert(addend == 0);
114    switch (type) {
115    case R_AARCH64_JUMP26:
116    case R_AARCH64_CALL26:
117        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
118    case R_AARCH64_CONDBR19:
119        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
120    default:
121        g_assert_not_reached();
122    }
123}
124
/* Target-specific constant-constraint flag bits. */
#define TCG_CT_CONST_AIMM (1 << 8)   /* 0x100 */
#define TCG_CT_CONST_LIMM (1 << 9)   /* 0x200 */
#define TCG_CT_CONST_ZERO (1 << 10)  /* 0x400 */
#define TCG_CT_CONST_MONE (1 << 11)  /* 0x800 */
#define TCG_CT_CONST_ORRI (1 << 12)  /* 0x1000 */
#define TCG_CT_CONST_ANDI (1 << 13)  /* 0x2000 */

/* Register-set masks: low 32 bits are general, high 32 bits are vector. */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull
134
/*
 * Match a constant valid for addition: a 12-bit value, either unshifted
 * (bits [11:0]) or shifted left by 12 (bits [23:12]).
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t low12 = 0xfff;
    const uint64_t high12 = low12 << 12;

    if ((val & ~low12) == 0) {
        return true;
    }
    return (val & ~high12) == 0;
}
140
/*
 * Match a constant valid for logical operations.
 *
 * This is a simplified view of the logical immediates that ignores the
 * replication that can happen across the field.  Accepted bit patterns:
 *     0....01....1
 *     0..01..10..0
 * and their inverses.  Zero and all-ones are rejected.
 */
static inline bool is_limm(uint64_t val)
{
    uint64_t run, carried;

    /* Fold the msb-set patterns onto their inverses. */
    run = (val >> 63) ? ~val : val;
    if (run == 0) {
        return false;
    }

    /*
     * Adding the lowest set bit carries through a contiguous run of ones,
     * leaving a single bit set exactly when there was only one run.
     */
    carried = run + (run & -run);
    return (carried & (carried - 1)) == 0;
}
161
/*
 * Return true if v16 is a valid 16-bit shifted immediate, i.e. only one
 * of its two bytes is non-zero.  On success *cmode is 0x8 (low byte) or
 * 0xa (high byte) and *imm8 is that byte; on failure neither is written.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 >> 8) == 0) {
        *cmode = 0x8;
        *imm8 = v16;
        return true;
    }
    if ((v16 & 0xff) == 0) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}
176
/*
 * Return true if v32 is a valid 32-bit shifted immediate, i.e. at most
 * one of its four bytes is non-zero.  On success *cmode is 0x0, 0x2, 0x4
 * or 0x6 for byte 0..3 and *imm8 is that byte; on failure neither is
 * written.  The lowest matching byte position wins.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            *cmode = shift / 4;
            *imm8 = (v32 >> shift) & 0xff;
            return true;
        }
    }
    return false;
}
199
/*
 * Return true if v32 is a valid 32-bit shifting ones immediate: a byte
 * with 0xff (cmode 0xc) or 0xffff (cmode 0xd) below it and zeros above.
 * On success *imm8 is that byte; on failure nothing is written.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    int shift;

    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        shift = 8;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        shift = 16;
    } else {
        return false;
    }
    *imm8 = (v32 >> shift) & 0xff;
    return true;
}
214
/*
 * Return true if v32 is a valid float32 immediate: bits [18:0] are zero
 * and bits [30:25] are either 0x20 or 0x1f.  On success *cmode is 0xf and
 * *imm8 is built from bit 31, bit 25 and bits [24:19]; on failure nothing
 * is written.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t top6 = extract32(v32, 25, 6);

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    if (top6 != 0x20 && top6 != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
229
/*
 * Return true if v64 is a valid float64 immediate: bits [47:0] are zero
 * and bits [62:54] are either 0x100 or 0x0ff.  On success *cmode is 0xf
 * and *imm8 is built from bit 63, bit 54 and bits [53:48]; on failure
 * nothing is written.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t top9 = extract64(v64, 54, 9);

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    if (top9 != 0x100 && top9 != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
244
/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 *
 * Bytes 3, 2 and 1 are tried in that order as the byte to be added with
 * ORR; the remainder must satisfy is_shimm32() or is_soimm32(), which
 * fill in (cmode, imm8) for the MOVI.  The return value is 6, 4 or 2 for
 * byte 3, 2 or 1 (the cmode for ORR; the imm8 can be had via extraction
 * from v32), or 0 if no split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i = 6;

    while (i > 0) {
        /* Everything except the one byte we can add with ORR. */
        uint32_t rest = v32 & ~(0xffu << (i * 4));

        if (is_shimm32(rest, cmode, imm8) || is_soimm32(rest, cmode, imm8)) {
            return i;
        }
        i -= 2;
    }
    return i;
}
264
/*
 * Return true if v32 is a valid 16-bit or 32-bit shifted immediate.
 * When both 16-bit halves of v32 are equal the 16-bit test is applied to
 * the low half; otherwise the 32-bit test is applied.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    return halves_equal ? is_shimm16(v32, cmode, imm8)
                        : is_shimm32(v32, cmode, imm8);
}
274
275static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
276{
277    if (ct & TCG_CT_CONST) {
278        return 1;
279    }
280    if (type == TCG_TYPE_I32) {
281        val = (int32_t)val;
282    }
283    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
284        return 1;
285    }
286    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
287        return 1;
288    }
289    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
293        return 1;
294    }
295
296    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
297    case 0:
298        break;
299    case TCG_CT_CONST_ANDI:
300        val = ~val;
301        /* fallthru */
302    case TCG_CT_CONST_ORRI:
303        if (val == deposit64(val, 32, 32, val)) {
304            int cmode, imm8;
305            return is_shimm1632(val, &cmode, &imm8);
306        }
307        break;
308    default:
309        /* Both bits should not be set for the same insn.  */
310        g_assert_not_reached();
311    }
312
313    return 0;
314}
315
/* AArch64 condition code values, as placed in instruction encodings. */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,

    /* Unsigned greater or equal, and its alias. */
    COND_CS = 0x2,
    COND_HS = COND_CS,

    /* Unsigned less than, and its alias (lower). */
    COND_CC = 0x3,
    COND_LO = COND_CC,

    COND_MI = 0x4,      /* Negative */
    COND_PL = 0x5,      /* Zero or greater */
    COND_VS = 0x6,      /* Overflow */
    COND_VC = 0x7,      /* No overflow */
    COND_HI = 0x8,      /* Unsigned greater than */
    COND_LS = 0x9,      /* Unsigned less or equal */

    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,

    COND_AL = 0xe,
    COND_NV = 0xf,      /* behaves like COND_AL here */
};
336
337static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338    [TCG_COND_EQ] = COND_EQ,
339    [TCG_COND_NE] = COND_NE,
340    [TCG_COND_LT] = COND_LT,
341    [TCG_COND_GE] = COND_GE,
342    [TCG_COND_LE] = COND_LE,
343    [TCG_COND_GT] = COND_GT,
344    /* unsigned */
345    [TCG_COND_LTU] = COND_LO,
346    [TCG_COND_GTU] = COND_HI,
347    [TCG_COND_GEU] = COND_HS,
348    [TCG_COND_LEU] = COND_LS,
349};
350
/*
 * Load/store kind.  The AArch64Insn load/store constants shift these
 * values left by 22.
 */
typedef enum {
    /* store */
    LDST_ST = 0,
    /* load */
    LDST_LD = 1,
    /* load and sign-extend into Xt */
    LDST_LD_S_X = 2,
    /* load and sign-extend into Wt */
    LDST_LD_S_W = 3,
} AArch64LdstType;
357
358/* We encode the format of the insn into the beginning of the name, so that
359   we can have the preprocessor help "typecheck" the insn vs the output
360   function.  Arm didn't provide us with nice names for the formats, so we
361   use the section number of the architecture reference manual in which the
362   instruction group is described.  */
363typedef enum {
364    /* Compare and branch (immediate).  */
365    I3201_CBZ       = 0x34000000,
366    I3201_CBNZ      = 0x35000000,
367
368    /* Conditional branch (immediate).  */
369    I3202_B_C       = 0x54000000,
370
371    /* Unconditional branch (immediate).  */
372    I3206_B         = 0x14000000,
373    I3206_BL        = 0x94000000,
374
375    /* Unconditional branch (register).  */
376    I3207_BR        = 0xd61f0000,
377    I3207_BLR       = 0xd63f0000,
378    I3207_RET       = 0xd65f0000,
379
380    /* AdvSIMD load/store single structure.  */
381    I3303_LD1R      = 0x0d40c000,
382
383    /* Load literal for loading the address at pc-relative offset */
384    I3305_LDR       = 0x58000000,
385    I3305_LDR_v64   = 0x5c000000,
386    I3305_LDR_v128  = 0x9c000000,
387
388    /* Load/store exclusive. */
389    I3306_LDXP      = 0xc8600000,
390    I3306_STXP      = 0xc8200000,
391
392    /* Load/store register.  Described here as 3.3.12, but the helper
393       that emits them can transform to 3.3.10 or 3.3.13.  */
394    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
398
399    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
403
404    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
406
407    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
410
411    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
413
414    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
416
417    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
418    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
419
420    I3312_TO_I3310  = 0x00200800,
421    I3312_TO_I3313  = 0x01000000,
422
423    /* Load/store register pair instructions.  */
424    I3314_LDP       = 0x28400000,
425    I3314_STP       = 0x28000000,
426
427    /* Add/subtract immediate instructions.  */
428    I3401_ADDI      = 0x11000000,
429    I3401_ADDSI     = 0x31000000,
430    I3401_SUBI      = 0x51000000,
431    I3401_SUBSI     = 0x71000000,
432
433    /* Bitfield instructions.  */
434    I3402_BFM       = 0x33000000,
435    I3402_SBFM      = 0x13000000,
436    I3402_UBFM      = 0x53000000,
437
438    /* Extract instruction.  */
439    I3403_EXTR      = 0x13800000,
440
441    /* Logical immediate instructions.  */
442    I3404_ANDI      = 0x12000000,
443    I3404_ORRI      = 0x32000000,
444    I3404_EORI      = 0x52000000,
445    I3404_ANDSI     = 0x72000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract extended register instructions. */
457    I3501_ADD       = 0x0b200000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV       = 0x5ac00000, /* + size << 10 */
482
483    /* Data-processing (2 source) instructions.  */
484    I3508_LSLV      = 0x1ac02000,
485    I3508_LSRV      = 0x1ac02400,
486    I3508_ASRV      = 0x1ac02800,
487    I3508_RORV      = 0x1ac02c00,
488    I3508_SMULH     = 0x9b407c00,
489    I3508_UMULH     = 0x9bc07c00,
490    I3508_UDIV      = 0x1ac00800,
491    I3508_SDIV      = 0x1ac00c00,
492
493    /* Data-processing (3 source) instructions.  */
494    I3509_MADD      = 0x1b000000,
495    I3509_MSUB      = 0x1b008000,
496
497    /* Logical shifted register instructions (without a shift).  */
498    I3510_AND       = 0x0a000000,
499    I3510_BIC       = 0x0a200000,
500    I3510_ORR       = 0x2a000000,
501    I3510_ORN       = 0x2a200000,
502    I3510_EOR       = 0x4a000000,
503    I3510_EON       = 0x4a200000,
504    I3510_ANDS      = 0x6a000000,
505
506    /* Logical shifted register instructions (with a shift).  */
507    I3502S_AND_LSR  = I3510_AND | (1 << 22),
508
509    /* AdvSIMD copy */
510    I3605_DUP      = 0x0e000400,
511    I3605_INS      = 0x4e001c00,
512    I3605_UMOV     = 0x0e003c00,
513
514    /* AdvSIMD modified immediate */
515    I3606_MOVI      = 0x0f000400,
516    I3606_MVNI      = 0x2f000400,
517    I3606_BIC       = 0x2f001400,
518    I3606_ORR       = 0x0f001400,
519
520    /* AdvSIMD scalar shift by immediate */
521    I3609_SSHR      = 0x5f000400,
522    I3609_SSRA      = 0x5f001400,
523    I3609_SHL       = 0x5f005400,
524    I3609_USHR      = 0x7f000400,
525    I3609_USRA      = 0x7f001400,
526    I3609_SLI       = 0x7f005400,
527
528    /* AdvSIMD scalar three same */
529    I3611_SQADD     = 0x5e200c00,
530    I3611_SQSUB     = 0x5e202c00,
531    I3611_CMGT      = 0x5e203400,
532    I3611_CMGE      = 0x5e203c00,
533    I3611_SSHL      = 0x5e204400,
534    I3611_ADD       = 0x5e208400,
535    I3611_CMTST     = 0x5e208c00,
536    I3611_UQADD     = 0x7e200c00,
537    I3611_UQSUB     = 0x7e202c00,
538    I3611_CMHI      = 0x7e203400,
539    I3611_CMHS      = 0x7e203c00,
540    I3611_USHL      = 0x7e204400,
541    I3611_SUB       = 0x7e208400,
542    I3611_CMEQ      = 0x7e208c00,
543
544    /* AdvSIMD scalar two-reg misc */
545    I3612_CMGT0     = 0x5e208800,
546    I3612_CMEQ0     = 0x5e209800,
547    I3612_CMLT0     = 0x5e20a800,
548    I3612_ABS       = 0x5e20b800,
549    I3612_CMGE0     = 0x7e208800,
550    I3612_CMLE0     = 0x7e209800,
551    I3612_NEG       = 0x7e20b800,
552
553    /* AdvSIMD shift by immediate */
554    I3614_SSHR      = 0x0f000400,
555    I3614_SSRA      = 0x0f001400,
556    I3614_SHL       = 0x0f005400,
557    I3614_SLI       = 0x2f005400,
558    I3614_USHR      = 0x2f000400,
559    I3614_USRA      = 0x2f001400,
560
561    /* AdvSIMD three same.  */
562    I3616_ADD       = 0x0e208400,
563    I3616_AND       = 0x0e201c00,
564    I3616_BIC       = 0x0e601c00,
565    I3616_BIF       = 0x2ee01c00,
566    I3616_BIT       = 0x2ea01c00,
567    I3616_BSL       = 0x2e601c00,
568    I3616_EOR       = 0x2e201c00,
569    I3616_MUL       = 0x0e209c00,
570    I3616_ORR       = 0x0ea01c00,
571    I3616_ORN       = 0x0ee01c00,
572    I3616_SUB       = 0x2e208400,
573    I3616_CMGT      = 0x0e203400,
574    I3616_CMGE      = 0x0e203c00,
575    I3616_CMTST     = 0x0e208c00,
576    I3616_CMHI      = 0x2e203400,
577    I3616_CMHS      = 0x2e203c00,
578    I3616_CMEQ      = 0x2e208c00,
579    I3616_SMAX      = 0x0e206400,
580    I3616_SMIN      = 0x0e206c00,
581    I3616_SSHL      = 0x0e204400,
582    I3616_SQADD     = 0x0e200c00,
583    I3616_SQSUB     = 0x0e202c00,
584    I3616_UMAX      = 0x2e206400,
585    I3616_UMIN      = 0x2e206c00,
586    I3616_UQADD     = 0x2e200c00,
587    I3616_UQSUB     = 0x2e202c00,
588    I3616_USHL      = 0x2e204400,
589
590    /* AdvSIMD two-reg misc.  */
591    I3617_CMGT0     = 0x0e208800,
592    I3617_CMEQ0     = 0x0e209800,
593    I3617_CMLT0     = 0x0e20a800,
594    I3617_CMGE0     = 0x2e208800,
595    I3617_CMLE0     = 0x2e209800,
596    I3617_NOT       = 0x2e205800,
597    I3617_ABS       = 0x0e20b800,
598    I3617_NEG       = 0x2e20b800,
599
600    /* System instructions.  */
601    NOP             = 0xd503201f,
602    DMB_ISH         = 0xd50338bf,
603    DMB_LD          = 0x00000100,
604    DMB_ST          = 0x00000200,
605} AArch64Insn;
606
607static inline uint32_t tcg_in32(TCGContext *s)
608{
609    uint32_t v = *(uint32_t *)s->code_ptr;
610    return v;
611}
612
/*
 * Emit an opcode with "type-checking" of the format: FMT selects the
 * emitter tcg_out_insn_<FMT>, and the opcode passed to it is the
 * enumerator I<FMT>_<OP>, so a mismatched FMT/OP pair fails to compile.
 */
#define tcg_out_insn(S, FMT, OP, ...)                                     \
    glue(tcg_out_insn_, FMT)(S, glue(glue(glue(I, FMT), _), OP),          \
                             ## __VA_ARGS__)
616
617static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
618                              TCGReg rt, TCGReg rn, unsigned size)
619{
620    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
621}
622
623static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
624                              int imm19, TCGReg rt)
625{
626    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
627}
628
629static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
630                              TCGReg rt, TCGReg rt2, TCGReg rn)
631{
632    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
633}
634
635static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
636                              TCGReg rt, int imm19)
637{
638    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
639}
640
641static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
642                              TCGCond c, int imm19)
643{
644    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
645}
646
647static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
648{
649    tcg_out32(s, insn | (imm26 & 0x03ffffff));
650}
651
652static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
653{
654    tcg_out32(s, insn | rn << 5);
655}
656
657static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
658                              TCGReg r1, TCGReg r2, TCGReg rn,
659                              tcg_target_long ofs, bool pre, bool w)
660{
661    insn |= 1u << 31; /* ext */
662    insn |= pre << 24;
663    insn |= w << 23;
664
665    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
666    insn |= (ofs & (0x7f << 3)) << (15 - 3);
667
668    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
669}
670
671static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
672                              TCGReg rd, TCGReg rn, uint64_t aimm)
673{
674    if (aimm > 0xfff) {
675        tcg_debug_assert((aimm & 0xfff) == 0);
676        aimm >>= 12;
677        tcg_debug_assert(aimm <= 0xfff);
678        aimm |= 1 << 12;  /* apply LSL 12 */
679    }
680    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
681}
682
683/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
684   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
685   that feed the DecodeBitMasks pseudo function.  */
686static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
687                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
688{
689    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
690              | rn << 5 | rd);
691}
692
693#define tcg_out_insn_3404  tcg_out_insn_3402
694
695static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
696                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
697{
698    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
699              | rn << 5 | rd);
700}
701
702/* This function is used for the Move (wide immediate) instruction group.
703   Note that SHIFT is a full shift count, not the 2 bit HW field. */
704static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
705                              TCGReg rd, uint16_t half, unsigned shift)
706{
707    tcg_debug_assert((shift & ~0x30) == 0);
708    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
709}
710
711static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
712                              TCGReg rd, int64_t disp)
713{
714    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
715}
716
717static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
718                                     TCGType sf, TCGReg rd, TCGReg rn,
719                                     TCGReg rm, int opt, int imm3)
720{
721    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
722              imm3 << 10 | rn << 5 | rd);
723}
724
725/* This function is for both 3.5.2 (Add/Subtract shifted register), for
726   the rare occasion when we actually want to supply a shift amount.  */
727static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
728                                      TCGType ext, TCGReg rd, TCGReg rn,
729                                      TCGReg rm, int imm6)
730{
731    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
732}
733
734/* This function is for 3.5.2 (Add/subtract shifted register),
735   and 3.5.10 (Logical shifted register), for the vast majorty of cases
736   when we don't want to apply a shift.  Thus it can also be used for
737   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
738static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
739                              TCGReg rd, TCGReg rn, TCGReg rm)
740{
741    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
742}
743
744#define tcg_out_insn_3503  tcg_out_insn_3502
745#define tcg_out_insn_3508  tcg_out_insn_3502
746#define tcg_out_insn_3510  tcg_out_insn_3502
747
748static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
749                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
750{
751    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
752              | tcg_cond_to_aarch64[c] << 12);
753}
754
755static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
756                              TCGReg rd, TCGReg rn)
757{
758    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
759}
760
761static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
762                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
763{
764    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
765}
766
767static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
768                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
769{
770    /* Note that bit 11 set means general register input.  Therefore
771       we can handle both register sets with one function.  */
772    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
773              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
774}
775
776static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
777                              TCGReg rd, bool op, int cmode, uint8_t imm8)
778{
779    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
780              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
781}
782
783static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
784                              TCGReg rd, TCGReg rn, unsigned immhb)
785{
786    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
787}
788
789static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
790                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
791{
792    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
793              | (rn & 0x1f) << 5 | (rd & 0x1f));
794}
795
796static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
797                              unsigned size, TCGReg rd, TCGReg rn)
798{
799    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
800}
801
802static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
803                              TCGReg rd, TCGReg rn, unsigned immhb)
804{
805    tcg_out32(s, insn | q << 30 | immhb << 16
806              | (rn & 0x1f) << 5 | (rd & 0x1f));
807}
808
809static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
810                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
811{
812    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
813              | (rn & 0x1f) << 5 | (rd & 0x1f));
814}
815
816static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
817                              unsigned size, TCGReg rd, TCGReg rn)
818{
819    tcg_out32(s, insn | q << 30 | (size << 22)
820              | (rn & 0x1f) << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg base, TCGType ext,
825                              TCGReg regoff)
826{
827    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
828    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
829              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
830}
831
832static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
833                              TCGReg rd, TCGReg rn, intptr_t offset)
834{
835    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
836}
837
838static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
839                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
840{
841    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
842    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
843              | rn << 5 | (rd & 0x1f));
844}
845
846/* Register to register move using ORR (shifted register with no shift). */
847static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
848{
849    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
850}
851
852/* Register to register move using ADDI (move to/from SP).  */
853static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
854{
855    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
856}
857
858/* This function is used for the Logical (immediate) instruction group.
859   The value of LIMM must satisfy IS_LIMM.  See the comment above about
860   only supporting simplified logical immediates.  */
861static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
862                             TCGReg rd, TCGReg rn, uint64_t limm)
863{
864    unsigned h, l, r, c;
865
866    tcg_debug_assert(is_limm(limm));
867
868    h = clz64(limm);
869    l = ctz64(limm);
870    if (l == 0) {
871        r = 0;                  /* form 0....01....1 */
872        c = ctz64(~limm) - 1;
873        if (h == 0) {
874            r = clz64(~limm);   /* form 1..10..01..1 */
875            c += r;
876        }
877    } else {
878        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
879        c = r - h - 1;
880    }
881    if (ext == TCG_TYPE_I32) {
882        r &= 31;
883        c &= 31;
884    }
885
886    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
887}
888
/*
 * Load the constant @v64 into vector register @rd.
 *
 * @type selects a 64-bit or 128-bit vector (Q is set for TCG_TYPE_V128)
 * and @vece is the element size.  The cheapest encoding is tried first:
 * a single MOVI/MVNI, then a two-instruction MOVI+ORR or MVNI+BIC pair,
 * and finally a load from the constant pool.
 *
 * NOTE(review): the byte-mask test below inspects all eight bytes of
 * @v64, so callers presumably pass the element value already replicated
 * across 64 bits -- confirm against the callers.
 */
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     * Bit i of imm8 is set when byte i of v64 is 0xff.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        /* Try the inverted value, loaded with MVNI.  */
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         * A non-zero result from is_shimm32_pair() is reused both as
         * the cmode of the second insn and, times 4, as the bit offset
         * of its 8-bit immediate.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
991
992static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
993                            TCGReg rd, TCGReg rs)
994{
995    int is_q = type - TCG_TYPE_V64;
996    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
997    return true;
998}
999
1000static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1001                             TCGReg r, TCGReg base, intptr_t offset)
1002{
1003    TCGReg temp = TCG_REG_TMP0;
1004
1005    if (offset < -0xffffff || offset > 0xffffff) {
1006        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1007        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1008        base = temp;
1009    } else {
1010        AArch64Insn add_insn = I3401_ADDI;
1011
1012        if (offset < 0) {
1013            add_insn = I3401_SUBI;
1014            offset = -offset;
1015        }
1016        if (offset & 0xfff000) {
1017            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1018            base = temp;
1019        }
1020        if (offset & 0xfff) {
1021            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1022            base = temp;
1023        }
1024    }
1025    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1026    return true;
1027}
1028
/*
 * Load the integer constant @value into general register @rd.
 *
 * @type must be TCG_TYPE_I32 or TCG_TYPE_I64 and @rd must be below 32.
 * Strategies are tried in order: a single MOVZ/MOVN, a logical
 * immediate via ORR with XZR, a pc-relative ADR or ADRP(+ADD) for
 * 64-bit values, a MOVZ/MOVN followed by at most one MOVK, and
 * finally a load from the constant pool.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        /* Byte displacement that fits in 21 signed bits: use ADR.  */
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        /* Otherwise try a 4K-page displacement with ADRP, then add the
           low 12 bits of the value if they are non-zero.  */
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    /* s0 and s1 are the 16-bit-aligned positions of the two lowest
       non-zero halfwords of t0; t2 is what remains after removing both.  */
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            /* MOVK inserts the halfword of the real value, not of t0.  */
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
1119
/*
 * Register exchange hook.  Emits nothing and always returns false.
 * NOTE(review): presumably false tells the caller that no exchange was
 * generated -- confirm against the common TCG code.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
1124
/*
 * Pointer add-immediate hook.  It is not expected to be reached on this
 * host: the body unconditionally hits g_assert_not_reached().
 */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
1131
1132/* Define something more legible for general use.  */
1133#define tcg_out_ldst_r  tcg_out_insn_3310
1134
1135static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1136                         TCGReg rn, intptr_t offset, int lgsize)
1137{
1138    /* If the offset is naturally aligned and in range, then we can
1139       use the scaled uimm12 encoding */
1140    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1141        uintptr_t scaled_uimm = offset >> lgsize;
1142        if (scaled_uimm <= 0xfff) {
1143            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1144            return;
1145        }
1146    }
1147
1148    /* Small signed offsets can use the unscaled encoding.  */
1149    if (offset >= -256 && offset < 256) {
1150        tcg_out_insn_3312(s, insn, rd, rn, offset);
1151        return;
1152    }
1153
1154    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1155    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1156    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1157}
1158
1159static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1160{
1161    if (ret == arg) {
1162        return true;
1163    }
1164    switch (type) {
1165    case TCG_TYPE_I32:
1166    case TCG_TYPE_I64:
1167        if (ret < 32 && arg < 32) {
1168            tcg_out_movr(s, type, ret, arg);
1169            break;
1170        } else if (ret < 32) {
1171            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1172            break;
1173        } else if (arg < 32) {
1174            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1175            break;
1176        }
1177        /* FALLTHRU */
1178
1179    case TCG_TYPE_V64:
1180        tcg_debug_assert(ret >= 32 && arg >= 32);
1181        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1182        break;
1183    case TCG_TYPE_V128:
1184        tcg_debug_assert(ret >= 32 && arg >= 32);
1185        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1186        break;
1187
1188    default:
1189        g_assert_not_reached();
1190    }
1191    return true;
1192}
1193
1194static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1195                       TCGReg base, intptr_t ofs)
1196{
1197    AArch64Insn insn;
1198    int lgsz;
1199
1200    switch (type) {
1201    case TCG_TYPE_I32:
1202        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1203        lgsz = 2;
1204        break;
1205    case TCG_TYPE_I64:
1206        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1207        lgsz = 3;
1208        break;
1209    case TCG_TYPE_V64:
1210        insn = I3312_LDRVD;
1211        lgsz = 3;
1212        break;
1213    case TCG_TYPE_V128:
1214        insn = I3312_LDRVQ;
1215        lgsz = 4;
1216        break;
1217    default:
1218        g_assert_not_reached();
1219    }
1220    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1221}
1222
1223static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1224                       TCGReg base, intptr_t ofs)
1225{
1226    AArch64Insn insn;
1227    int lgsz;
1228
1229    switch (type) {
1230    case TCG_TYPE_I32:
1231        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1232        lgsz = 2;
1233        break;
1234    case TCG_TYPE_I64:
1235        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1236        lgsz = 3;
1237        break;
1238    case TCG_TYPE_V64:
1239        insn = I3312_STRVD;
1240        lgsz = 3;
1241        break;
1242    case TCG_TYPE_V128:
1243        insn = I3312_STRVQ;
1244        lgsz = 4;
1245        break;
1246    default:
1247        g_assert_not_reached();
1248    }
1249    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1250}
1251
1252static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1253                               TCGReg base, intptr_t ofs)
1254{
1255    if (type <= TCG_TYPE_I64 && val == 0) {
1256        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1257        return true;
1258    }
1259    return false;
1260}
1261
/*
 * Emit BFM from @rn into @rd with immediate operands @a and @b.
 * @ext is passed both as the operation size and as the format
 * argument that follows the registers.
 */
static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}
1267
/*
 * Emit UBFM from @rn into @rd with immediate operands @a and @b.
 * @ext is passed both as the operation size and as the format
 * argument that follows the registers.
 */
static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}
1273
/*
 * Emit SBFM from @rn into @rd with immediate operands @a and @b.
 * @ext is passed both as the operation size and as the format
 * argument that follows the registers.
 */
static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}
1279
/*
 * Emit EXTR into @rd from the register pair @rn, @rm with immediate @a.
 * @ext is passed through as the operation size.
 */
static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}
1285
1286static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1287                               TCGReg rd, TCGReg rn, unsigned int m)
1288{
1289    int bits = ext ? 64 : 32;
1290    int max = bits - 1;
1291    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1292}
1293
1294static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1295                               TCGReg rd, TCGReg rn, unsigned int m)
1296{
1297    int max = ext ? 63 : 31;
1298    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1299}
1300
1301static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1302                               TCGReg rd, TCGReg rn, unsigned int m)
1303{
1304    int max = ext ? 63 : 31;
1305    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1306}
1307
1308static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1309                                TCGReg rd, TCGReg rn, unsigned int m)
1310{
1311    int max = ext ? 63 : 31;
1312    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1313}
1314
1315static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1316                                TCGReg rd, TCGReg rn, unsigned int m)
1317{
1318    int max = ext ? 63 : 31;
1319    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1320}
1321
1322static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1323                               TCGReg rn, unsigned lsb, unsigned width)
1324{
1325    unsigned size = ext ? 64 : 32;
1326    unsigned a = (size - lsb) & (size - 1);
1327    unsigned b = width - 1;
1328    tcg_out_bfm(s, ext, rd, rn, a, b);
1329}
1330
1331static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1332                        tcg_target_long b, bool const_b)
1333{
1334    if (const_b) {
1335        /* Using CMP or CMN aliases.  */
1336        if (b >= 0) {
1337            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1338        } else {
1339            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1340        }
1341    } else {
1342        /* Using CMP alias SUBS wzr, Wn, Wm */
1343        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1344    }
1345}
1346
/*
 * Emit an unconditional branch B to @target.
 * The displacement is the pc-relative distance in 4-byte instruction
 * units and must fit in 26 signed bits (debug-asserted only).
 */
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}
1353
1354static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1355{
1356    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1357    if (offset == sextract64(offset, 0, 26)) {
1358        tcg_out_insn(s, 3206, B, offset);
1359    } else {
1360        /* Choose X9 as a call-clobbered non-LR temporary. */
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1363    }
1364}
1365
1366static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1367{
1368    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1369    if (offset == sextract64(offset, 0, 26)) {
1370        tcg_out_insn(s, 3206, BL, offset);
1371    } else {
1372        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1373        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1374    }
1375}
1376
/*
 * Emit a call to the helper at @target.  @info is not used here; the
 * work is delegated to tcg_out_call_int().
 */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}
1382
1383static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1384{
1385    if (!l->has_value) {
1386        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1387        tcg_out_insn(s, 3206, B, 0);
1388    } else {
1389        tcg_out_goto(s, l->u.value_ptr);
1390    }
1391}
1392
1393static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1394                           TCGArg b, bool b_const, TCGLabel *l)
1395{
1396    intptr_t offset;
1397    bool need_cmp;
1398
1399    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1400        need_cmp = false;
1401    } else {
1402        need_cmp = true;
1403        tcg_out_cmp(s, ext, a, b, b_const);
1404    }
1405
1406    if (!l->has_value) {
1407        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1408        offset = tcg_in32(s) >> 5;
1409    } else {
1410        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1411        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1412    }
1413
1414    if (need_cmp) {
1415        tcg_out_insn(s, 3202, B_C, c, offset);
1416    } else if (c == TCG_COND_EQ) {
1417        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1418    } else {
1419        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1420    }
1421}
1422
/*
 * Emit a byte-reversal instruction from @rn into @rd.
 * @s_bits is shifted to bit 10 and OR-ed into the REV opcode to choose
 * between the REV, REV16 and REV32 variants; @ext is the operation size.
 */
static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}
1429
1430static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1431                               TCGReg rd, TCGReg rn)
1432{
1433    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1434    int bits = (8 << s_bits) - 1;
1435    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1436}
1437
/* Sign-extend the low 8 bits of @rn into @rd, at operation size @type. */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}
1442
/* Sign-extend the low 16 bits of @rn into @rd, at operation size @type. */
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}
1447
/* Sign-extend the low 32 bits of @rn into the 64-bit register @rd. */
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}
1452
/*
 * Sign-extend a 32-bit value in @rn to 64 bits in @rd; identical to
 * tcg_out_ext32s().
 */
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}
1457
1458static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1459                               TCGReg rd, TCGReg rn)
1460{
1461    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1462    int bits = (8 << s_bits) - 1;
1463    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1464}
1465
/* Zero-extend the low 8 bits of @rn into @rd. */
static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}
1470
/* Zero-extend the low 16 bits of @rn into @rd. */
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}
1475
/*
 * Zero-extend the low 32 bits of @rn into @rd, implemented as a
 * register move performed at TCG_TYPE_I32 size.
 */
static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}
1480
/*
 * Zero-extend a 32-bit value in @rn to 64 bits in @rd; identical to
 * tcg_out_ext32u().
 */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}
1485
/*
 * Extract the low 32 bits of the 64-bit value in @rn into @rd,
 * implemented as a TCG_TYPE_I32 move via tcg_out_mov().
 */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}
1490
1491static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1492                            TCGReg rn, int64_t aimm)
1493{
1494    if (aimm >= 0) {
1495        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1496    } else {
1497        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1498    }
1499}
1500
/*
 * Emit a double-word add or subtract: (rh:rl) = (ah:al) +/- (bh:bl).
 *
 * @sub selects subtraction.  @bl and @bh are immediates when
 * @const_bl / @const_bh are set, otherwise register numbers.  The low
 * half is computed with a flag-setting ADDS/SUBS and the high half
 * with ADC/SBC consuming the carry.
 */
static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    /* The low result is written before the high inputs are read, so
       route it through TMP0 if it would clobber one of them.  */
    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        /* A negative immediate is handled by negating it and swapping
           the add/subtract opcode.  */
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    /* Move the low result into place if it was diverted to TMP0.  */
    tcg_out_mov(s, ext, orig_rl, rl);
}
1550
/*
 * Emit a DMB memory barrier chosen from the TCG_MO_* bits of @a0.
 *
 * The table defaults every combination to a barrier with both DMB_LD
 * and DMB_ST set, then overrides the combinations listed below with
 * the one-sided variants.  Only the bits within TCG_MO_ALL are used
 * for the lookup.
 */
static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
1562
/*
 * Emit count-leading-zeros or, when @is_ctz is set, count-trailing-zeros
 * of @a0 into @d, with @b as the result to produce when @a0 is zero.
 *
 * @b is an immediate when @const_b is set, otherwise a register.
 * Trailing zeros are counted by bit-reversing @a0 into TMP0 with RBIT
 * and then counting leading zeros of that.
 */
static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
{
    TCGReg a1 = a0;
    if (is_ctz) {
        a1 = TCG_REG_TMP0;
        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
    }
    if (const_b && b == (ext ? 64 : 32)) {
        /* The zero-input result equals the operand width, so a bare
           CLZ is emitted with no select.  */
        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
    } else {
        AArch64Insn sel = I3506_CSEL;

        /* Test the original input for zero, then count into TMP0.  */
        tcg_out_cmp(s, ext, a0, 0, 1);
        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);

        if (const_b) {
            if (b == -1) {
                /* CSINV against XZR is used for the -1 fallback.  */
                b = TCG_REG_XZR;
                sel = I3506_CSINV;
            } else if (b == 0) {
                b = TCG_REG_XZR;
            } else {
                /* Other constants are first loaded into d.  */
                tcg_out_movi(s, ext, d, b);
                b = d;
            }
        }
        /* d = (a0 != 0) ? TMP0 : fallback */
        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
    }
}
1593
/* Host address for a guest memory access, as filled in by prepare_host_addr(). */
typedef struct {
    TCGReg base;        /* base register */
    TCGReg index;       /* index register; TCG_REG_XZR when there is none */
    TCGType index_ext;  /* type of the index register */
    TCGAtomAlign aa;    /* result of atom_and_align_for_opc() for the access */
} HostAddress;
1600
/* Always returns false, whatever @memop is. */
bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}
1605
/*
 * Parameters handed to the common load/store helper argument code by
 * the slow paths below: one scratch register, TCG_REG_TMP0.
 */
static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};
1609
1610static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1611{
1612    MemOp opc = get_memop(lb->oi);
1613
1614    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1615        return false;
1616    }
1617
1618    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1619    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1620    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1621    tcg_out_goto(s, lb->raddr);
1622    return true;
1623}
1624
1625static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1626{
1627    MemOp opc = get_memop(lb->oi);
1628
1629    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1630        return false;
1631    }
1632
1633    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1634    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1635    tcg_out_goto(s, lb->raddr);
1636    return true;
1637}
1638
/*
 * For softmmu, perform the TLB load and compare.
 * For useronly, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 *
 * @addr_reg holds the guest address, @oi the memory operation and mmu
 * index, and @is_ld distinguishes loads from stores.  The returned
 * label, when not NULL, has label_ptr[0] pointing at the conditional
 * branch that must be patched to reach the slow path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    /* Mask of the low address bits that must be zero for this access.  */
    a_mask = (1 << h->aa.align) - 1;

#ifdef CONFIG_SOFTMMU
    unsigned s_mask = (1u << s_bits) - 1;
    unsigned mem_index = get_mmuidx(oi);
    TCGReg addr_adj;
    TCGType mask_type;
    uint64_t compare_mask;

    ldst = new_ldst_label(s);
    ldst->is_ld = is_ld;
    ldst->oi = oi;
    ldst->addrlo_reg = addr_reg;

    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                 ? TCG_TYPE_I64 : TCG_TYPE_I32);

    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);

    /* Extract the TLB index from the address into TMP0.  */
    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
                 s->page_bits - CPU_TLB_ENTRY_BITS);

    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);

    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
               is_ld ? offsetof(CPUTLBEntry, addr_read)
                     : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
               offsetof(CPUTLBEntry, addend));

    /*
     * For aligned accesses, we check the first byte and include the alignment
     * bits within the address.  For unaligned access, we check that we don't
     * cross pages using the address of the last byte of the access.
     */
    if (a_mask >= s_mask) {
        addr_adj = addr_reg;
    } else {
        addr_adj = TCG_REG_TMP2;
        tcg_out_insn(s, 3401, ADDI, addr_type,
                     addr_adj, addr_reg, s_mask - a_mask);
    }
    compare_mask = (uint64_t)s->page_mask | a_mask;

    /* Store the page mask part of the address into TMP2.  */
    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
                     addr_adj, compare_mask);

    /* Perform the address comparison. */
    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);

    /* If not equal, we jump to the slow path. */
    ldst->label_ptr[0] = s->code_ptr;
    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);

    /* Fast path address: the loaded addend plus the guest address.  */
    h->base = TCG_REG_TMP1;
    h->index = addr_reg;
    h->index_ext = addr_type;
#else
    /* A slow path is only needed when an alignment check is required.  */
    if (a_mask) {
        ldst = new_ldst_label(s);

        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        /* tst addr, #mask */
        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);

        /* b.ne slow_path */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
    }

    if (guest_base || addr_type == TCG_TYPE_I32) {
        h->base = TCG_REG_GUEST_BASE;
        h->index = addr_reg;
        h->index_ext = addr_type;
    } else {
        /* No guest base and a 64-bit address: use it directly.  */
        h->base = addr_reg;
        h->index = TCG_REG_XZR;
        h->index_ext = TCG_TYPE_I64;
    }
#endif

    return ldst;
}
1756
1757static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1758                                   TCGReg data_r, HostAddress h)
1759{
1760    switch (memop & MO_SSIZE) {
1761    case MO_UB:
1762        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1763        break;
1764    case MO_SB:
1765        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1766                       data_r, h.base, h.index_ext, h.index);
1767        break;
1768    case MO_UW:
1769        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1770        break;
1771    case MO_SW:
1772        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1773                       data_r, h.base, h.index_ext, h.index);
1774        break;
1775    case MO_UL:
1776        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1777        break;
1778    case MO_SL:
1779        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1780        break;
1781    case MO_UQ:
1782        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1783        break;
1784    default:
1785        g_assert_not_reached();
1786    }
1787}
1788
1789static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1790                                   TCGReg data_r, HostAddress h)
1791{
1792    switch (memop & MO_SIZE) {
1793    case MO_8:
1794        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1795        break;
1796    case MO_16:
1797        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1798        break;
1799    case MO_32:
1800        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1801        break;
1802    case MO_64:
1803        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1804        break;
1805    default:
1806        g_assert_not_reached();
1807    }
1808}
1809
1810static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1811                            MemOpIdx oi, TCGType data_type)
1812{
1813    TCGLabelQemuLdst *ldst;
1814    HostAddress h;
1815
1816    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1817    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1818
1819    if (ldst) {
1820        ldst->type = data_type;
1821        ldst->datalo_reg = data_reg;
1822        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1823    }
1824}
1825
1826static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1827                            MemOpIdx oi, TCGType data_type)
1828{
1829    TCGLabelQemuLdst *ldst;
1830    HostAddress h;
1831
1832    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1833    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1834
1835    if (ldst) {
1836        ldst->type = data_type;
1837        ldst->datalo_reg = data_reg;
1838        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1839    }
1840}
1841
/*
 * Emit a 128-bit guest load (is_ld) or store (!is_ld) of the register
 * pair datalo/datahi at guest address addr_reg.
 *
 * Three code shapes can result:
 *  - a single LDP/STP, when h.aa.atom < MO_128 or have_lse2 is set;
 *  - an LDXP/STXP retry loop, when h.aa.align is already MO_128;
 *  - otherwise a test of the low four address bits that branches to an
 *    LDP/STP when any is set and falls into the LDXP/STXP loop when none
 *    is; the loop is followed by a branch over the LDP/STP.
 *
 * If prepare_host_addr() returned a label record it is completed at the
 * end with TCG_TYPE_I128, both data registers and the code address that
 * follows everything emitted here.
 */
1842static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1843                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1844{
1845    TCGLabelQemuLdst *ldst;
1846    HostAddress h;
1847    TCGReg base;
1848    bool use_pair;
1849
1850    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1851
1852    /* Compose the final address, as LDP/STP have no indexing. */
1853    if (h.index == TCG_REG_XZR) {
1854        base = h.base;
1855    } else {
1856        base = TCG_REG_TMP2;
1857        if (h.index_ext == TCG_TYPE_I32) {
1858            /* add base, base, index, uxtw */
1859            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1860                         h.base, h.index, MO_32, 0);
1861        } else {
1862            /* add base, base, index */
1863            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1864        }
1865    }
1866
    /* A plain pair access suffices unless 16-byte atomicity is required
       and LSE2 is not available. */
1867    use_pair = h.aa.atom < MO_128 || have_lse2;
1868
1869    if (!use_pair) {
        /* Location of the forward B.cond, patched once its target is known;
           stays NULL when no runtime alignment test is emitted. */
1870        tcg_insn_unit *branch = NULL;
        /* Registers written by LDXP (ll, lh) and stored by STXP (sl, sh). */
1871        TCGReg ll, lh, sl, sh;
1872
1873        /*
1874         * If we have already checked for 16-byte alignment, that's all
1875         * we need. Otherwise we have determined that misaligned atomicity
1876         * may be handled with two 8-byte loads.
1877         */
1878        if (h.aa.align < MO_128) {
1879            /*
1880             * TODO: align should be MO_64, so we only need test bit 3,
1881             * which means we could use TBNZ instead of ANDS+B_C.
1882             */
            /* Flags-only test of the low four bits of addr_reg. */
1883            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1884            branch = s->code_ptr;
            /* Emitted with a zero displacement; reloc_pc19() below fills it. */
1885            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
            /* From here on use_pair means "also emit the LDP/STP". */
1886            use_pair = true;
1887        }
1888
1889        if (is_ld) {
1890            /*
1891             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1892             *    ldxp lo, hi, [base]
1893             *    stxp t0, lo, hi, [base]
1894             *    cbnz t0, .-8
1895             * Require no overlap between data{lo,hi} and base.
1896             */
1897            if (datalo == base || datahi == base) {
1898                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1899                base = TCG_REG_TMP2;
1900            }
1901            ll = sl = datalo;
1902            lh = sh = datahi;
1903        } else {
1904            /*
1905             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1906             * 1: ldxp t0, t1, [base]
1907             *    stxp t0, lo, hi, [base]
1908             *    cbnz t0, 1b
1909             */
1910            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1911            ll = TCG_REG_TMP0;
1912            lh = TCG_REG_TMP1;
1913            sl = datalo;
1914            sh = datahi;
1915        }
1916
1917        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1918        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
        /* Retry from the LDXP (two instructions back) while TMP0 is nonzero. */
1919        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1920
1921        if (use_pair) {
1922            /* "b .+8", branching across the one insn of use_pair. */
1923            tcg_out_insn(s, 3206, B, 2);
            /* The B.cond emitted earlier lands on the LDP/STP emitted next. */
1924            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1925        }
1926    }
1927
1928    if (use_pair) {
1929        if (is_ld) {
1930            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1931        } else {
1932            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1933        }
1934    }
1935
1936    if (ldst) {
1937        ldst->type = TCG_TYPE_I128;
1938        ldst->datalo_reg = datalo;
1939        ldst->datahi_reg = datahi;
1940        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1941    }
1942}
1943
1944static const tcg_insn_unit *tb_ret_addr;
1945
1946static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1947{
1948    /* Reuse the zeroing that exists for goto_ptr.  */
1949    if (a0 == 0) {
1950        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1951    } else {
1952        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1953        tcg_out_goto_long(s, tb_ret_addr);
1954    }
1955}
1956
1957static void tcg_out_goto_tb(TCGContext *s, int which)
1958{
1959    /*
1960     * Direct branch, or indirect address load, will be patched
1961     * by tb_target_set_jmp_target.  Assert indirect load offset
1962     * in range early, regardless of direct branch distance.
1963     */
1964    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1965    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1966
1967    set_jmp_insn_offset(s, which);
1968    tcg_out32(s, I3206_B);
1969    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1970    set_jmp_reset_offset(s, which);
1971}
1972
1973void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1974                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1975{
1976    uintptr_t d_addr = tb->jmp_target_addr[n];
1977    ptrdiff_t d_offset = d_addr - jmp_rx;
1978    tcg_insn_unit insn;
1979
1980    /* Either directly branch, or indirect branch load. */
1981    if (d_offset == sextract64(d_offset, 0, 28)) {
1982        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1983    } else {
1984        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1985        ptrdiff_t i_offset = i_addr - jmp_rx;
1986
1987        /* Note that we asserted this in range in tcg_out_goto_tb. */
1988        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
1989    }
1990    qatomic_set((uint32_t *)jmp_rw, insn);
1991    flush_idcache_range(jmp_rx, jmp_rw, 4);
1992}
1993
/*
 * Emit host code for one integer TCG opcode.
 *
 * @s:          code generation context
 * @opc:        opcode to emit
 * @args:       operands of the opcode; depending on opcode and position
 *              an entry is a register number, a constant, a label, a
 *              condition or a flags word
 * @const_args: per-operand flags; a nonzero entry means the matching
 *              args[] entry is a constant rather than a register
 *
 * Opcodes grouped with the default label at the end are not expected to
 * reach this function and abort via g_assert_not_reached().
 */
1994static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1995                       const TCGArg args[TCG_MAX_OP_ARGS],
1996                       const int const_args[TCG_MAX_OP_ARGS])
1997{
1998    /* 99% of the time, we can signal the use of extension registers
1999       by looking to see if the opcode handles 64-bit data.  */
2000    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2001
2002    /* Hoist the loads of the most common arguments.  */
2003    TCGArg a0 = args[0];
2004    TCGArg a1 = args[1];
2005    TCGArg a2 = args[2];
2006    int c2 = const_args[2];
2007
2008    /* Some operands are defined with "rZ" constraint, a register or
2009       the zero register.  These need not actually test args[I] == 0.  */
2010#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2011
2012    switch (opc) {
2013    case INDEX_op_goto_ptr:
2014        tcg_out_insn(s, 3207, BR, a0);
2015        break;
2016
2017    case INDEX_op_br:
2018        tcg_out_goto_label(s, arg_label(a0));
2019        break;
2020
    /* Host loads: a0 = destination, a1 = base register, a2 = offset; the
       last argument of tcg_out_ldst is log2 of the access size in bytes. */
2021    case INDEX_op_ld8u_i32:
2022    case INDEX_op_ld8u_i64:
2023        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2024        break;
2025    case INDEX_op_ld8s_i32:
2026        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2027        break;
2028    case INDEX_op_ld8s_i64:
2029        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2030        break;
2031    case INDEX_op_ld16u_i32:
2032    case INDEX_op_ld16u_i64:
2033        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2034        break;
2035    case INDEX_op_ld16s_i32:
2036        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2037        break;
2038    case INDEX_op_ld16s_i64:
2039        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2040        break;
2041    case INDEX_op_ld_i32:
2042    case INDEX_op_ld32u_i64:
2043        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2044        break;
2045    case INDEX_op_ld32s_i64:
2046        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2047        break;
2048    case INDEX_op_ld_i64:
2049        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2050        break;
2051
    /* Host stores: a constant source operand is stored from XZR. */
2052    case INDEX_op_st8_i32:
2053    case INDEX_op_st8_i64:
2054        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2055        break;
2056    case INDEX_op_st16_i32:
2057    case INDEX_op_st16_i64:
2058        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2059        break;
2060    case INDEX_op_st_i32:
2061    case INDEX_op_st32_i64:
2062        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2063        break;
2064    case INDEX_op_st_i64:
2065        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2066        break;
2067
    /* For the 32-bit forms below, a2 is first reduced to its low 32 bits,
       sign-extended, before any constant handling. */
2068    case INDEX_op_add_i32:
2069        a2 = (int32_t)a2;
2070        /* FALLTHRU */
2071    case INDEX_op_add_i64:
2072        if (c2) {
2073            tcg_out_addsubi(s, ext, a0, a1, a2);
2074        } else {
2075            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2076        }
2077        break;
2078
2079    case INDEX_op_sub_i32:
2080        a2 = (int32_t)a2;
2081        /* FALLTHRU */
2082    case INDEX_op_sub_i64:
2083        if (c2) {
            /* Subtracting a constant is emitted as adding its negation. */
2084            tcg_out_addsubi(s, ext, a0, a1, -a2);
2085        } else {
2086            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2087        }
2088        break;
2089
2090    case INDEX_op_neg_i64:
2091    case INDEX_op_neg_i32:
2092        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2093        break;
2094
2095    case INDEX_op_and_i32:
2096        a2 = (int32_t)a2;
2097        /* FALLTHRU */
2098    case INDEX_op_and_i64:
2099        if (c2) {
2100            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2101        } else {
2102            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2103        }
2104        break;
2105
    /* The "c" variants (andc, orc, eqv) complement the second operand: with
       a constant the complemented value is used as the immediate, with a
       register the BIC/ORN/EON instruction is used. */
2106    case INDEX_op_andc_i32:
2107        a2 = (int32_t)a2;
2108        /* FALLTHRU */
2109    case INDEX_op_andc_i64:
2110        if (c2) {
2111            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2112        } else {
2113            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2114        }
2115        break;
2116
2117    case INDEX_op_or_i32:
2118        a2 = (int32_t)a2;
2119        /* FALLTHRU */
2120    case INDEX_op_or_i64:
2121        if (c2) {
2122            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_orc_i32:
2129        a2 = (int32_t)a2;
2130        /* FALLTHRU */
2131    case INDEX_op_orc_i64:
2132        if (c2) {
2133            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2134        } else {
2135            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2136        }
2137        break;
2138
2139    case INDEX_op_xor_i32:
2140        a2 = (int32_t)a2;
2141        /* FALLTHRU */
2142    case INDEX_op_xor_i64:
2143        if (c2) {
2144            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2145        } else {
2146            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2147        }
2148        break;
2149
2150    case INDEX_op_eqv_i32:
2151        a2 = (int32_t)a2;
2152        /* FALLTHRU */
2153    case INDEX_op_eqv_i64:
2154        if (c2) {
2155            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2156        } else {
2157            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2158        }
2159        break;
2160
2161    case INDEX_op_not_i64:
2162    case INDEX_op_not_i32:
2163        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2164        break;
2165
2166    case INDEX_op_mul_i64:
2167    case INDEX_op_mul_i32:
        /* Multiply-add with XZR as the addend. */
2168        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2169        break;
2170
2171    case INDEX_op_div_i64:
2172    case INDEX_op_div_i32:
2173        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2174        break;
2175    case INDEX_op_divu_i64:
2176    case INDEX_op_divu_i32:
2177        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2178        break;
2179
    /* Remainder: quotient into TMP0, then a0 = a1 - TMP0 * a2 via MSUB. */
2180    case INDEX_op_rem_i64:
2181    case INDEX_op_rem_i32:
2182        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2183        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2184        break;
2185    case INDEX_op_remu_i64:
2186    case INDEX_op_remu_i32:
2187        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2188        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2189        break;
2190
    /* Shifts and rotates: constant counts go through the tcg_out_* helpers,
       register counts use the variable-shift instructions. */
2191    case INDEX_op_shl_i64:
2192    case INDEX_op_shl_i32:
2193        if (c2) {
2194            tcg_out_shl(s, ext, a0, a1, a2);
2195        } else {
2196            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2197        }
2198        break;
2199
2200    case INDEX_op_shr_i64:
2201    case INDEX_op_shr_i32:
2202        if (c2) {
2203            tcg_out_shr(s, ext, a0, a1, a2);
2204        } else {
2205            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2206        }
2207        break;
2208
2209    case INDEX_op_sar_i64:
2210    case INDEX_op_sar_i32:
2211        if (c2) {
2212            tcg_out_sar(s, ext, a0, a1, a2);
2213        } else {
2214            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2215        }
2216        break;
2217
2218    case INDEX_op_rotr_i64:
2219    case INDEX_op_rotr_i32:
2220        if (c2) {
2221            tcg_out_rotr(s, ext, a0, a1, a2);
2222        } else {
2223            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2224        }
2225        break;
2226
2227    case INDEX_op_rotl_i64:
2228    case INDEX_op_rotl_i32:
2229        if (c2) {
2230            tcg_out_rotl(s, ext, a0, a1, a2);
2231        } else {
            /* Rotate left by a register: negate the count into TMP0
               (32-bit SUB from XZR) and rotate right by that. */
2232            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2233            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2234        }
2235        break;
2236
    /* The final argument of tcg_out_cltz selects ctz (true) or clz (false). */
2237    case INDEX_op_clz_i64:
2238    case INDEX_op_clz_i32:
2239        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2240        break;
2241    case INDEX_op_ctz_i64:
2242    case INDEX_op_ctz_i32:
2243        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2244        break;
2245
    /* brcond operands: a0 and a1 are compared, a2 is the condition and
       args[3] the label. */
2246    case INDEX_op_brcond_i32:
2247        a1 = (int32_t)a1;
2248        /* FALLTHRU */
2249    case INDEX_op_brcond_i64:
2250        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2251        break;
2252
2253    case INDEX_op_setcond_i32:
2254        a2 = (int32_t)a2;
2255        /* FALLTHRU */
2256    case INDEX_op_setcond_i64:
2257        tcg_out_cmp(s, ext, a1, a2, c2);
2258        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2259        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2260                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2261        break;
2262
2263    case INDEX_op_movcond_i32:
2264        a2 = (int32_t)a2;
2265        /* FALLTHRU */
2266    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then select between operands 3 and 4 under
           the condition in args[5]. */
2267        tcg_out_cmp(s, ext, a1, a2, c2);
2268        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2269        break;
2270
    /* Guest memory accesses; ext is passed on as the data type. */
2271    case INDEX_op_qemu_ld_a32_i32:
2272    case INDEX_op_qemu_ld_a64_i32:
2273    case INDEX_op_qemu_ld_a32_i64:
2274    case INDEX_op_qemu_ld_a64_i64:
2275        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2276        break;
2277    case INDEX_op_qemu_st_a32_i32:
2278    case INDEX_op_qemu_st_a64_i32:
2279    case INDEX_op_qemu_st_a32_i64:
2280    case INDEX_op_qemu_st_a64_i64:
2281        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2282        break;
    /* 128-bit forms: a0/a1 are the low/high data registers, a2 the address
       register and args[3] the MemOpIdx. */
2283    case INDEX_op_qemu_ld_a32_i128:
2284    case INDEX_op_qemu_ld_a64_i128:
2285        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2286        break;
2287    case INDEX_op_qemu_st_a32_i128:
2288    case INDEX_op_qemu_st_a64_i128:
2289        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2290        break;
2291
    /* Byte swaps: a2 carries the TCG_BSWAP_* flags tested below. */
2292    case INDEX_op_bswap64_i64:
2293        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2294        break;
2295    case INDEX_op_bswap32_i64:
2296        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2297        if (a2 & TCG_BSWAP_OS) {
2298            tcg_out_ext32s(s, a0, a0);
2299        }
2300        break;
2301    case INDEX_op_bswap32_i32:
2302        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2303        break;
2304    case INDEX_op_bswap16_i64:
2305    case INDEX_op_bswap16_i32:
2306        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2307        if (a2 & TCG_BSWAP_OS) {
2308            /* Output must be sign-extended. */
2309            tcg_out_ext16s(s, ext, a0, a0);
2310        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2311            /* Output must be zero-extended, but input isn't. */
2312            tcg_out_ext16u(s, a0, a0);
2313        }
2314        break;
2315
2316    case INDEX_op_deposit_i64:
2317    case INDEX_op_deposit_i32:
2318        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2319        break;
2320
    /* Bitfield extracts: the field is given to UBFM/SBFM as first bit a2
       and last bit a2 + args[3] - 1. */
2321    case INDEX_op_extract_i64:
2322    case INDEX_op_extract_i32:
2323        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2324        break;
2325
2326    case INDEX_op_sextract_i64:
2327    case INDEX_op_sextract_i32:
2328        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2329        break;
2330
2331    case INDEX_op_extract2_i64:
2332    case INDEX_op_extract2_i32:
        /* Note the operand order: operand 2 is passed before operand 1. */
2333        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2334        break;
2335
    /* Double-word add/sub: the final argument of tcg_out_addsub2 selects
       subtraction.  The 32-bit forms narrow args[4] to int32_t. */
2336    case INDEX_op_add2_i32:
2337        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2338                        (int32_t)args[4], args[5], const_args[4],
2339                        const_args[5], false);
2340        break;
2341    case INDEX_op_add2_i64:
2342        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2343                        args[5], const_args[4], const_args[5], false);
2344        break;
2345    case INDEX_op_sub2_i32:
2346        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2347                        (int32_t)args[4], args[5], const_args[4],
2348                        const_args[5], true);
2349        break;
2350    case INDEX_op_sub2_i64:
2351        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2352                        args[5], const_args[4], const_args[5], true);
2353        break;
2354
2355    case INDEX_op_muluh_i64:
2356        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2357        break;
2358    case INDEX_op_mulsh_i64:
2359        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2360        break;
2361
2362    case INDEX_op_mb:
2363        tcg_out_mb(s, a0);
2364        break;
2365
2366    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2367    case INDEX_op_mov_i64:
2368    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2369    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2370    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2371    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2372    case INDEX_op_ext8s_i64:
2373    case INDEX_op_ext8u_i32:
2374    case INDEX_op_ext8u_i64:
2375    case INDEX_op_ext16s_i64:
2376    case INDEX_op_ext16s_i32:
2377    case INDEX_op_ext16u_i64:
2378    case INDEX_op_ext16u_i32:
2379    case INDEX_op_ext32s_i64:
2380    case INDEX_op_ext32u_i64:
2381    case INDEX_op_ext_i32_i64:
2382    case INDEX_op_extu_i32_i64:
2383    case INDEX_op_extrl_i64_i32:
2384    default:
2385        g_assert_not_reached();
2386    }
2387
2388#undef REG0
2389}
2390
/*
 * Emit host code for one vector TCG opcode.
 *
 * @s:          code generation context
 * @opc:        vector opcode to emit
 * @vecl:       added to TCG_TYPE_V64 to form the vector type, and also
 *              used directly as the Q field of the emitted instructions
 * @vece:       element size; (8 << vece) is used as the element width in
 *              bits when encoding shift immediates
 * @args:       operands (register numbers, constants, condition)
 * @const_args: nonzero entry means the matching args[] entry is a constant
 *
 * When Q is clear and the element size is MO_64, the scalar instruction
 * encodings are used wherever the code below provides one.
 * mov_vec, dup_vec and unknown opcodes abort via g_assert_not_reached().
 */
2391static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2392                           unsigned vecl, unsigned vece,
2393                           const TCGArg args[TCG_MAX_OP_ARGS],
2394                           const int const_args[TCG_MAX_OP_ARGS])
2395{
    /*
     * Comparison instruction per TCGCond.  A zero entry means the
     * condition has no instruction in that table; cmp_vec handles such
     * conditions by swapping operands and condition.  The cmp0 tables
     * are used when the second operand is a constant.
     */
2396    static const AArch64Insn cmp_vec_insn[16] = {
2397        [TCG_COND_EQ] = I3616_CMEQ,
2398        [TCG_COND_GT] = I3616_CMGT,
2399        [TCG_COND_GE] = I3616_CMGE,
2400        [TCG_COND_GTU] = I3616_CMHI,
2401        [TCG_COND_GEU] = I3616_CMHS,
2402    };
2403    static const AArch64Insn cmp_scalar_insn[16] = {
2404        [TCG_COND_EQ] = I3611_CMEQ,
2405        [TCG_COND_GT] = I3611_CMGT,
2406        [TCG_COND_GE] = I3611_CMGE,
2407        [TCG_COND_GTU] = I3611_CMHI,
2408        [TCG_COND_GEU] = I3611_CMHS,
2409    };
2410    static const AArch64Insn cmp0_vec_insn[16] = {
2411        [TCG_COND_EQ] = I3617_CMEQ0,
2412        [TCG_COND_GT] = I3617_CMGT0,
2413        [TCG_COND_GE] = I3617_CMGE0,
2414        [TCG_COND_LT] = I3617_CMLT0,
2415        [TCG_COND_LE] = I3617_CMLE0,
2416    };
2417    static const AArch64Insn cmp0_scalar_insn[16] = {
2418        [TCG_COND_EQ] = I3612_CMEQ0,
2419        [TCG_COND_GT] = I3612_CMGT0,
2420        [TCG_COND_GE] = I3612_CMGE0,
2421        [TCG_COND_LT] = I3612_CMLT0,
2422        [TCG_COND_LE] = I3612_CMLE0,
2423    };
2424
2425    TCGType type = vecl + TCG_TYPE_V64;
2426    unsigned is_q = vecl;
2427    bool is_scalar = !is_q && vece == MO_64;
2428    TCGArg a0, a1, a2, a3;
2429    int cmode, imm8;
2430
2431    a0 = args[0];
2432    a1 = args[1];
2433    a2 = args[2];
2434
2435    switch (opc) {
2436    case INDEX_op_ld_vec:
2437        tcg_out_ld(s, type, a0, a1, a2);
2438        break;
2439    case INDEX_op_st_vec:
2440        tcg_out_st(s, type, a0, a1, a2);
2441        break;
2442    case INDEX_op_dupm_vec:
2443        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2444        break;
2445    case INDEX_op_add_vec:
2446        if (is_scalar) {
2447            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2448        } else {
2449            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2450        }
2451        break;
2452    case INDEX_op_sub_vec:
2453        if (is_scalar) {
2454            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2455        } else {
2456            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2457        }
2458        break;
2459    case INDEX_op_mul_vec:
2460        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2461        break;
2462    case INDEX_op_neg_vec:
2463        if (is_scalar) {
2464            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2465        } else {
2466            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2467        }
2468        break;
2469    case INDEX_op_abs_vec:
2470        if (is_scalar) {
2471            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2472        } else {
2473            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2474        }
2475        break;
    /*
     * and/or/andc/orc with a constant second operand share one scheme.
     * is_shimm1632() supplies cmode/imm8 for the constant (or for its
     * complement); its return value is not checked here, so the constant
     * is presumably already known to be encodable -- confirm against the
     * operand constraints.  If destination and first source are the same
     * register, a single immediate-form instruction is emitted and the
     * function returns.  Otherwise the constant is first materialized in
     * a0 and then used as the second register operand.
     */
2476    case INDEX_op_and_vec:
2477        if (const_args[2]) {
2478            is_shimm1632(~a2, &cmode, &imm8);
2479            if (a0 == a1) {
2480                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2481                return;
2482            }
2483            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2484            a2 = a0;
2485        }
2486        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2487        break;
2488    case INDEX_op_or_vec:
2489        if (const_args[2]) {
2490            is_shimm1632(a2, &cmode, &imm8);
2491            if (a0 == a1) {
2492                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2493                return;
2494            }
2495            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2496            a2 = a0;
2497        }
2498        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2499        break;
2500    case INDEX_op_andc_vec:
2501        if (const_args[2]) {
2502            is_shimm1632(a2, &cmode, &imm8);
2503            if (a0 == a1) {
2504                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2505                return;
2506            }
2507            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2508            a2 = a0;
2509        }
2510        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2511        break;
2512    case INDEX_op_orc_vec:
2513        if (const_args[2]) {
2514            is_shimm1632(~a2, &cmode, &imm8);
2515            if (a0 == a1) {
2516                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2517                return;
2518            }
2519            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2520            a2 = a0;
2521        }
2522        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2523        break;
2524    case INDEX_op_xor_vec:
2525        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2526        break;
2527    case INDEX_op_ssadd_vec:
2528        if (is_scalar) {
2529            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2530        } else {
2531            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2532        }
2533        break;
2534    case INDEX_op_sssub_vec:
2535        if (is_scalar) {
2536            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2537        } else {
2538            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2539        }
2540        break;
2541    case INDEX_op_usadd_vec:
2542        if (is_scalar) {
2543            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2544        } else {
2545            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2546        }
2547        break;
2548    case INDEX_op_ussub_vec:
2549        if (is_scalar) {
2550            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2551        } else {
2552            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2553        }
2554        break;
2555    case INDEX_op_smax_vec:
2556        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2557        break;
2558    case INDEX_op_smin_vec:
2559        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2560        break;
2561    case INDEX_op_umax_vec:
2562        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2563        break;
2564    case INDEX_op_umin_vec:
2565        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2566        break;
2567    case INDEX_op_not_vec:
2568        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2569        break;
    /*
     * Shifts by immediate.  The value placed in the immediate field is
     * (element bits + count) for left shifts and (2 * element bits - count)
     * for right shifts, where element bits is (8 << vece).
     */
2570    case INDEX_op_shli_vec:
2571        if (is_scalar) {
2572            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2573        } else {
2574            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2575        }
2576        break;
2577    case INDEX_op_shri_vec:
2578        if (is_scalar) {
2579            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2580        } else {
2581            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2582        }
2583        break;
2584    case INDEX_op_sari_vec:
2585        if (is_scalar) {
2586            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2587        } else {
2588            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2589        }
2590        break;
2591    case INDEX_op_aa64_sli_vec:
        /* a1 is not passed to the instruction: the source is a2 and the
           shift count is args[3]. */
2592        if (is_scalar) {
2593            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2594        } else {
2595            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2596        }
2597        break;
2598    case INDEX_op_shlv_vec:
2599        if (is_scalar) {
2600            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2601        } else {
2602            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2603        }
2604        break;
2605    case INDEX_op_aa64_sshl_vec:
2606        if (is_scalar) {
2607            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2608        } else {
2609            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2610        }
2611        break;
2612    case INDEX_op_cmp_vec:
2613        {
2614            TCGCond cond = args[3];
2615            AArch64Insn insn;
2616
2617            if (cond == TCG_COND_NE) {
2618                if (const_args[2]) {
                    /* Constant operand: CMTST of a1 against itself; a2 is
                       not used on this path. */
2619                    if (is_scalar) {
2620                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2621                    } else {
2622                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2623                    }
2624                } else {
                    /* NE between registers: CMEQ, then invert the result. */
2625                    if (is_scalar) {
2626                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2627                    } else {
2628                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2629                    }
2630                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2631                }
2632            } else {
2633                if (const_args[2]) {
                    /* Prefer a compare-with-zero instruction if the
                       condition has one in the cmp0 tables. */
2634                    if (is_scalar) {
2635                        insn = cmp0_scalar_insn[cond];
2636                        if (insn) {
2637                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2638                            break;
2639                        }
2640                    } else {
2641                        insn = cmp0_vec_insn[cond];
2642                        if (insn) {
2643                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2644                            break;
2645                        }
2646                    }
                    /* None available: put zero in TCG_VEC_TMP0 and fall
                       through to the register-register compare. */
2647                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2648                    a2 = TCG_VEC_TMP0;
2649                }
2650                if (is_scalar) {
2651                    insn = cmp_scalar_insn[cond];
2652                    if (insn == 0) {
                        /* No instruction for this condition: swap the
                           operands and use the swapped condition. */
2653                        TCGArg t;
2654                        t = a1, a1 = a2, a2 = t;
2655                        cond = tcg_swap_cond(cond);
2656                        insn = cmp_scalar_insn[cond];
2657                        tcg_debug_assert(insn != 0);
2658                    }
2659                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2660                } else {
2661                    insn = cmp_vec_insn[cond];
2662                    if (insn == 0) {
                        /* Same operand/condition swap as the scalar case. */
2663                        TCGArg t;
2664                        t = a1, a1 = a2, a2 = t;
2665                        cond = tcg_swap_cond(cond);
2666                        insn = cmp_vec_insn[cond];
2667                        tcg_debug_assert(insn != 0);
2668                    }
2669                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2670                }
2671            }
2672        }
2673        break;
2674
2675    case INDEX_op_bitsel_vec:
        /* The instruction is chosen by which input already lives in a0:
           BIT if it is a3, BIF if it is a2, otherwise BSL after making
           sure a1 has been copied into a0. */
2676        a3 = args[3];
2677        if (a0 == a3) {
2678            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2679        } else if (a0 == a2) {
2680            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2681        } else {
2682            if (a0 != a1) {
2683                tcg_out_mov(s, type, a0, a1);
2684            }
2685            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2686        }
2687        break;
2688
2689    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2690    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2691    default:
2692        g_assert_not_reached();
2693    }
2694}
2695
2696int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2697{
2698    switch (opc) {
2699    case INDEX_op_add_vec:
2700    case INDEX_op_sub_vec:
2701    case INDEX_op_and_vec:
2702    case INDEX_op_or_vec:
2703    case INDEX_op_xor_vec:
2704    case INDEX_op_andc_vec:
2705    case INDEX_op_orc_vec:
2706    case INDEX_op_neg_vec:
2707    case INDEX_op_abs_vec:
2708    case INDEX_op_not_vec:
2709    case INDEX_op_cmp_vec:
2710    case INDEX_op_shli_vec:
2711    case INDEX_op_shri_vec:
2712    case INDEX_op_sari_vec:
2713    case INDEX_op_ssadd_vec:
2714    case INDEX_op_sssub_vec:
2715    case INDEX_op_usadd_vec:
2716    case INDEX_op_ussub_vec:
2717    case INDEX_op_shlv_vec:
2718    case INDEX_op_bitsel_vec:
2719        return 1;
2720    case INDEX_op_rotli_vec:
2721    case INDEX_op_shrv_vec:
2722    case INDEX_op_sarv_vec:
2723    case INDEX_op_rotlv_vec:
2724    case INDEX_op_rotrv_vec:
2725        return -1;
2726    case INDEX_op_mul_vec:
2727    case INDEX_op_smax_vec:
2728    case INDEX_op_smin_vec:
2729    case INDEX_op_umax_vec:
2730    case INDEX_op_umin_vec:
2731        return vece < MO_64;
2732
2733    default:
2734        return 0;
2735    }
2736}
2737
2738void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2739                       TCGArg a0, ...)
2740{
2741    va_list va;
2742    TCGv_vec v0, v1, v2, t1, t2, c1;
2743    TCGArg a2;
2744
2745    va_start(va, a0);
2746    v0 = temp_tcgv_vec(arg_temp(a0));
2747    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2748    a2 = va_arg(va, TCGArg);
2749    va_end(va);
2750
2751    switch (opc) {
2752    case INDEX_op_rotli_vec:
2753        t1 = tcg_temp_new_vec(type);
2754        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2755        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2756                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2757        tcg_temp_free_vec(t1);
2758        break;
2759
2760    case INDEX_op_shrv_vec:
2761    case INDEX_op_sarv_vec:
2762        /* Right shifts are negative left shifts for AArch64.  */
2763        v2 = temp_tcgv_vec(arg_temp(a2));
2764        t1 = tcg_temp_new_vec(type);
2765        tcg_gen_neg_vec(vece, t1, v2);
2766        opc = (opc == INDEX_op_shrv_vec
2767               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2768        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2769                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2770        tcg_temp_free_vec(t1);
2771        break;
2772
2773    case INDEX_op_rotlv_vec:
2774        v2 = temp_tcgv_vec(arg_temp(a2));
2775        t1 = tcg_temp_new_vec(type);
2776        c1 = tcg_constant_vec(type, vece, 8 << vece);
2777        tcg_gen_sub_vec(vece, t1, v2, c1);
2778        /* Right shifts are negative left shifts for AArch64.  */
2779        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2780                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2781        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2782                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2783        tcg_gen_or_vec(vece, v0, v0, t1);
2784        tcg_temp_free_vec(t1);
2785        break;
2786
2787    case INDEX_op_rotrv_vec:
2788        v2 = temp_tcgv_vec(arg_temp(a2));
2789        t1 = tcg_temp_new_vec(type);
2790        t2 = tcg_temp_new_vec(type);
2791        c1 = tcg_constant_vec(type, vece, 8 << vece);
2792        tcg_gen_neg_vec(vece, t1, v2);
2793        tcg_gen_sub_vec(vece, t2, c1, v2);
2794        /* Right shifts are negative left shifts for AArch64.  */
2795        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2796                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2797        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2798                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2799        tcg_gen_or_vec(vece, v0, t1, t2);
2800        tcg_temp_free_vec(t1);
2801        tcg_temp_free_vec(t2);
2802        break;
2803
2804    default:
2805        g_assert_not_reached();
2806    }
2807}
2808
2809static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2810{
2811    switch (op) {
2812    case INDEX_op_goto_ptr:
2813        return C_O0_I1(r);
2814
2815    case INDEX_op_ld8u_i32:
2816    case INDEX_op_ld8s_i32:
2817    case INDEX_op_ld16u_i32:
2818    case INDEX_op_ld16s_i32:
2819    case INDEX_op_ld_i32:
2820    case INDEX_op_ld8u_i64:
2821    case INDEX_op_ld8s_i64:
2822    case INDEX_op_ld16u_i64:
2823    case INDEX_op_ld16s_i64:
2824    case INDEX_op_ld32u_i64:
2825    case INDEX_op_ld32s_i64:
2826    case INDEX_op_ld_i64:
2827    case INDEX_op_neg_i32:
2828    case INDEX_op_neg_i64:
2829    case INDEX_op_not_i32:
2830    case INDEX_op_not_i64:
2831    case INDEX_op_bswap16_i32:
2832    case INDEX_op_bswap32_i32:
2833    case INDEX_op_bswap16_i64:
2834    case INDEX_op_bswap32_i64:
2835    case INDEX_op_bswap64_i64:
2836    case INDEX_op_ext8s_i32:
2837    case INDEX_op_ext16s_i32:
2838    case INDEX_op_ext8u_i32:
2839    case INDEX_op_ext16u_i32:
2840    case INDEX_op_ext8s_i64:
2841    case INDEX_op_ext16s_i64:
2842    case INDEX_op_ext32s_i64:
2843    case INDEX_op_ext8u_i64:
2844    case INDEX_op_ext16u_i64:
2845    case INDEX_op_ext32u_i64:
2846    case INDEX_op_ext_i32_i64:
2847    case INDEX_op_extu_i32_i64:
2848    case INDEX_op_extract_i32:
2849    case INDEX_op_extract_i64:
2850    case INDEX_op_sextract_i32:
2851    case INDEX_op_sextract_i64:
2852        return C_O1_I1(r, r);
2853
2854    case INDEX_op_st8_i32:
2855    case INDEX_op_st16_i32:
2856    case INDEX_op_st_i32:
2857    case INDEX_op_st8_i64:
2858    case INDEX_op_st16_i64:
2859    case INDEX_op_st32_i64:
2860    case INDEX_op_st_i64:
2861        return C_O0_I2(rZ, r);
2862
2863    case INDEX_op_add_i32:
2864    case INDEX_op_add_i64:
2865    case INDEX_op_sub_i32:
2866    case INDEX_op_sub_i64:
2867    case INDEX_op_setcond_i32:
2868    case INDEX_op_setcond_i64:
2869        return C_O1_I2(r, r, rA);
2870
2871    case INDEX_op_mul_i32:
2872    case INDEX_op_mul_i64:
2873    case INDEX_op_div_i32:
2874    case INDEX_op_div_i64:
2875    case INDEX_op_divu_i32:
2876    case INDEX_op_divu_i64:
2877    case INDEX_op_rem_i32:
2878    case INDEX_op_rem_i64:
2879    case INDEX_op_remu_i32:
2880    case INDEX_op_remu_i64:
2881    case INDEX_op_muluh_i64:
2882    case INDEX_op_mulsh_i64:
2883        return C_O1_I2(r, r, r);
2884
2885    case INDEX_op_and_i32:
2886    case INDEX_op_and_i64:
2887    case INDEX_op_or_i32:
2888    case INDEX_op_or_i64:
2889    case INDEX_op_xor_i32:
2890    case INDEX_op_xor_i64:
2891    case INDEX_op_andc_i32:
2892    case INDEX_op_andc_i64:
2893    case INDEX_op_orc_i32:
2894    case INDEX_op_orc_i64:
2895    case INDEX_op_eqv_i32:
2896    case INDEX_op_eqv_i64:
2897        return C_O1_I2(r, r, rL);
2898
2899    case INDEX_op_shl_i32:
2900    case INDEX_op_shr_i32:
2901    case INDEX_op_sar_i32:
2902    case INDEX_op_rotl_i32:
2903    case INDEX_op_rotr_i32:
2904    case INDEX_op_shl_i64:
2905    case INDEX_op_shr_i64:
2906    case INDEX_op_sar_i64:
2907    case INDEX_op_rotl_i64:
2908    case INDEX_op_rotr_i64:
2909        return C_O1_I2(r, r, ri);
2910
2911    case INDEX_op_clz_i32:
2912    case INDEX_op_ctz_i32:
2913    case INDEX_op_clz_i64:
2914    case INDEX_op_ctz_i64:
2915        return C_O1_I2(r, r, rAL);
2916
2917    case INDEX_op_brcond_i32:
2918    case INDEX_op_brcond_i64:
2919        return C_O0_I2(r, rA);
2920
2921    case INDEX_op_movcond_i32:
2922    case INDEX_op_movcond_i64:
2923        return C_O1_I4(r, r, rA, rZ, rZ);
2924
2925    case INDEX_op_qemu_ld_a32_i32:
2926    case INDEX_op_qemu_ld_a64_i32:
2927    case INDEX_op_qemu_ld_a32_i64:
2928    case INDEX_op_qemu_ld_a64_i64:
2929        return C_O1_I1(r, r);
2930    case INDEX_op_qemu_ld_a32_i128:
2931    case INDEX_op_qemu_ld_a64_i128:
2932        return C_O2_I1(r, r, r);
2933    case INDEX_op_qemu_st_a32_i32:
2934    case INDEX_op_qemu_st_a64_i32:
2935    case INDEX_op_qemu_st_a32_i64:
2936    case INDEX_op_qemu_st_a64_i64:
2937        return C_O0_I2(rZ, r);
2938    case INDEX_op_qemu_st_a32_i128:
2939    case INDEX_op_qemu_st_a64_i128:
2940        return C_O0_I3(rZ, rZ, r);
2941
2942    case INDEX_op_deposit_i32:
2943    case INDEX_op_deposit_i64:
2944        return C_O1_I2(r, 0, rZ);
2945
2946    case INDEX_op_extract2_i32:
2947    case INDEX_op_extract2_i64:
2948        return C_O1_I2(r, rZ, rZ);
2949
2950    case INDEX_op_add2_i32:
2951    case INDEX_op_add2_i64:
2952    case INDEX_op_sub2_i32:
2953    case INDEX_op_sub2_i64:
2954        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2955
2956    case INDEX_op_add_vec:
2957    case INDEX_op_sub_vec:
2958    case INDEX_op_mul_vec:
2959    case INDEX_op_xor_vec:
2960    case INDEX_op_ssadd_vec:
2961    case INDEX_op_sssub_vec:
2962    case INDEX_op_usadd_vec:
2963    case INDEX_op_ussub_vec:
2964    case INDEX_op_smax_vec:
2965    case INDEX_op_smin_vec:
2966    case INDEX_op_umax_vec:
2967    case INDEX_op_umin_vec:
2968    case INDEX_op_shlv_vec:
2969    case INDEX_op_shrv_vec:
2970    case INDEX_op_sarv_vec:
2971    case INDEX_op_aa64_sshl_vec:
2972        return C_O1_I2(w, w, w);
2973    case INDEX_op_not_vec:
2974    case INDEX_op_neg_vec:
2975    case INDEX_op_abs_vec:
2976    case INDEX_op_shli_vec:
2977    case INDEX_op_shri_vec:
2978    case INDEX_op_sari_vec:
2979        return C_O1_I1(w, w);
2980    case INDEX_op_ld_vec:
2981    case INDEX_op_dupm_vec:
2982        return C_O1_I1(w, r);
2983    case INDEX_op_st_vec:
2984        return C_O0_I2(w, r);
2985    case INDEX_op_dup_vec:
2986        return C_O1_I1(w, wr);
2987    case INDEX_op_or_vec:
2988    case INDEX_op_andc_vec:
2989        return C_O1_I2(w, w, wO);
2990    case INDEX_op_and_vec:
2991    case INDEX_op_orc_vec:
2992        return C_O1_I2(w, w, wN);
2993    case INDEX_op_cmp_vec:
2994        return C_O1_I2(w, w, wZ);
2995    case INDEX_op_bitsel_vec:
2996        return C_O1_I3(w, w, w, w);
2997    case INDEX_op_aa64_sli_vec:
2998        return C_O1_I2(w, 0, w);
2999
3000    default:
3001        g_assert_not_reached();
3002    }
3003}
3004
3005static void tcg_target_init(TCGContext *s)
3006{
3007    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3008    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3009    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3010    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3011
3012    tcg_target_call_clobber_regs = -1ull;
3013    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3014    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3015    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3016    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3017    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3018    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3019    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3020    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3021    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3022    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3023    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3024    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3025    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3026    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3027    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3028    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3029    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3030    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3031    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3032
3033    s->reserved_regs = 0;
3034    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3035    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3036    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3037    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3038    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3039    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3040    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3041}
3042
3043/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3044#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3045
3046#define FRAME_SIZE \
3047    ((PUSH_SIZE \
3048      + TCG_STATIC_CALL_ARGS_SIZE \
3049      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3050      + TCG_TARGET_STACK_ALIGN - 1) \
3051     & ~(TCG_TARGET_STACK_ALIGN - 1))
3052
3053/* We're expecting a 2 byte uleb128 encoded value.  */
3054QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3055
3056/* We're expecting to use a single ADDI insn.  */
3057QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3058
3059static void tcg_target_qemu_prologue(TCGContext *s)
3060{
3061    TCGReg r;
3062
3063    /* Push (FP, LR) and allocate space for all saved registers.  */
3064    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3065                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3066
3067    /* Set up frame pointer for canonical unwinding.  */
3068    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3069
3070    /* Store callee-preserved regs x19..x28.  */
3071    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3072        int ofs = (r - TCG_REG_X19 + 2) * 8;
3073        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3074    }
3075
3076    /* Make stack space for TCG locals.  */
3077    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3078                 FRAME_SIZE - PUSH_SIZE);
3079
3080    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3081    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3082                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3083
3084#if !defined(CONFIG_SOFTMMU)
3085    /*
3086     * Note that XZR cannot be encoded in the address base register slot,
3087     * as that actaully encodes SP.  Depending on the guest, we may need
3088     * to zero-extend the guest address via the address index register slot,
3089     * therefore we need to load even a zero guest base into a register.
3090     */
3091    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3092    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3093#endif
3094
3095    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3096    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3097
3098    /*
3099     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3100     * and fall through to the rest of the epilogue.
3101     */
3102    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3103    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3104
3105    /* TB epilogue */
3106    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3107
3108    /* Remove TCG locals stack space.  */
3109    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3110                 FRAME_SIZE - PUSH_SIZE);
3111
3112    /* Restore registers x19..x28.  */
3113    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3114        int ofs = (r - TCG_REG_X19 + 2) * 8;
3115        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3116    }
3117
3118    /* Pop (FP, LR), restore SP to previous frame.  */
3119    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3120                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3121    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3122}
3123
3124static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3125{
3126    int i;
3127    for (i = 0; i < count; ++i) {
3128        p[i] = NOP;
3129    }
3130}
3131
3132typedef struct {
3133    DebugFrameHeader h;
3134    uint8_t fde_def_cfa[4];
3135    uint8_t fde_reg_ofs[24];
3136} DebugFrame;
3137
3138#define ELF_HOST_MACHINE EM_AARCH64
3139
3140static const DebugFrame debug_frame = {
3141    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3142    .h.cie.id = -1,
3143    .h.cie.version = 1,
3144    .h.cie.code_align = 1,
3145    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3146    .h.cie.return_column = TCG_REG_LR,
3147
3148    /* Total FDE size does not include the "len" member.  */
3149    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3150
3151    .fde_def_cfa = {
3152        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3153        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3154        (FRAME_SIZE >> 7)
3155    },
3156    .fde_reg_ofs = {
3157        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3158        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3159        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3160        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3161        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3162        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3163        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3164        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3165        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3166        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3167        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3168        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3169    }
3170};
3171
3172void tcg_register_jit(const void *buf, size_t buf_size)
3173{
3174    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3175}
3176