xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision a675ca4c)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#ifndef CONFIG_SOFTMMU
81#define TCG_REG_GUEST_BASE TCG_REG_X28
82#endif
83
84static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
85{
86    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
87    ptrdiff_t offset = target - src_rx;
88
89    if (offset == sextract64(offset, 0, 26)) {
90        /* read instruction, mask away previous PC_REL26 parameter contents,
91           set the proper offset, then write back the instruction. */
92        *src_rw = deposit32(*src_rw, 0, 26, offset);
93        return true;
94    }
95    return false;
96}
97
98static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
99{
100    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
101    ptrdiff_t offset = target - src_rx;
102
103    if (offset == sextract64(offset, 0, 19)) {
104        *src_rw = deposit32(*src_rw, 5, 19, offset);
105        return true;
106    }
107    return false;
108}
109
110static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
111                        intptr_t value, intptr_t addend)
112{
113    tcg_debug_assert(addend == 0);
114    switch (type) {
115    case R_AARCH64_JUMP26:
116    case R_AARCH64_CALL26:
117        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
118    case R_AARCH64_CONDBR19:
119        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
120    default:
121        g_assert_not_reached();
122    }
123}
124
125#define TCG_CT_CONST_AIMM 0x100
126#define TCG_CT_CONST_LIMM 0x200
127#define TCG_CT_CONST_ZERO 0x400
128#define TCG_CT_CONST_MONE 0x800
129#define TCG_CT_CONST_ORRI 0x1000
130#define TCG_CT_CONST_ANDI 0x2000
131
132#define ALL_GENERAL_REGS  0xffffffffu
133#define ALL_VECTOR_REGS   0xffffffff00000000ull
134
135/* Match a constant valid for addition (12-bit, optionally shifted).  */
136static inline bool is_aimm(uint64_t val)
137{
138    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
139}
140
141/* Match a constant valid for logical operations.  */
142static inline bool is_limm(uint64_t val)
143{
144    /* Taking a simplified view of the logical immediates for now, ignoring
145       the replication that can happen across the field.  Match bit patterns
146       of the forms
147           0....01....1
148           0..01..10..0
149       and their inverses.  */
150
151    /* Make things easier below, by testing the form with msb clear. */
152    if ((int64_t)val < 0) {
153        val = ~val;
154    }
155    if (val == 0) {
156        return false;
157    }
158    val += val & -val;
159    return (val & (val - 1)) == 0;
160}
161
162/* Return true if v16 is a valid 16-bit shifted immediate.  */
163static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
164{
165    if (v16 == (v16 & 0xff)) {
166        *cmode = 0x8;
167        *imm8 = v16 & 0xff;
168        return true;
169    } else if (v16 == (v16 & 0xff00)) {
170        *cmode = 0xa;
171        *imm8 = v16 >> 8;
172        return true;
173    }
174    return false;
175}
176
177/* Return true if v32 is a valid 32-bit shifted immediate.  */
178static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
179{
180    if (v32 == (v32 & 0xff)) {
181        *cmode = 0x0;
182        *imm8 = v32 & 0xff;
183        return true;
184    } else if (v32 == (v32 & 0xff00)) {
185        *cmode = 0x2;
186        *imm8 = (v32 >> 8) & 0xff;
187        return true;
188    } else if (v32 == (v32 & 0xff0000)) {
189        *cmode = 0x4;
190        *imm8 = (v32 >> 16) & 0xff;
191        return true;
192    } else if (v32 == (v32 & 0xff000000)) {
193        *cmode = 0x6;
194        *imm8 = v32 >> 24;
195        return true;
196    }
197    return false;
198}
199
200/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
201static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
202{
203    if ((v32 & 0xffff00ff) == 0xff) {
204        *cmode = 0xc;
205        *imm8 = (v32 >> 8) & 0xff;
206        return true;
207    } else if ((v32 & 0xff00ffff) == 0xffff) {
208        *cmode = 0xd;
209        *imm8 = (v32 >> 16) & 0xff;
210        return true;
211    }
212    return false;
213}
214
215/* Return true if v32 is a valid float32 immediate.  */
216static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
217{
218    if (extract32(v32, 0, 19) == 0
219        && (extract32(v32, 25, 6) == 0x20
220            || extract32(v32, 25, 6) == 0x1f)) {
221        *cmode = 0xf;
222        *imm8 = (extract32(v32, 31, 1) << 7)
223              | (extract32(v32, 25, 1) << 6)
224              | extract32(v32, 19, 6);
225        return true;
226    }
227    return false;
228}
229
230/* Return true if v64 is a valid float64 immediate.  */
231static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
232{
233    if (extract64(v64, 0, 48) == 0
234        && (extract64(v64, 54, 9) == 0x100
235            || extract64(v64, 54, 9) == 0x0ff)) {
236        *cmode = 0xf;
237        *imm8 = (extract64(v64, 63, 1) << 7)
238              | (extract64(v64, 54, 1) << 6)
239              | extract64(v64, 48, 6);
240        return true;
241    }
242    return false;
243}
244
245/*
246 * Return non-zero if v32 can be formed by MOVI+ORR.
247 * Place the parameters for MOVI in (cmode, imm8).
248 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
249 */
250static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
251{
252    int i;
253
254    for (i = 6; i > 0; i -= 2) {
255        /* Mask out one byte we can add with ORR.  */
256        uint32_t tmp = v32 & ~(0xffu << (i * 4));
257        if (is_shimm32(tmp, cmode, imm8) ||
258            is_soimm32(tmp, cmode, imm8)) {
259            break;
260        }
261    }
262    return i;
263}
264
265/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
266static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
267{
268    if (v32 == deposit32(v32, 16, 16, v32)) {
269        return is_shimm16(v32, cmode, imm8);
270    } else {
271        return is_shimm32(v32, cmode, imm8);
272    }
273}
274
275static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
276{
277    if (ct & TCG_CT_CONST) {
278        return 1;
279    }
280    if (type == TCG_TYPE_I32) {
281        val = (int32_t)val;
282    }
283    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
284        return 1;
285    }
286    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
287        return 1;
288    }
289    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
293        return 1;
294    }
295
296    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
297    case 0:
298        break;
299    case TCG_CT_CONST_ANDI:
300        val = ~val;
301        /* fallthru */
302    case TCG_CT_CONST_ORRI:
303        if (val == deposit64(val, 32, 32, val)) {
304            int cmode, imm8;
305            return is_shimm1632(val, &cmode, &imm8);
306        }
307        break;
308    default:
309        /* Both bits should not be set for the same insn.  */
310        g_assert_not_reached();
311    }
312
313    return 0;
314}
315
316enum aarch64_cond_code {
317    COND_EQ = 0x0,
318    COND_NE = 0x1,
319    COND_CS = 0x2,     /* Unsigned greater or equal */
320    COND_HS = COND_CS, /* ALIAS greater or equal */
321    COND_CC = 0x3,     /* Unsigned less than */
322    COND_LO = COND_CC, /* ALIAS Lower */
323    COND_MI = 0x4,     /* Negative */
324    COND_PL = 0x5,     /* Zero or greater */
325    COND_VS = 0x6,     /* Overflow */
326    COND_VC = 0x7,     /* No overflow */
327    COND_HI = 0x8,     /* Unsigned greater than */
328    COND_LS = 0x9,     /* Unsigned less or equal */
329    COND_GE = 0xa,
330    COND_LT = 0xb,
331    COND_GT = 0xc,
332    COND_LE = 0xd,
333    COND_AL = 0xe,
334    COND_NV = 0xf, /* behaves like COND_AL here */
335};
336
337static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338    [TCG_COND_EQ] = COND_EQ,
339    [TCG_COND_NE] = COND_NE,
340    [TCG_COND_LT] = COND_LT,
341    [TCG_COND_GE] = COND_GE,
342    [TCG_COND_LE] = COND_LE,
343    [TCG_COND_GT] = COND_GT,
344    /* unsigned */
345    [TCG_COND_LTU] = COND_LO,
346    [TCG_COND_GTU] = COND_HI,
347    [TCG_COND_GEU] = COND_HS,
348    [TCG_COND_LEU] = COND_LS,
349};
350
351typedef enum {
352    LDST_ST = 0,    /* store */
353    LDST_LD = 1,    /* load */
354    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
355    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
356} AArch64LdstType;
357
358/* We encode the format of the insn into the beginning of the name, so that
359   we can have the preprocessor help "typecheck" the insn vs the output
360   function.  Arm didn't provide us with nice names for the formats, so we
361   use the section number of the architecture reference manual in which the
362   instruction group is described.  */
363typedef enum {
364    /* Compare and branch (immediate).  */
365    I3201_CBZ       = 0x34000000,
366    I3201_CBNZ      = 0x35000000,
367
368    /* Conditional branch (immediate).  */
369    I3202_B_C       = 0x54000000,
370
371    /* Unconditional branch (immediate).  */
372    I3206_B         = 0x14000000,
373    I3206_BL        = 0x94000000,
374
375    /* Unconditional branch (register).  */
376    I3207_BR        = 0xd61f0000,
377    I3207_BLR       = 0xd63f0000,
378    I3207_RET       = 0xd65f0000,
379
380    /* AdvSIMD load/store single structure.  */
381    I3303_LD1R      = 0x0d40c000,
382
383    /* Load literal for loading the address at pc-relative offset */
384    I3305_LDR       = 0x58000000,
385    I3305_LDR_v64   = 0x5c000000,
386    I3305_LDR_v128  = 0x9c000000,
387
388    /* Load/store exclusive. */
389    I3306_LDXP      = 0xc8600000,
390    I3306_STXP      = 0xc8200000,
391
392    /* Load/store register.  Described here as 3.3.12, but the helper
393       that emits them can transform to 3.3.10 or 3.3.13.  */
394    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
398
399    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
403
404    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
406
407    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
410
411    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
413
414    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
416
417    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
418    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
419
420    I3312_TO_I3310  = 0x00200800,
421    I3312_TO_I3313  = 0x01000000,
422
423    /* Load/store register pair instructions.  */
424    I3314_LDP       = 0x28400000,
425    I3314_STP       = 0x28000000,
426
427    /* Add/subtract immediate instructions.  */
428    I3401_ADDI      = 0x11000000,
429    I3401_ADDSI     = 0x31000000,
430    I3401_SUBI      = 0x51000000,
431    I3401_SUBSI     = 0x71000000,
432
433    /* Bitfield instructions.  */
434    I3402_BFM       = 0x33000000,
435    I3402_SBFM      = 0x13000000,
436    I3402_UBFM      = 0x53000000,
437
438    /* Extract instruction.  */
439    I3403_EXTR      = 0x13800000,
440
441    /* Logical immediate instructions.  */
442    I3404_ANDI      = 0x12000000,
443    I3404_ORRI      = 0x32000000,
444    I3404_EORI      = 0x52000000,
445    I3404_ANDSI     = 0x72000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract extended register instructions. */
457    I3501_ADD       = 0x0b200000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV       = 0x5ac00000, /* + size << 10 */
482
483    /* Data-processing (2 source) instructions.  */
484    I3508_LSLV      = 0x1ac02000,
485    I3508_LSRV      = 0x1ac02400,
486    I3508_ASRV      = 0x1ac02800,
487    I3508_RORV      = 0x1ac02c00,
488    I3508_SMULH     = 0x9b407c00,
489    I3508_UMULH     = 0x9bc07c00,
490    I3508_UDIV      = 0x1ac00800,
491    I3508_SDIV      = 0x1ac00c00,
492
493    /* Data-processing (3 source) instructions.  */
494    I3509_MADD      = 0x1b000000,
495    I3509_MSUB      = 0x1b008000,
496
497    /* Logical shifted register instructions (without a shift).  */
498    I3510_AND       = 0x0a000000,
499    I3510_BIC       = 0x0a200000,
500    I3510_ORR       = 0x2a000000,
501    I3510_ORN       = 0x2a200000,
502    I3510_EOR       = 0x4a000000,
503    I3510_EON       = 0x4a200000,
504    I3510_ANDS      = 0x6a000000,
505
506    /* Logical shifted register instructions (with a shift).  */
507    I3502S_AND_LSR  = I3510_AND | (1 << 22),
508
509    /* AdvSIMD copy */
510    I3605_DUP      = 0x0e000400,
511    I3605_INS      = 0x4e001c00,
512    I3605_UMOV     = 0x0e003c00,
513
514    /* AdvSIMD modified immediate */
515    I3606_MOVI      = 0x0f000400,
516    I3606_MVNI      = 0x2f000400,
517    I3606_BIC       = 0x2f001400,
518    I3606_ORR       = 0x0f001400,
519
520    /* AdvSIMD scalar shift by immediate */
521    I3609_SSHR      = 0x5f000400,
522    I3609_SSRA      = 0x5f001400,
523    I3609_SHL       = 0x5f005400,
524    I3609_USHR      = 0x7f000400,
525    I3609_USRA      = 0x7f001400,
526    I3609_SLI       = 0x7f005400,
527
528    /* AdvSIMD scalar three same */
529    I3611_SQADD     = 0x5e200c00,
530    I3611_SQSUB     = 0x5e202c00,
531    I3611_CMGT      = 0x5e203400,
532    I3611_CMGE      = 0x5e203c00,
533    I3611_SSHL      = 0x5e204400,
534    I3611_ADD       = 0x5e208400,
535    I3611_CMTST     = 0x5e208c00,
536    I3611_UQADD     = 0x7e200c00,
537    I3611_UQSUB     = 0x7e202c00,
538    I3611_CMHI      = 0x7e203400,
539    I3611_CMHS      = 0x7e203c00,
540    I3611_USHL      = 0x7e204400,
541    I3611_SUB       = 0x7e208400,
542    I3611_CMEQ      = 0x7e208c00,
543
544    /* AdvSIMD scalar two-reg misc */
545    I3612_CMGT0     = 0x5e208800,
546    I3612_CMEQ0     = 0x5e209800,
547    I3612_CMLT0     = 0x5e20a800,
548    I3612_ABS       = 0x5e20b800,
549    I3612_CMGE0     = 0x7e208800,
550    I3612_CMLE0     = 0x7e209800,
551    I3612_NEG       = 0x7e20b800,
552
553    /* AdvSIMD shift by immediate */
554    I3614_SSHR      = 0x0f000400,
555    I3614_SSRA      = 0x0f001400,
556    I3614_SHL       = 0x0f005400,
557    I3614_SLI       = 0x2f005400,
558    I3614_USHR      = 0x2f000400,
559    I3614_USRA      = 0x2f001400,
560
561    /* AdvSIMD three same.  */
562    I3616_ADD       = 0x0e208400,
563    I3616_AND       = 0x0e201c00,
564    I3616_BIC       = 0x0e601c00,
565    I3616_BIF       = 0x2ee01c00,
566    I3616_BIT       = 0x2ea01c00,
567    I3616_BSL       = 0x2e601c00,
568    I3616_EOR       = 0x2e201c00,
569    I3616_MUL       = 0x0e209c00,
570    I3616_ORR       = 0x0ea01c00,
571    I3616_ORN       = 0x0ee01c00,
572    I3616_SUB       = 0x2e208400,
573    I3616_CMGT      = 0x0e203400,
574    I3616_CMGE      = 0x0e203c00,
575    I3616_CMTST     = 0x0e208c00,
576    I3616_CMHI      = 0x2e203400,
577    I3616_CMHS      = 0x2e203c00,
578    I3616_CMEQ      = 0x2e208c00,
579    I3616_SMAX      = 0x0e206400,
580    I3616_SMIN      = 0x0e206c00,
581    I3616_SSHL      = 0x0e204400,
582    I3616_SQADD     = 0x0e200c00,
583    I3616_SQSUB     = 0x0e202c00,
584    I3616_UMAX      = 0x2e206400,
585    I3616_UMIN      = 0x2e206c00,
586    I3616_UQADD     = 0x2e200c00,
587    I3616_UQSUB     = 0x2e202c00,
588    I3616_USHL      = 0x2e204400,
589
590    /* AdvSIMD two-reg misc.  */
591    I3617_CMGT0     = 0x0e208800,
592    I3617_CMEQ0     = 0x0e209800,
593    I3617_CMLT0     = 0x0e20a800,
594    I3617_CMGE0     = 0x2e208800,
595    I3617_CMLE0     = 0x2e209800,
596    I3617_NOT       = 0x2e205800,
597    I3617_ABS       = 0x0e20b800,
598    I3617_NEG       = 0x2e20b800,
599
600    /* System instructions.  */
601    NOP             = 0xd503201f,
602    DMB_ISH         = 0xd50338bf,
603    DMB_LD          = 0x00000100,
604    DMB_ST          = 0x00000200,
605} AArch64Insn;
606
607static inline uint32_t tcg_in32(TCGContext *s)
608{
609    uint32_t v = *(uint32_t *)s->code_ptr;
610    return v;
611}
612
613/* Emit an opcode with "type-checking" of the format.  */
614#define tcg_out_insn(S, FMT, OP, ...) \
615    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
616
617static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
618                              TCGReg rt, TCGReg rn, unsigned size)
619{
620    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
621}
622
623static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
624                              int imm19, TCGReg rt)
625{
626    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
627}
628
629static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
630                              TCGReg rt, TCGReg rt2, TCGReg rn)
631{
632    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
633}
634
635static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
636                              TCGReg rt, int imm19)
637{
638    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
639}
640
641static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
642                              TCGCond c, int imm19)
643{
644    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
645}
646
647static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
648{
649    tcg_out32(s, insn | (imm26 & 0x03ffffff));
650}
651
652static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
653{
654    tcg_out32(s, insn | rn << 5);
655}
656
657static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
658                              TCGReg r1, TCGReg r2, TCGReg rn,
659                              tcg_target_long ofs, bool pre, bool w)
660{
661    insn |= 1u << 31; /* ext */
662    insn |= pre << 24;
663    insn |= w << 23;
664
665    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
666    insn |= (ofs & (0x7f << 3)) << (15 - 3);
667
668    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
669}
670
671static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
672                              TCGReg rd, TCGReg rn, uint64_t aimm)
673{
674    if (aimm > 0xfff) {
675        tcg_debug_assert((aimm & 0xfff) == 0);
676        aimm >>= 12;
677        tcg_debug_assert(aimm <= 0xfff);
678        aimm |= 1 << 12;  /* apply LSL 12 */
679    }
680    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
681}
682
683/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
684   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
685   that feed the DecodeBitMasks pseudo function.  */
686static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
687                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
688{
689    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
690              | rn << 5 | rd);
691}
692
693#define tcg_out_insn_3404  tcg_out_insn_3402
694
695static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
696                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
697{
698    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
699              | rn << 5 | rd);
700}
701
702/* This function is used for the Move (wide immediate) instruction group.
703   Note that SHIFT is a full shift count, not the 2 bit HW field. */
704static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
705                              TCGReg rd, uint16_t half, unsigned shift)
706{
707    tcg_debug_assert((shift & ~0x30) == 0);
708    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
709}
710
711static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
712                              TCGReg rd, int64_t disp)
713{
714    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
715}
716
717static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
718                                     TCGType sf, TCGReg rd, TCGReg rn,
719                                     TCGReg rm, int opt, int imm3)
720{
721    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
722              imm3 << 10 | rn << 5 | rd);
723}
724
725/* This function is for both 3.5.2 (Add/Subtract shifted register), for
726   the rare occasion when we actually want to supply a shift amount.  */
727static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
728                                      TCGType ext, TCGReg rd, TCGReg rn,
729                                      TCGReg rm, int imm6)
730{
731    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
732}
733
734/* This function is for 3.5.2 (Add/subtract shifted register),
735   and 3.5.10 (Logical shifted register), for the vast majorty of cases
736   when we don't want to apply a shift.  Thus it can also be used for
737   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
738static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
739                              TCGReg rd, TCGReg rn, TCGReg rm)
740{
741    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
742}
743
744#define tcg_out_insn_3503  tcg_out_insn_3502
745#define tcg_out_insn_3508  tcg_out_insn_3502
746#define tcg_out_insn_3510  tcg_out_insn_3502
747
748static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
749                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
750{
751    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
752              | tcg_cond_to_aarch64[c] << 12);
753}
754
755static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
756                              TCGReg rd, TCGReg rn)
757{
758    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
759}
760
761static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
762                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
763{
764    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
765}
766
767static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
768                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
769{
770    /* Note that bit 11 set means general register input.  Therefore
771       we can handle both register sets with one function.  */
772    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
773              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
774}
775
776static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
777                              TCGReg rd, bool op, int cmode, uint8_t imm8)
778{
779    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
780              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
781}
782
783static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
784                              TCGReg rd, TCGReg rn, unsigned immhb)
785{
786    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
787}
788
789static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
790                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
791{
792    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
793              | (rn & 0x1f) << 5 | (rd & 0x1f));
794}
795
796static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
797                              unsigned size, TCGReg rd, TCGReg rn)
798{
799    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
800}
801
802static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
803                              TCGReg rd, TCGReg rn, unsigned immhb)
804{
805    tcg_out32(s, insn | q << 30 | immhb << 16
806              | (rn & 0x1f) << 5 | (rd & 0x1f));
807}
808
809static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
810                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
811{
812    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
813              | (rn & 0x1f) << 5 | (rd & 0x1f));
814}
815
816static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
817                              unsigned size, TCGReg rd, TCGReg rn)
818{
819    tcg_out32(s, insn | q << 30 | (size << 22)
820              | (rn & 0x1f) << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg base, TCGType ext,
825                              TCGReg regoff)
826{
827    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
828    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
829              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
830}
831
832static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
833                              TCGReg rd, TCGReg rn, intptr_t offset)
834{
835    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
836}
837
838static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
839                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
840{
841    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
842    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
843              | rn << 5 | (rd & 0x1f));
844}
845
846/* Register to register move using ORR (shifted register with no shift). */
847static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
848{
849    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
850}
851
852/* Register to register move using ADDI (move to/from SP).  */
853static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
854{
855    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
856}
857
858/* This function is used for the Logical (immediate) instruction group.
859   The value of LIMM must satisfy IS_LIMM.  See the comment above about
860   only supporting simplified logical immediates.  */
861static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
862                             TCGReg rd, TCGReg rn, uint64_t limm)
863{
864    unsigned h, l, r, c;
865
866    tcg_debug_assert(is_limm(limm));
867
868    h = clz64(limm);
869    l = ctz64(limm);
870    if (l == 0) {
871        r = 0;                  /* form 0....01....1 */
872        c = ctz64(~limm) - 1;
873        if (h == 0) {
874            r = clz64(~limm);   /* form 1..10..01..1 */
875            c += r;
876        }
877    } else {
878        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
879        c = r - h - 1;
880    }
881    if (ext == TCG_TYPE_I32) {
882        r &= 31;
883        c &= 31;
884    }
885
886    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
887}
888
889static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
890                             TCGReg rd, int64_t v64)
891{
892    bool q = type == TCG_TYPE_V128;
893    int cmode, imm8, i;
894
895    /* Test all bytes equal first.  */
896    if (vece == MO_8) {
897        imm8 = (uint8_t)v64;
898        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
899        return;
900    }
901
902    /*
903     * Test all bytes 0x00 or 0xff second.  This can match cases that
904     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
905     */
906    for (i = imm8 = 0; i < 8; i++) {
907        uint8_t byte = v64 >> (i * 8);
908        if (byte == 0xff) {
909            imm8 |= 1 << i;
910        } else if (byte != 0) {
911            goto fail_bytes;
912        }
913    }
914    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
915    return;
916 fail_bytes:
917
918    /*
919     * Tests for various replications.  For each element width, if we
920     * cannot find an expansion there's no point checking a larger
921     * width because we already know by replication it cannot match.
922     */
923    if (vece == MO_16) {
924        uint16_t v16 = v64;
925
926        if (is_shimm16(v16, &cmode, &imm8)) {
927            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
928            return;
929        }
930        if (is_shimm16(~v16, &cmode, &imm8)) {
931            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
932            return;
933        }
934
935        /*
936         * Otherwise, all remaining constants can be loaded in two insns:
937         * rd = v16 & 0xff, rd |= v16 & 0xff00.
938         */
939        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
940        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
941        return;
942    } else if (vece == MO_32) {
943        uint32_t v32 = v64;
944        uint32_t n32 = ~v32;
945
946        if (is_shimm32(v32, &cmode, &imm8) ||
947            is_soimm32(v32, &cmode, &imm8) ||
948            is_fimm32(v32, &cmode, &imm8)) {
949            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950            return;
951        }
952        if (is_shimm32(n32, &cmode, &imm8) ||
953            is_soimm32(n32, &cmode, &imm8)) {
954            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
955            return;
956        }
957
958        /*
959         * Restrict the set of constants to those we can load with
960         * two instructions.  Others we load from the pool.
961         */
962        i = is_shimm32_pair(v32, &cmode, &imm8);
963        if (i) {
964            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
965            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
966            return;
967        }
968        i = is_shimm32_pair(n32, &cmode, &imm8);
969        if (i) {
970            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
971            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
972            return;
973        }
974    } else if (is_fimm64(v64, &cmode, &imm8)) {
975        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
976        return;
977    }
978
979    /*
980     * As a last resort, load from the constant pool.  Sadly there
981     * is no LD1R (literal), so store the full 16-byte vector.
982     */
983    if (type == TCG_TYPE_V128) {
984        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
985        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
986    } else {
987        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
988        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
989    }
990}
991
992static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
993                            TCGReg rd, TCGReg rs)
994{
995    int is_q = type - TCG_TYPE_V64;
996    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
997    return true;
998}
999
1000static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1001                             TCGReg r, TCGReg base, intptr_t offset)
1002{
1003    TCGReg temp = TCG_REG_TMP0;
1004
1005    if (offset < -0xffffff || offset > 0xffffff) {
1006        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1007        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1008        base = temp;
1009    } else {
1010        AArch64Insn add_insn = I3401_ADDI;
1011
1012        if (offset < 0) {
1013            add_insn = I3401_SUBI;
1014            offset = -offset;
1015        }
1016        if (offset & 0xfff000) {
1017            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1018            base = temp;
1019        }
1020        if (offset & 0xfff) {
1021            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1022            base = temp;
1023        }
1024    }
1025    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1026    return true;
1027}
1028
1029static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1030                         tcg_target_long value)
1031{
1032    tcg_target_long svalue = value;
1033    tcg_target_long ivalue = ~value;
1034    tcg_target_long t0, t1, t2;
1035    int s0, s1;
1036    AArch64Insn opc;
1037
1038    switch (type) {
1039    case TCG_TYPE_I32:
1040    case TCG_TYPE_I64:
1041        tcg_debug_assert(rd < 32);
1042        break;
1043    default:
1044        g_assert_not_reached();
1045    }
1046
1047    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1048       values within [2**31, 2**32-1], we can create smaller sequences by
1049       interpreting this as a negative 32-bit number, while ensuring that
1050       the high 32 bits are cleared by setting SF=0.  */
1051    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1052        svalue = (int32_t)value;
1053        value = (uint32_t)value;
1054        ivalue = (uint32_t)ivalue;
1055        type = TCG_TYPE_I32;
1056    }
1057
1058    /* Speed things up by handling the common case of small positive
1059       and negative values specially.  */
1060    if ((value & ~0xffffull) == 0) {
1061        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1062        return;
1063    } else if ((ivalue & ~0xffffull) == 0) {
1064        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1065        return;
1066    }
1067
1068    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1069       use the sign-extended value.  That lets us match rotated values such
1070       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1071    if (is_limm(svalue)) {
1072        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1073        return;
1074    }
1075
1076    /* Look for host pointer values within 4G of the PC.  This happens
1077       often when loading pointers to QEMU's own data structures.  */
1078    if (type == TCG_TYPE_I64) {
1079        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1080        tcg_target_long disp = value - src_rx;
1081        if (disp == sextract64(disp, 0, 21)) {
1082            tcg_out_insn(s, 3406, ADR, rd, disp);
1083            return;
1084        }
1085        disp = (value >> 12) - (src_rx >> 12);
1086        if (disp == sextract64(disp, 0, 21)) {
1087            tcg_out_insn(s, 3406, ADRP, rd, disp);
1088            if (value & 0xfff) {
1089                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1090            }
1091            return;
1092        }
1093    }
1094
1095    /* Would it take fewer insns to begin with MOVN?  */
1096    if (ctpop64(value) >= 32) {
1097        t0 = ivalue;
1098        opc = I3405_MOVN;
1099    } else {
1100        t0 = value;
1101        opc = I3405_MOVZ;
1102    }
1103    s0 = ctz64(t0) & (63 & -16);
1104    t1 = t0 & ~(0xffffull << s0);
1105    s1 = ctz64(t1) & (63 & -16);
1106    t2 = t1 & ~(0xffffull << s1);
1107    if (t2 == 0) {
1108        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1109        if (t1 != 0) {
1110            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1111        }
1112        return;
1113    }
1114
1115    /* For more than 2 insns, dump it into the constant pool.  */
1116    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1117    tcg_out_insn(s, 3305, LDR, 0, rd);
1118}
1119
1120static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1121{
1122    return false;
1123}
1124
1125static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1126                             tcg_target_long imm)
1127{
1128    /* This function is only used for passing structs by reference. */
1129    g_assert_not_reached();
1130}
1131
1132/* Define something more legible for general use.  */
1133#define tcg_out_ldst_r  tcg_out_insn_3310
1134
1135static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1136                         TCGReg rn, intptr_t offset, int lgsize)
1137{
1138    /* If the offset is naturally aligned and in range, then we can
1139       use the scaled uimm12 encoding */
1140    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1141        uintptr_t scaled_uimm = offset >> lgsize;
1142        if (scaled_uimm <= 0xfff) {
1143            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1144            return;
1145        }
1146    }
1147
1148    /* Small signed offsets can use the unscaled encoding.  */
1149    if (offset >= -256 && offset < 256) {
1150        tcg_out_insn_3312(s, insn, rd, rn, offset);
1151        return;
1152    }
1153
1154    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1155    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1156    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1157}
1158
1159static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1160{
1161    if (ret == arg) {
1162        return true;
1163    }
1164    switch (type) {
1165    case TCG_TYPE_I32:
1166    case TCG_TYPE_I64:
1167        if (ret < 32 && arg < 32) {
1168            tcg_out_movr(s, type, ret, arg);
1169            break;
1170        } else if (ret < 32) {
1171            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1172            break;
1173        } else if (arg < 32) {
1174            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1175            break;
1176        }
1177        /* FALLTHRU */
1178
1179    case TCG_TYPE_V64:
1180        tcg_debug_assert(ret >= 32 && arg >= 32);
1181        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1182        break;
1183    case TCG_TYPE_V128:
1184        tcg_debug_assert(ret >= 32 && arg >= 32);
1185        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1186        break;
1187
1188    default:
1189        g_assert_not_reached();
1190    }
1191    return true;
1192}
1193
1194static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1195                       TCGReg base, intptr_t ofs)
1196{
1197    AArch64Insn insn;
1198    int lgsz;
1199
1200    switch (type) {
1201    case TCG_TYPE_I32:
1202        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1203        lgsz = 2;
1204        break;
1205    case TCG_TYPE_I64:
1206        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1207        lgsz = 3;
1208        break;
1209    case TCG_TYPE_V64:
1210        insn = I3312_LDRVD;
1211        lgsz = 3;
1212        break;
1213    case TCG_TYPE_V128:
1214        insn = I3312_LDRVQ;
1215        lgsz = 4;
1216        break;
1217    default:
1218        g_assert_not_reached();
1219    }
1220    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1221}
1222
1223static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1224                       TCGReg base, intptr_t ofs)
1225{
1226    AArch64Insn insn;
1227    int lgsz;
1228
1229    switch (type) {
1230    case TCG_TYPE_I32:
1231        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1232        lgsz = 2;
1233        break;
1234    case TCG_TYPE_I64:
1235        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1236        lgsz = 3;
1237        break;
1238    case TCG_TYPE_V64:
1239        insn = I3312_STRVD;
1240        lgsz = 3;
1241        break;
1242    case TCG_TYPE_V128:
1243        insn = I3312_STRVQ;
1244        lgsz = 4;
1245        break;
1246    default:
1247        g_assert_not_reached();
1248    }
1249    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1250}
1251
1252static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1253                               TCGReg base, intptr_t ofs)
1254{
1255    if (type <= TCG_TYPE_I64 && val == 0) {
1256        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1257        return true;
1258    }
1259    return false;
1260}
1261
1262static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1263                               TCGReg rn, unsigned int a, unsigned int b)
1264{
1265    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1266}
1267
1268static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1269                                TCGReg rn, unsigned int a, unsigned int b)
1270{
1271    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1272}
1273
1274static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1275                                TCGReg rn, unsigned int a, unsigned int b)
1276{
1277    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1278}
1279
1280static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1281                                TCGReg rn, TCGReg rm, unsigned int a)
1282{
1283    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1284}
1285
1286static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1287                               TCGReg rd, TCGReg rn, unsigned int m)
1288{
1289    int bits = ext ? 64 : 32;
1290    int max = bits - 1;
1291    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1292}
1293
1294static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1295                               TCGReg rd, TCGReg rn, unsigned int m)
1296{
1297    int max = ext ? 63 : 31;
1298    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1299}
1300
1301static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1302                               TCGReg rd, TCGReg rn, unsigned int m)
1303{
1304    int max = ext ? 63 : 31;
1305    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1306}
1307
1308static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1309                                TCGReg rd, TCGReg rn, unsigned int m)
1310{
1311    int max = ext ? 63 : 31;
1312    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1313}
1314
1315static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1316                                TCGReg rd, TCGReg rn, unsigned int m)
1317{
1318    int max = ext ? 63 : 31;
1319    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1320}
1321
1322static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1323                               TCGReg rn, unsigned lsb, unsigned width)
1324{
1325    unsigned size = ext ? 64 : 32;
1326    unsigned a = (size - lsb) & (size - 1);
1327    unsigned b = width - 1;
1328    tcg_out_bfm(s, ext, rd, rn, a, b);
1329}
1330
1331static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1332                        tcg_target_long b, bool const_b)
1333{
1334    if (const_b) {
1335        /* Using CMP or CMN aliases.  */
1336        if (b >= 0) {
1337            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1338        } else {
1339            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1340        }
1341    } else {
1342        /* Using CMP alias SUBS wzr, Wn, Wm */
1343        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1344    }
1345}
1346
1347static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1348{
1349    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1350    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1351    tcg_out_insn(s, 3206, B, offset);
1352}
1353
1354static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1355{
1356    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1357    if (offset == sextract64(offset, 0, 26)) {
1358        tcg_out_insn(s, 3206, B, offset);
1359    } else {
1360        /* Choose X9 as a call-clobbered non-LR temporary. */
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1363    }
1364}
1365
1366static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1367{
1368    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1369    if (offset == sextract64(offset, 0, 26)) {
1370        tcg_out_insn(s, 3206, BL, offset);
1371    } else {
1372        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1373        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1374    }
1375}
1376
1377static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1378                         const TCGHelperInfo *info)
1379{
1380    tcg_out_call_int(s, target);
1381}
1382
1383static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1384{
1385    if (!l->has_value) {
1386        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1387        tcg_out_insn(s, 3206, B, 0);
1388    } else {
1389        tcg_out_goto(s, l->u.value_ptr);
1390    }
1391}
1392
1393static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1394                           TCGArg b, bool b_const, TCGLabel *l)
1395{
1396    intptr_t offset;
1397    bool need_cmp;
1398
1399    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1400        need_cmp = false;
1401    } else {
1402        need_cmp = true;
1403        tcg_out_cmp(s, ext, a, b, b_const);
1404    }
1405
1406    if (!l->has_value) {
1407        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1408        offset = tcg_in32(s) >> 5;
1409    } else {
1410        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1411        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1412    }
1413
1414    if (need_cmp) {
1415        tcg_out_insn(s, 3202, B_C, c, offset);
1416    } else if (c == TCG_COND_EQ) {
1417        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1418    } else {
1419        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1420    }
1421}
1422
1423static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1424                               TCGReg rd, TCGReg rn)
1425{
1426    /* REV, REV16, REV32 */
1427    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1428}
1429
1430static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1431                               TCGReg rd, TCGReg rn)
1432{
1433    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1434    int bits = (8 << s_bits) - 1;
1435    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1436}
1437
1438static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1439{
1440    tcg_out_sxt(s, type, MO_8, rd, rn);
1441}
1442
1443static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1444{
1445    tcg_out_sxt(s, type, MO_16, rd, rn);
1446}
1447
1448static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1449{
1450    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1451}
1452
1453static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1454{
1455    tcg_out_ext32s(s, rd, rn);
1456}
1457
1458static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1459                               TCGReg rd, TCGReg rn)
1460{
1461    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1462    int bits = (8 << s_bits) - 1;
1463    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1464}
1465
1466static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1467{
1468    tcg_out_uxt(s, MO_8, rd, rn);
1469}
1470
1471static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1472{
1473    tcg_out_uxt(s, MO_16, rd, rn);
1474}
1475
1476static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1477{
1478    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1479}
1480
1481static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1482{
1483    tcg_out_ext32u(s, rd, rn);
1484}
1485
1486static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1487{
1488    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1489}
1490
1491static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1492                            TCGReg rn, int64_t aimm)
1493{
1494    if (aimm >= 0) {
1495        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1496    } else {
1497        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1498    }
1499}
1500
1501static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1502                            TCGReg rh, TCGReg al, TCGReg ah,
1503                            tcg_target_long bl, tcg_target_long bh,
1504                            bool const_bl, bool const_bh, bool sub)
1505{
1506    TCGReg orig_rl = rl;
1507    AArch64Insn insn;
1508
1509    if (rl == ah || (!const_bh && rl == bh)) {
1510        rl = TCG_REG_TMP0;
1511    }
1512
1513    if (const_bl) {
1514        if (bl < 0) {
1515            bl = -bl;
1516            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1517        } else {
1518            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1519        }
1520
1521        if (unlikely(al == TCG_REG_XZR)) {
1522            /* ??? We want to allow al to be zero for the benefit of
1523               negation via subtraction.  However, that leaves open the
1524               possibility of adding 0+const in the low part, and the
1525               immediate add instructions encode XSP not XZR.  Don't try
1526               anything more elaborate here than loading another zero.  */
1527            al = TCG_REG_TMP0;
1528            tcg_out_movi(s, ext, al, 0);
1529        }
1530        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1531    } else {
1532        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1533    }
1534
1535    insn = I3503_ADC;
1536    if (const_bh) {
1537        /* Note that the only two constants we support are 0 and -1, and
1538           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1539        if ((bh != 0) ^ sub) {
1540            insn = I3503_SBC;
1541        }
1542        bh = TCG_REG_XZR;
1543    } else if (sub) {
1544        insn = I3503_SBC;
1545    }
1546    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1547
1548    tcg_out_mov(s, ext, orig_rl, rl);
1549}
1550
1551static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1552{
1553    static const uint32_t sync[] = {
1554        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1555        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1556        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1557        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1558        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1559    };
1560    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1561}
1562
1563static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1564                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1565{
1566    TCGReg a1 = a0;
1567    if (is_ctz) {
1568        a1 = TCG_REG_TMP0;
1569        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1570    }
1571    if (const_b && b == (ext ? 64 : 32)) {
1572        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1573    } else {
1574        AArch64Insn sel = I3506_CSEL;
1575
1576        tcg_out_cmp(s, ext, a0, 0, 1);
1577        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1578
1579        if (const_b) {
1580            if (b == -1) {
1581                b = TCG_REG_XZR;
1582                sel = I3506_CSINV;
1583            } else if (b == 0) {
1584                b = TCG_REG_XZR;
1585            } else {
1586                tcg_out_movi(s, ext, d, b);
1587                b = d;
1588            }
1589        }
1590        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1591    }
1592}
1593
1594typedef struct {
1595    TCGReg base;
1596    TCGReg index;
1597    TCGType index_ext;
1598    TCGAtomAlign aa;
1599} HostAddress;
1600
1601bool tcg_target_has_memory_bswap(MemOp memop)
1602{
1603    return false;
1604}
1605
1606static const TCGLdstHelperParam ldst_helper_param = {
1607    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1608};
1609
1610static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1611{
1612    MemOp opc = get_memop(lb->oi);
1613
1614    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1615        return false;
1616    }
1617
1618    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1619    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1620    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1621    tcg_out_goto(s, lb->raddr);
1622    return true;
1623}
1624
1625static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1626{
1627    MemOp opc = get_memop(lb->oi);
1628
1629    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1630        return false;
1631    }
1632
1633    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1634    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1635    tcg_out_goto(s, lb->raddr);
1636    return true;
1637}
1638
1639/* We expect to use a 7-bit scaled negative offset from ENV.  */
1640#define MIN_TLB_MASK_TABLE_OFS  -512
1641
1642/*
1643 * For softmmu, perform the TLB load and compare.
1644 * For useronly, perform any required alignment tests.
1645 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1646 * is required and fill in @h with the host address for the fast path.
1647 */
1648static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1649                                           TCGReg addr_reg, MemOpIdx oi,
1650                                           bool is_ld)
1651{
1652    TCGType addr_type = s->addr_type;
1653    TCGLabelQemuLdst *ldst = NULL;
1654    MemOp opc = get_memop(oi);
1655    MemOp s_bits = opc & MO_SIZE;
1656    unsigned a_mask;
1657
1658    h->aa = atom_and_align_for_opc(s, opc,
1659                                   have_lse2 ? MO_ATOM_WITHIN16
1660                                             : MO_ATOM_IFALIGN,
1661                                   s_bits == MO_128);
1662    a_mask = (1 << h->aa.align) - 1;
1663
1664#ifdef CONFIG_SOFTMMU
1665    unsigned s_mask = (1u << s_bits) - 1;
1666    unsigned mem_index = get_mmuidx(oi);
1667    TCGReg addr_adj;
1668    TCGType mask_type;
1669    uint64_t compare_mask;
1670
1671    ldst = new_ldst_label(s);
1672    ldst->is_ld = is_ld;
1673    ldst->oi = oi;
1674    ldst->addrlo_reg = addr_reg;
1675
1676    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1677                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1678
1679    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1680    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1681    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1682    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1683                 tlb_mask_table_ofs(s, mem_index), 1, 0);
1684
1685    /* Extract the TLB index from the address into X0.  */
1686    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1687                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1688                 s->page_bits - CPU_TLB_ENTRY_BITS);
1689
1690    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
1691    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1692
1693    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
1694    QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1695    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1696               is_ld ? offsetof(CPUTLBEntry, addr_read)
1697                     : offsetof(CPUTLBEntry, addr_write));
1698    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1699               offsetof(CPUTLBEntry, addend));
1700
1701    /*
1702     * For aligned accesses, we check the first byte and include the alignment
1703     * bits within the address.  For unaligned access, we check that we don't
1704     * cross pages using the address of the last byte of the access.
1705     */
1706    if (a_mask >= s_mask) {
1707        addr_adj = addr_reg;
1708    } else {
1709        addr_adj = TCG_REG_TMP2;
1710        tcg_out_insn(s, 3401, ADDI, addr_type,
1711                     addr_adj, addr_reg, s_mask - a_mask);
1712    }
1713    compare_mask = (uint64_t)s->page_mask | a_mask;
1714
1715    /* Store the page mask part of the address into TMP2.  */
1716    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1717                     addr_adj, compare_mask);
1718
1719    /* Perform the address comparison. */
1720    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1721
1722    /* If not equal, we jump to the slow path. */
1723    ldst->label_ptr[0] = s->code_ptr;
1724    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1725
1726    h->base = TCG_REG_TMP1;
1727    h->index = addr_reg;
1728    h->index_ext = addr_type;
1729#else
1730    if (a_mask) {
1731        ldst = new_ldst_label(s);
1732
1733        ldst->is_ld = is_ld;
1734        ldst->oi = oi;
1735        ldst->addrlo_reg = addr_reg;
1736
1737        /* tst addr, #mask */
1738        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1739
1740        /* b.ne slow_path */
1741        ldst->label_ptr[0] = s->code_ptr;
1742        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1743    }
1744
1745    if (guest_base || addr_type == TCG_TYPE_I32) {
1746        h->base = TCG_REG_GUEST_BASE;
1747        h->index = addr_reg;
1748        h->index_ext = addr_type;
1749    } else {
1750        h->base = addr_reg;
1751        h->index = TCG_REG_XZR;
1752        h->index_ext = TCG_TYPE_I64;
1753    }
1754#endif
1755
1756    return ldst;
1757}
1758
1759static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1760                                   TCGReg data_r, HostAddress h)
1761{
1762    switch (memop & MO_SSIZE) {
1763    case MO_UB:
1764        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1765        break;
1766    case MO_SB:
1767        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1768                       data_r, h.base, h.index_ext, h.index);
1769        break;
1770    case MO_UW:
1771        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_SW:
1774        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1775                       data_r, h.base, h.index_ext, h.index);
1776        break;
1777    case MO_UL:
1778        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1779        break;
1780    case MO_SL:
1781        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    case MO_UQ:
1784        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1785        break;
1786    default:
1787        g_assert_not_reached();
1788    }
1789}
1790
1791static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1792                                   TCGReg data_r, HostAddress h)
1793{
1794    switch (memop & MO_SIZE) {
1795    case MO_8:
1796        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1797        break;
1798    case MO_16:
1799        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1800        break;
1801    case MO_32:
1802        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1803        break;
1804    case MO_64:
1805        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1806        break;
1807    default:
1808        g_assert_not_reached();
1809    }
1810}
1811
1812static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1813                            MemOpIdx oi, TCGType data_type)
1814{
1815    TCGLabelQemuLdst *ldst;
1816    HostAddress h;
1817
1818    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1819    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1820
1821    if (ldst) {
1822        ldst->type = data_type;
1823        ldst->datalo_reg = data_reg;
1824        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1825    }
1826}
1827
1828static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1829                            MemOpIdx oi, TCGType data_type)
1830{
1831    TCGLabelQemuLdst *ldst;
1832    HostAddress h;
1833
1834    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1835    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1836
1837    if (ldst) {
1838        ldst->type = data_type;
1839        ldst->datalo_reg = data_reg;
1840        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1841    }
1842}
1843
1844static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1845                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1846{
1847    TCGLabelQemuLdst *ldst;
1848    HostAddress h;
1849    TCGReg base;
1850    bool use_pair;
1851
1852    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1853
1854    /* Compose the final address, as LDP/STP have no indexing. */
1855    if (h.index == TCG_REG_XZR) {
1856        base = h.base;
1857    } else {
1858        base = TCG_REG_TMP2;
1859        if (h.index_ext == TCG_TYPE_I32) {
1860            /* add base, base, index, uxtw */
1861            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1862                         h.base, h.index, MO_32, 0);
1863        } else {
1864            /* add base, base, index */
1865            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1866        }
1867    }
1868
1869    use_pair = h.aa.atom < MO_128 || have_lse2;
1870
1871    if (!use_pair) {
1872        tcg_insn_unit *branch = NULL;
1873        TCGReg ll, lh, sl, sh;
1874
1875        /*
1876         * If we have already checked for 16-byte alignment, that's all
1877         * we need. Otherwise we have determined that misaligned atomicity
1878         * may be handled with two 8-byte loads.
1879         */
1880        if (h.aa.align < MO_128) {
1881            /*
1882             * TODO: align should be MO_64, so we only need test bit 3,
1883             * which means we could use TBNZ instead of ANDS+B_C.
1884             */
1885            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1886            branch = s->code_ptr;
1887            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1888            use_pair = true;
1889        }
1890
1891        if (is_ld) {
1892            /*
1893             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1894             *    ldxp lo, hi, [base]
1895             *    stxp t0, lo, hi, [base]
1896             *    cbnz t0, .-8
1897             * Require no overlap between data{lo,hi} and base.
1898             */
1899            if (datalo == base || datahi == base) {
1900                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1901                base = TCG_REG_TMP2;
1902            }
1903            ll = sl = datalo;
1904            lh = sh = datahi;
1905        } else {
1906            /*
1907             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1908             * 1: ldxp t0, t1, [base]
1909             *    stxp t0, lo, hi, [base]
1910             *    cbnz t0, 1b
1911             */
1912            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1913            ll = TCG_REG_TMP0;
1914            lh = TCG_REG_TMP1;
1915            sl = datalo;
1916            sh = datahi;
1917        }
1918
1919        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1920        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1921        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1922
1923        if (use_pair) {
1924            /* "b .+8", branching across the one insn of use_pair. */
1925            tcg_out_insn(s, 3206, B, 2);
1926            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1927        }
1928    }
1929
1930    if (use_pair) {
1931        if (is_ld) {
1932            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1933        } else {
1934            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1935        }
1936    }
1937
1938    if (ldst) {
1939        ldst->type = TCG_TYPE_I128;
1940        ldst->datalo_reg = datalo;
1941        ldst->datahi_reg = datahi;
1942        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1943    }
1944}
1945
1946static const tcg_insn_unit *tb_ret_addr;
1947
1948static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1949{
1950    /* Reuse the zeroing that exists for goto_ptr.  */
1951    if (a0 == 0) {
1952        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1953    } else {
1954        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1955        tcg_out_goto_long(s, tb_ret_addr);
1956    }
1957}
1958
1959static void tcg_out_goto_tb(TCGContext *s, int which)
1960{
1961    /*
1962     * Direct branch, or indirect address load, will be patched
1963     * by tb_target_set_jmp_target.  Assert indirect load offset
1964     * in range early, regardless of direct branch distance.
1965     */
1966    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1967    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1968
1969    set_jmp_insn_offset(s, which);
1970    tcg_out32(s, I3206_B);
1971    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1972    set_jmp_reset_offset(s, which);
1973}
1974
1975void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1976                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1977{
1978    uintptr_t d_addr = tb->jmp_target_addr[n];
1979    ptrdiff_t d_offset = d_addr - jmp_rx;
1980    tcg_insn_unit insn;
1981
1982    /* Either directly branch, or indirect branch load. */
1983    if (d_offset == sextract64(d_offset, 0, 28)) {
1984        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1985    } else {
1986        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1987        ptrdiff_t i_offset = i_addr - jmp_rx;
1988
1989        /* Note that we asserted this in range in tcg_out_goto_tb. */
1990        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
1991    }
1992    qatomic_set((uint32_t *)jmp_rw, insn);
1993    flush_idcache_range(jmp_rx, jmp_rw, 4);
1994}
1995
1996static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1997                       const TCGArg args[TCG_MAX_OP_ARGS],
1998                       const int const_args[TCG_MAX_OP_ARGS])
1999{
2000    /* 99% of the time, we can signal the use of extension registers
2001       by looking to see if the opcode handles 64-bit data.  */
2002    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2003
2004    /* Hoist the loads of the most common arguments.  */
2005    TCGArg a0 = args[0];
2006    TCGArg a1 = args[1];
2007    TCGArg a2 = args[2];
2008    int c2 = const_args[2];
2009
2010    /* Some operands are defined with "rZ" constraint, a register or
2011       the zero register.  These need not actually test args[I] == 0.  */
2012#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2013
2014    switch (opc) {
2015    case INDEX_op_goto_ptr:
2016        tcg_out_insn(s, 3207, BR, a0);
2017        break;
2018
2019    case INDEX_op_br:
2020        tcg_out_goto_label(s, arg_label(a0));
2021        break;
2022
2023    case INDEX_op_ld8u_i32:
2024    case INDEX_op_ld8u_i64:
2025        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2026        break;
2027    case INDEX_op_ld8s_i32:
2028        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2029        break;
2030    case INDEX_op_ld8s_i64:
2031        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2032        break;
2033    case INDEX_op_ld16u_i32:
2034    case INDEX_op_ld16u_i64:
2035        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2036        break;
2037    case INDEX_op_ld16s_i32:
2038        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2039        break;
2040    case INDEX_op_ld16s_i64:
2041        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2042        break;
2043    case INDEX_op_ld_i32:
2044    case INDEX_op_ld32u_i64:
2045        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2046        break;
2047    case INDEX_op_ld32s_i64:
2048        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2049        break;
2050    case INDEX_op_ld_i64:
2051        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2052        break;
2053
2054    case INDEX_op_st8_i32:
2055    case INDEX_op_st8_i64:
2056        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2057        break;
2058    case INDEX_op_st16_i32:
2059    case INDEX_op_st16_i64:
2060        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2061        break;
2062    case INDEX_op_st_i32:
2063    case INDEX_op_st32_i64:
2064        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2065        break;
2066    case INDEX_op_st_i64:
2067        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2068        break;
2069
2070    case INDEX_op_add_i32:
2071        a2 = (int32_t)a2;
2072        /* FALLTHRU */
2073    case INDEX_op_add_i64:
2074        if (c2) {
2075            tcg_out_addsubi(s, ext, a0, a1, a2);
2076        } else {
2077            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2078        }
2079        break;
2080
2081    case INDEX_op_sub_i32:
2082        a2 = (int32_t)a2;
2083        /* FALLTHRU */
2084    case INDEX_op_sub_i64:
2085        if (c2) {
2086            tcg_out_addsubi(s, ext, a0, a1, -a2);
2087        } else {
2088            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2089        }
2090        break;
2091
2092    case INDEX_op_neg_i64:
2093    case INDEX_op_neg_i32:
2094        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2095        break;
2096
2097    case INDEX_op_and_i32:
2098        a2 = (int32_t)a2;
2099        /* FALLTHRU */
2100    case INDEX_op_and_i64:
2101        if (c2) {
2102            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2103        } else {
2104            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2105        }
2106        break;
2107
2108    case INDEX_op_andc_i32:
2109        a2 = (int32_t)a2;
2110        /* FALLTHRU */
2111    case INDEX_op_andc_i64:
2112        if (c2) {
2113            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2114        } else {
2115            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2116        }
2117        break;
2118
2119    case INDEX_op_or_i32:
2120        a2 = (int32_t)a2;
2121        /* FALLTHRU */
2122    case INDEX_op_or_i64:
2123        if (c2) {
2124            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2125        } else {
2126            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2127        }
2128        break;
2129
2130    case INDEX_op_orc_i32:
2131        a2 = (int32_t)a2;
2132        /* FALLTHRU */
2133    case INDEX_op_orc_i64:
2134        if (c2) {
2135            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2136        } else {
2137            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2138        }
2139        break;
2140
2141    case INDEX_op_xor_i32:
2142        a2 = (int32_t)a2;
2143        /* FALLTHRU */
2144    case INDEX_op_xor_i64:
2145        if (c2) {
2146            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2147        } else {
2148            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2149        }
2150        break;
2151
2152    case INDEX_op_eqv_i32:
2153        a2 = (int32_t)a2;
2154        /* FALLTHRU */
2155    case INDEX_op_eqv_i64:
2156        if (c2) {
2157            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2158        } else {
2159            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2160        }
2161        break;
2162
2163    case INDEX_op_not_i64:
2164    case INDEX_op_not_i32:
2165        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2166        break;
2167
2168    case INDEX_op_mul_i64:
2169    case INDEX_op_mul_i32:
2170        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2171        break;
2172
2173    case INDEX_op_div_i64:
2174    case INDEX_op_div_i32:
2175        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2176        break;
2177    case INDEX_op_divu_i64:
2178    case INDEX_op_divu_i32:
2179        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2180        break;
2181
2182    case INDEX_op_rem_i64:
2183    case INDEX_op_rem_i32:
2184        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2185        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2186        break;
2187    case INDEX_op_remu_i64:
2188    case INDEX_op_remu_i32:
2189        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2190        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2191        break;
2192
2193    case INDEX_op_shl_i64:
2194    case INDEX_op_shl_i32:
2195        if (c2) {
2196            tcg_out_shl(s, ext, a0, a1, a2);
2197        } else {
2198            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2199        }
2200        break;
2201
2202    case INDEX_op_shr_i64:
2203    case INDEX_op_shr_i32:
2204        if (c2) {
2205            tcg_out_shr(s, ext, a0, a1, a2);
2206        } else {
2207            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2208        }
2209        break;
2210
2211    case INDEX_op_sar_i64:
2212    case INDEX_op_sar_i32:
2213        if (c2) {
2214            tcg_out_sar(s, ext, a0, a1, a2);
2215        } else {
2216            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2217        }
2218        break;
2219
2220    case INDEX_op_rotr_i64:
2221    case INDEX_op_rotr_i32:
2222        if (c2) {
2223            tcg_out_rotr(s, ext, a0, a1, a2);
2224        } else {
2225            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2226        }
2227        break;
2228
2229    case INDEX_op_rotl_i64:
2230    case INDEX_op_rotl_i32:
2231        if (c2) {
2232            tcg_out_rotl(s, ext, a0, a1, a2);
2233        } else {
2234            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2235            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2236        }
2237        break;
2238
2239    case INDEX_op_clz_i64:
2240    case INDEX_op_clz_i32:
2241        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2242        break;
2243    case INDEX_op_ctz_i64:
2244    case INDEX_op_ctz_i32:
2245        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2246        break;
2247
2248    case INDEX_op_brcond_i32:
2249        a1 = (int32_t)a1;
2250        /* FALLTHRU */
2251    case INDEX_op_brcond_i64:
2252        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2253        break;
2254
2255    case INDEX_op_setcond_i32:
2256        a2 = (int32_t)a2;
2257        /* FALLTHRU */
2258    case INDEX_op_setcond_i64:
2259        tcg_out_cmp(s, ext, a1, a2, c2);
2260        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2261        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2262                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2263        break;
2264
2265    case INDEX_op_negsetcond_i32:
2266        a2 = (int32_t)a2;
2267        /* FALLTHRU */
2268    case INDEX_op_negsetcond_i64:
2269        tcg_out_cmp(s, ext, a1, a2, c2);
2270        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2271        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2272                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2273        break;
2274
2275    case INDEX_op_movcond_i32:
2276        a2 = (int32_t)a2;
2277        /* FALLTHRU */
2278    case INDEX_op_movcond_i64:
2279        tcg_out_cmp(s, ext, a1, a2, c2);
2280        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2281        break;
2282
2283    case INDEX_op_qemu_ld_a32_i32:
2284    case INDEX_op_qemu_ld_a64_i32:
2285    case INDEX_op_qemu_ld_a32_i64:
2286    case INDEX_op_qemu_ld_a64_i64:
2287        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2288        break;
2289    case INDEX_op_qemu_st_a32_i32:
2290    case INDEX_op_qemu_st_a64_i32:
2291    case INDEX_op_qemu_st_a32_i64:
2292    case INDEX_op_qemu_st_a64_i64:
2293        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2294        break;
2295    case INDEX_op_qemu_ld_a32_i128:
2296    case INDEX_op_qemu_ld_a64_i128:
2297        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2298        break;
2299    case INDEX_op_qemu_st_a32_i128:
2300    case INDEX_op_qemu_st_a64_i128:
2301        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2302        break;
2303
2304    case INDEX_op_bswap64_i64:
2305        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2306        break;
2307    case INDEX_op_bswap32_i64:
2308        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2309        if (a2 & TCG_BSWAP_OS) {
2310            tcg_out_ext32s(s, a0, a0);
2311        }
2312        break;
2313    case INDEX_op_bswap32_i32:
2314        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2315        break;
2316    case INDEX_op_bswap16_i64:
2317    case INDEX_op_bswap16_i32:
2318        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2319        if (a2 & TCG_BSWAP_OS) {
2320            /* Output must be sign-extended. */
2321            tcg_out_ext16s(s, ext, a0, a0);
2322        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2323            /* Output must be zero-extended, but input isn't. */
2324            tcg_out_ext16u(s, a0, a0);
2325        }
2326        break;
2327
2328    case INDEX_op_deposit_i64:
2329    case INDEX_op_deposit_i32:
2330        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2331        break;
2332
2333    case INDEX_op_extract_i64:
2334    case INDEX_op_extract_i32:
2335        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2336        break;
2337
2338    case INDEX_op_sextract_i64:
2339    case INDEX_op_sextract_i32:
2340        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2341        break;
2342
2343    case INDEX_op_extract2_i64:
2344    case INDEX_op_extract2_i32:
2345        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2346        break;
2347
2348    case INDEX_op_add2_i32:
2349        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2350                        (int32_t)args[4], args[5], const_args[4],
2351                        const_args[5], false);
2352        break;
2353    case INDEX_op_add2_i64:
2354        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2355                        args[5], const_args[4], const_args[5], false);
2356        break;
2357    case INDEX_op_sub2_i32:
2358        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2359                        (int32_t)args[4], args[5], const_args[4],
2360                        const_args[5], true);
2361        break;
2362    case INDEX_op_sub2_i64:
2363        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2364                        args[5], const_args[4], const_args[5], true);
2365        break;
2366
2367    case INDEX_op_muluh_i64:
2368        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2369        break;
2370    case INDEX_op_mulsh_i64:
2371        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2372        break;
2373
2374    case INDEX_op_mb:
2375        tcg_out_mb(s, a0);
2376        break;
2377
2378    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2379    case INDEX_op_mov_i64:
2380    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2381    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2382    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2383    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2384    case INDEX_op_ext8s_i64:
2385    case INDEX_op_ext8u_i32:
2386    case INDEX_op_ext8u_i64:
2387    case INDEX_op_ext16s_i64:
2388    case INDEX_op_ext16s_i32:
2389    case INDEX_op_ext16u_i64:
2390    case INDEX_op_ext16u_i32:
2391    case INDEX_op_ext32s_i64:
2392    case INDEX_op_ext32u_i64:
2393    case INDEX_op_ext_i32_i64:
2394    case INDEX_op_extu_i32_i64:
2395    case INDEX_op_extrl_i64_i32:
2396    default:
2397        g_assert_not_reached();
2398    }
2399
2400#undef REG0
2401}
2402
2403static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2404                           unsigned vecl, unsigned vece,
2405                           const TCGArg args[TCG_MAX_OP_ARGS],
2406                           const int const_args[TCG_MAX_OP_ARGS])
2407{
2408    static const AArch64Insn cmp_vec_insn[16] = {
2409        [TCG_COND_EQ] = I3616_CMEQ,
2410        [TCG_COND_GT] = I3616_CMGT,
2411        [TCG_COND_GE] = I3616_CMGE,
2412        [TCG_COND_GTU] = I3616_CMHI,
2413        [TCG_COND_GEU] = I3616_CMHS,
2414    };
2415    static const AArch64Insn cmp_scalar_insn[16] = {
2416        [TCG_COND_EQ] = I3611_CMEQ,
2417        [TCG_COND_GT] = I3611_CMGT,
2418        [TCG_COND_GE] = I3611_CMGE,
2419        [TCG_COND_GTU] = I3611_CMHI,
2420        [TCG_COND_GEU] = I3611_CMHS,
2421    };
2422    static const AArch64Insn cmp0_vec_insn[16] = {
2423        [TCG_COND_EQ] = I3617_CMEQ0,
2424        [TCG_COND_GT] = I3617_CMGT0,
2425        [TCG_COND_GE] = I3617_CMGE0,
2426        [TCG_COND_LT] = I3617_CMLT0,
2427        [TCG_COND_LE] = I3617_CMLE0,
2428    };
2429    static const AArch64Insn cmp0_scalar_insn[16] = {
2430        [TCG_COND_EQ] = I3612_CMEQ0,
2431        [TCG_COND_GT] = I3612_CMGT0,
2432        [TCG_COND_GE] = I3612_CMGE0,
2433        [TCG_COND_LT] = I3612_CMLT0,
2434        [TCG_COND_LE] = I3612_CMLE0,
2435    };
2436
2437    TCGType type = vecl + TCG_TYPE_V64;
2438    unsigned is_q = vecl;
2439    bool is_scalar = !is_q && vece == MO_64;
2440    TCGArg a0, a1, a2, a3;
2441    int cmode, imm8;
2442
2443    a0 = args[0];
2444    a1 = args[1];
2445    a2 = args[2];
2446
2447    switch (opc) {
2448    case INDEX_op_ld_vec:
2449        tcg_out_ld(s, type, a0, a1, a2);
2450        break;
2451    case INDEX_op_st_vec:
2452        tcg_out_st(s, type, a0, a1, a2);
2453        break;
2454    case INDEX_op_dupm_vec:
2455        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2456        break;
2457    case INDEX_op_add_vec:
2458        if (is_scalar) {
2459            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2460        } else {
2461            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2462        }
2463        break;
2464    case INDEX_op_sub_vec:
2465        if (is_scalar) {
2466            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2467        } else {
2468            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2469        }
2470        break;
2471    case INDEX_op_mul_vec:
2472        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2473        break;
2474    case INDEX_op_neg_vec:
2475        if (is_scalar) {
2476            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2477        } else {
2478            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2479        }
2480        break;
2481    case INDEX_op_abs_vec:
2482        if (is_scalar) {
2483            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2484        } else {
2485            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2486        }
2487        break;
2488    case INDEX_op_and_vec:
2489        if (const_args[2]) {
2490            is_shimm1632(~a2, &cmode, &imm8);
2491            if (a0 == a1) {
2492                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2493                return;
2494            }
2495            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2496            a2 = a0;
2497        }
2498        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2499        break;
2500    case INDEX_op_or_vec:
2501        if (const_args[2]) {
2502            is_shimm1632(a2, &cmode, &imm8);
2503            if (a0 == a1) {
2504                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2505                return;
2506            }
2507            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2508            a2 = a0;
2509        }
2510        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2511        break;
2512    case INDEX_op_andc_vec:
2513        if (const_args[2]) {
2514            is_shimm1632(a2, &cmode, &imm8);
2515            if (a0 == a1) {
2516                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2517                return;
2518            }
2519            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2520            a2 = a0;
2521        }
2522        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2523        break;
2524    case INDEX_op_orc_vec:
2525        if (const_args[2]) {
2526            is_shimm1632(~a2, &cmode, &imm8);
2527            if (a0 == a1) {
2528                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2529                return;
2530            }
2531            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2532            a2 = a0;
2533        }
2534        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2535        break;
2536    case INDEX_op_xor_vec:
2537        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2538        break;
2539    case INDEX_op_ssadd_vec:
2540        if (is_scalar) {
2541            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2542        } else {
2543            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2544        }
2545        break;
2546    case INDEX_op_sssub_vec:
2547        if (is_scalar) {
2548            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2549        } else {
2550            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2551        }
2552        break;
2553    case INDEX_op_usadd_vec:
2554        if (is_scalar) {
2555            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2556        } else {
2557            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2558        }
2559        break;
2560    case INDEX_op_ussub_vec:
2561        if (is_scalar) {
2562            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2563        } else {
2564            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2565        }
2566        break;
2567    case INDEX_op_smax_vec:
2568        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2569        break;
2570    case INDEX_op_smin_vec:
2571        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2572        break;
2573    case INDEX_op_umax_vec:
2574        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2575        break;
2576    case INDEX_op_umin_vec:
2577        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2578        break;
2579    case INDEX_op_not_vec:
2580        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2581        break;
2582    case INDEX_op_shli_vec:
2583        if (is_scalar) {
2584            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2585        } else {
2586            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2587        }
2588        break;
2589    case INDEX_op_shri_vec:
2590        if (is_scalar) {
2591            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2592        } else {
2593            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2594        }
2595        break;
2596    case INDEX_op_sari_vec:
2597        if (is_scalar) {
2598            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2599        } else {
2600            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2601        }
2602        break;
2603    case INDEX_op_aa64_sli_vec:
2604        if (is_scalar) {
2605            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2606        } else {
2607            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2608        }
2609        break;
2610    case INDEX_op_shlv_vec:
2611        if (is_scalar) {
2612            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2613        } else {
2614            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2615        }
2616        break;
2617    case INDEX_op_aa64_sshl_vec:
2618        if (is_scalar) {
2619            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2620        } else {
2621            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2622        }
2623        break;
2624    case INDEX_op_cmp_vec:
2625        {
2626            TCGCond cond = args[3];
2627            AArch64Insn insn;
2628
2629            if (cond == TCG_COND_NE) {
2630                if (const_args[2]) {
2631                    if (is_scalar) {
2632                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2633                    } else {
2634                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2635                    }
2636                } else {
2637                    if (is_scalar) {
2638                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2639                    } else {
2640                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2641                    }
2642                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2643                }
2644            } else {
2645                if (const_args[2]) {
2646                    if (is_scalar) {
2647                        insn = cmp0_scalar_insn[cond];
2648                        if (insn) {
2649                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2650                            break;
2651                        }
2652                    } else {
2653                        insn = cmp0_vec_insn[cond];
2654                        if (insn) {
2655                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2656                            break;
2657                        }
2658                    }
2659                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2660                    a2 = TCG_VEC_TMP0;
2661                }
2662                if (is_scalar) {
2663                    insn = cmp_scalar_insn[cond];
2664                    if (insn == 0) {
2665                        TCGArg t;
2666                        t = a1, a1 = a2, a2 = t;
2667                        cond = tcg_swap_cond(cond);
2668                        insn = cmp_scalar_insn[cond];
2669                        tcg_debug_assert(insn != 0);
2670                    }
2671                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2672                } else {
2673                    insn = cmp_vec_insn[cond];
2674                    if (insn == 0) {
2675                        TCGArg t;
2676                        t = a1, a1 = a2, a2 = t;
2677                        cond = tcg_swap_cond(cond);
2678                        insn = cmp_vec_insn[cond];
2679                        tcg_debug_assert(insn != 0);
2680                    }
2681                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2682                }
2683            }
2684        }
2685        break;
2686
2687    case INDEX_op_bitsel_vec:
2688        a3 = args[3];
2689        if (a0 == a3) {
2690            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2691        } else if (a0 == a2) {
2692            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2693        } else {
2694            if (a0 != a1) {
2695                tcg_out_mov(s, type, a0, a1);
2696            }
2697            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2698        }
2699        break;
2700
2701    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2702    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2703    default:
2704        g_assert_not_reached();
2705    }
2706}
2707
2708int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2709{
2710    switch (opc) {
2711    case INDEX_op_add_vec:
2712    case INDEX_op_sub_vec:
2713    case INDEX_op_and_vec:
2714    case INDEX_op_or_vec:
2715    case INDEX_op_xor_vec:
2716    case INDEX_op_andc_vec:
2717    case INDEX_op_orc_vec:
2718    case INDEX_op_neg_vec:
2719    case INDEX_op_abs_vec:
2720    case INDEX_op_not_vec:
2721    case INDEX_op_cmp_vec:
2722    case INDEX_op_shli_vec:
2723    case INDEX_op_shri_vec:
2724    case INDEX_op_sari_vec:
2725    case INDEX_op_ssadd_vec:
2726    case INDEX_op_sssub_vec:
2727    case INDEX_op_usadd_vec:
2728    case INDEX_op_ussub_vec:
2729    case INDEX_op_shlv_vec:
2730    case INDEX_op_bitsel_vec:
2731        return 1;
2732    case INDEX_op_rotli_vec:
2733    case INDEX_op_shrv_vec:
2734    case INDEX_op_sarv_vec:
2735    case INDEX_op_rotlv_vec:
2736    case INDEX_op_rotrv_vec:
2737        return -1;
2738    case INDEX_op_mul_vec:
2739    case INDEX_op_smax_vec:
2740    case INDEX_op_smin_vec:
2741    case INDEX_op_umax_vec:
2742    case INDEX_op_umin_vec:
2743        return vece < MO_64;
2744
2745    default:
2746        return 0;
2747    }
2748}
2749
2750void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2751                       TCGArg a0, ...)
2752{
2753    va_list va;
2754    TCGv_vec v0, v1, v2, t1, t2, c1;
2755    TCGArg a2;
2756
2757    va_start(va, a0);
2758    v0 = temp_tcgv_vec(arg_temp(a0));
2759    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2760    a2 = va_arg(va, TCGArg);
2761    va_end(va);
2762
2763    switch (opc) {
2764    case INDEX_op_rotli_vec:
2765        t1 = tcg_temp_new_vec(type);
2766        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2767        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2768                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2769        tcg_temp_free_vec(t1);
2770        break;
2771
2772    case INDEX_op_shrv_vec:
2773    case INDEX_op_sarv_vec:
2774        /* Right shifts are negative left shifts for AArch64.  */
2775        v2 = temp_tcgv_vec(arg_temp(a2));
2776        t1 = tcg_temp_new_vec(type);
2777        tcg_gen_neg_vec(vece, t1, v2);
2778        opc = (opc == INDEX_op_shrv_vec
2779               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2780        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2781                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2782        tcg_temp_free_vec(t1);
2783        break;
2784
2785    case INDEX_op_rotlv_vec:
2786        v2 = temp_tcgv_vec(arg_temp(a2));
2787        t1 = tcg_temp_new_vec(type);
2788        c1 = tcg_constant_vec(type, vece, 8 << vece);
2789        tcg_gen_sub_vec(vece, t1, v2, c1);
2790        /* Right shifts are negative left shifts for AArch64.  */
2791        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2792                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2793        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2794                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2795        tcg_gen_or_vec(vece, v0, v0, t1);
2796        tcg_temp_free_vec(t1);
2797        break;
2798
2799    case INDEX_op_rotrv_vec:
2800        v2 = temp_tcgv_vec(arg_temp(a2));
2801        t1 = tcg_temp_new_vec(type);
2802        t2 = tcg_temp_new_vec(type);
2803        c1 = tcg_constant_vec(type, vece, 8 << vece);
2804        tcg_gen_neg_vec(vece, t1, v2);
2805        tcg_gen_sub_vec(vece, t2, c1, v2);
2806        /* Right shifts are negative left shifts for AArch64.  */
2807        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2808                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2809        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2810                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2811        tcg_gen_or_vec(vece, v0, t1, t2);
2812        tcg_temp_free_vec(t1);
2813        tcg_temp_free_vec(t2);
2814        break;
2815
2816    default:
2817        g_assert_not_reached();
2818    }
2819}
2820
2821static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2822{
2823    switch (op) {
2824    case INDEX_op_goto_ptr:
2825        return C_O0_I1(r);
2826
2827    case INDEX_op_ld8u_i32:
2828    case INDEX_op_ld8s_i32:
2829    case INDEX_op_ld16u_i32:
2830    case INDEX_op_ld16s_i32:
2831    case INDEX_op_ld_i32:
2832    case INDEX_op_ld8u_i64:
2833    case INDEX_op_ld8s_i64:
2834    case INDEX_op_ld16u_i64:
2835    case INDEX_op_ld16s_i64:
2836    case INDEX_op_ld32u_i64:
2837    case INDEX_op_ld32s_i64:
2838    case INDEX_op_ld_i64:
2839    case INDEX_op_neg_i32:
2840    case INDEX_op_neg_i64:
2841    case INDEX_op_not_i32:
2842    case INDEX_op_not_i64:
2843    case INDEX_op_bswap16_i32:
2844    case INDEX_op_bswap32_i32:
2845    case INDEX_op_bswap16_i64:
2846    case INDEX_op_bswap32_i64:
2847    case INDEX_op_bswap64_i64:
2848    case INDEX_op_ext8s_i32:
2849    case INDEX_op_ext16s_i32:
2850    case INDEX_op_ext8u_i32:
2851    case INDEX_op_ext16u_i32:
2852    case INDEX_op_ext8s_i64:
2853    case INDEX_op_ext16s_i64:
2854    case INDEX_op_ext32s_i64:
2855    case INDEX_op_ext8u_i64:
2856    case INDEX_op_ext16u_i64:
2857    case INDEX_op_ext32u_i64:
2858    case INDEX_op_ext_i32_i64:
2859    case INDEX_op_extu_i32_i64:
2860    case INDEX_op_extract_i32:
2861    case INDEX_op_extract_i64:
2862    case INDEX_op_sextract_i32:
2863    case INDEX_op_sextract_i64:
2864        return C_O1_I1(r, r);
2865
2866    case INDEX_op_st8_i32:
2867    case INDEX_op_st16_i32:
2868    case INDEX_op_st_i32:
2869    case INDEX_op_st8_i64:
2870    case INDEX_op_st16_i64:
2871    case INDEX_op_st32_i64:
2872    case INDEX_op_st_i64:
2873        return C_O0_I2(rZ, r);
2874
2875    case INDEX_op_add_i32:
2876    case INDEX_op_add_i64:
2877    case INDEX_op_sub_i32:
2878    case INDEX_op_sub_i64:
2879    case INDEX_op_setcond_i32:
2880    case INDEX_op_setcond_i64:
2881    case INDEX_op_negsetcond_i32:
2882    case INDEX_op_negsetcond_i64:
2883        return C_O1_I2(r, r, rA);
2884
2885    case INDEX_op_mul_i32:
2886    case INDEX_op_mul_i64:
2887    case INDEX_op_div_i32:
2888    case INDEX_op_div_i64:
2889    case INDEX_op_divu_i32:
2890    case INDEX_op_divu_i64:
2891    case INDEX_op_rem_i32:
2892    case INDEX_op_rem_i64:
2893    case INDEX_op_remu_i32:
2894    case INDEX_op_remu_i64:
2895    case INDEX_op_muluh_i64:
2896    case INDEX_op_mulsh_i64:
2897        return C_O1_I2(r, r, r);
2898
2899    case INDEX_op_and_i32:
2900    case INDEX_op_and_i64:
2901    case INDEX_op_or_i32:
2902    case INDEX_op_or_i64:
2903    case INDEX_op_xor_i32:
2904    case INDEX_op_xor_i64:
2905    case INDEX_op_andc_i32:
2906    case INDEX_op_andc_i64:
2907    case INDEX_op_orc_i32:
2908    case INDEX_op_orc_i64:
2909    case INDEX_op_eqv_i32:
2910    case INDEX_op_eqv_i64:
2911        return C_O1_I2(r, r, rL);
2912
2913    case INDEX_op_shl_i32:
2914    case INDEX_op_shr_i32:
2915    case INDEX_op_sar_i32:
2916    case INDEX_op_rotl_i32:
2917    case INDEX_op_rotr_i32:
2918    case INDEX_op_shl_i64:
2919    case INDEX_op_shr_i64:
2920    case INDEX_op_sar_i64:
2921    case INDEX_op_rotl_i64:
2922    case INDEX_op_rotr_i64:
2923        return C_O1_I2(r, r, ri);
2924
2925    case INDEX_op_clz_i32:
2926    case INDEX_op_ctz_i32:
2927    case INDEX_op_clz_i64:
2928    case INDEX_op_ctz_i64:
2929        return C_O1_I2(r, r, rAL);
2930
2931    case INDEX_op_brcond_i32:
2932    case INDEX_op_brcond_i64:
2933        return C_O0_I2(r, rA);
2934
2935    case INDEX_op_movcond_i32:
2936    case INDEX_op_movcond_i64:
2937        return C_O1_I4(r, r, rA, rZ, rZ);
2938
2939    case INDEX_op_qemu_ld_a32_i32:
2940    case INDEX_op_qemu_ld_a64_i32:
2941    case INDEX_op_qemu_ld_a32_i64:
2942    case INDEX_op_qemu_ld_a64_i64:
2943        return C_O1_I1(r, r);
2944    case INDEX_op_qemu_ld_a32_i128:
2945    case INDEX_op_qemu_ld_a64_i128:
2946        return C_O2_I1(r, r, r);
2947    case INDEX_op_qemu_st_a32_i32:
2948    case INDEX_op_qemu_st_a64_i32:
2949    case INDEX_op_qemu_st_a32_i64:
2950    case INDEX_op_qemu_st_a64_i64:
2951        return C_O0_I2(rZ, r);
2952    case INDEX_op_qemu_st_a32_i128:
2953    case INDEX_op_qemu_st_a64_i128:
2954        return C_O0_I3(rZ, rZ, r);
2955
2956    case INDEX_op_deposit_i32:
2957    case INDEX_op_deposit_i64:
2958        return C_O1_I2(r, 0, rZ);
2959
2960    case INDEX_op_extract2_i32:
2961    case INDEX_op_extract2_i64:
2962        return C_O1_I2(r, rZ, rZ);
2963
2964    case INDEX_op_add2_i32:
2965    case INDEX_op_add2_i64:
2966    case INDEX_op_sub2_i32:
2967    case INDEX_op_sub2_i64:
2968        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2969
2970    case INDEX_op_add_vec:
2971    case INDEX_op_sub_vec:
2972    case INDEX_op_mul_vec:
2973    case INDEX_op_xor_vec:
2974    case INDEX_op_ssadd_vec:
2975    case INDEX_op_sssub_vec:
2976    case INDEX_op_usadd_vec:
2977    case INDEX_op_ussub_vec:
2978    case INDEX_op_smax_vec:
2979    case INDEX_op_smin_vec:
2980    case INDEX_op_umax_vec:
2981    case INDEX_op_umin_vec:
2982    case INDEX_op_shlv_vec:
2983    case INDEX_op_shrv_vec:
2984    case INDEX_op_sarv_vec:
2985    case INDEX_op_aa64_sshl_vec:
2986        return C_O1_I2(w, w, w);
2987    case INDEX_op_not_vec:
2988    case INDEX_op_neg_vec:
2989    case INDEX_op_abs_vec:
2990    case INDEX_op_shli_vec:
2991    case INDEX_op_shri_vec:
2992    case INDEX_op_sari_vec:
2993        return C_O1_I1(w, w);
2994    case INDEX_op_ld_vec:
2995    case INDEX_op_dupm_vec:
2996        return C_O1_I1(w, r);
2997    case INDEX_op_st_vec:
2998        return C_O0_I2(w, r);
2999    case INDEX_op_dup_vec:
3000        return C_O1_I1(w, wr);
3001    case INDEX_op_or_vec:
3002    case INDEX_op_andc_vec:
3003        return C_O1_I2(w, w, wO);
3004    case INDEX_op_and_vec:
3005    case INDEX_op_orc_vec:
3006        return C_O1_I2(w, w, wN);
3007    case INDEX_op_cmp_vec:
3008        return C_O1_I2(w, w, wZ);
3009    case INDEX_op_bitsel_vec:
3010        return C_O1_I3(w, w, w, w);
3011    case INDEX_op_aa64_sli_vec:
3012        return C_O1_I2(w, 0, w);
3013
3014    default:
3015        g_assert_not_reached();
3016    }
3017}
3018
3019static void tcg_target_init(TCGContext *s)
3020{
3021    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3022    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3023    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3024    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3025
3026    tcg_target_call_clobber_regs = -1ull;
3027    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3028    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3029    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3030    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3031    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3032    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3033    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3034    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3035    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3036    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3037    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3038    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3039    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3040    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3041    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3042    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3043    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3044    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3045    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3046
3047    s->reserved_regs = 0;
3048    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3049    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3050    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3051    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3052    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3053    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3054    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3055}
3056
3057/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3058#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3059
3060#define FRAME_SIZE \
3061    ((PUSH_SIZE \
3062      + TCG_STATIC_CALL_ARGS_SIZE \
3063      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3064      + TCG_TARGET_STACK_ALIGN - 1) \
3065     & ~(TCG_TARGET_STACK_ALIGN - 1))
3066
3067/* We're expecting a 2 byte uleb128 encoded value.  */
3068QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3069
3070/* We're expecting to use a single ADDI insn.  */
3071QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3072
3073static void tcg_target_qemu_prologue(TCGContext *s)
3074{
3075    TCGReg r;
3076
3077    /* Push (FP, LR) and allocate space for all saved registers.  */
3078    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3079                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3080
3081    /* Set up frame pointer for canonical unwinding.  */
3082    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3083
3084    /* Store callee-preserved regs x19..x28.  */
3085    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3086        int ofs = (r - TCG_REG_X19 + 2) * 8;
3087        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3088    }
3089
3090    /* Make stack space for TCG locals.  */
3091    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3092                 FRAME_SIZE - PUSH_SIZE);
3093
3094    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3095    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3096                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3097
3098#if !defined(CONFIG_SOFTMMU)
3099    /*
3100     * Note that XZR cannot be encoded in the address base register slot,
3101     * as that actually encodes SP.  Depending on the guest, we may need
3102     * to zero-extend the guest address via the address index register slot,
3103     * therefore we need to load even a zero guest base into a register.
3104     */
3105    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3106    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3107#endif
3108
3109    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3110    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3111
3112    /*
3113     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3114     * and fall through to the rest of the epilogue.
3115     */
3116    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3117    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3118
3119    /* TB epilogue */
3120    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3121
3122    /* Remove TCG locals stack space.  */
3123    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3124                 FRAME_SIZE - PUSH_SIZE);
3125
3126    /* Restore registers x19..x28.  */
3127    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3128        int ofs = (r - TCG_REG_X19 + 2) * 8;
3129        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3130    }
3131
3132    /* Pop (FP, LR), restore SP to previous frame.  */
3133    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3134                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3135    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3136}
3137
3138static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3139{
3140    int i;
3141    for (i = 0; i < count; ++i) {
3142        p[i] = NOP;
3143    }
3144}
3145
3146typedef struct {
3147    DebugFrameHeader h;
3148    uint8_t fde_def_cfa[4];
3149    uint8_t fde_reg_ofs[24];
3150} DebugFrame;
3151
3152#define ELF_HOST_MACHINE EM_AARCH64
3153
3154static const DebugFrame debug_frame = {
3155    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3156    .h.cie.id = -1,
3157    .h.cie.version = 1,
3158    .h.cie.code_align = 1,
3159    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3160    .h.cie.return_column = TCG_REG_LR,
3161
3162    /* Total FDE size does not include the "len" member.  */
3163    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3164
3165    .fde_def_cfa = {
3166        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3167        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3168        (FRAME_SIZE >> 7)
3169    },
3170    .fde_reg_ofs = {
3171        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3172        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3173        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3174        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3175        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3176        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3177        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3178        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3179        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3180        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3181        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3182        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3183    }
3184};
3185
3186void tcg_register_jit(const void *buf, size_t buf_size)
3187{
3188    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3189}
3190