/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
43    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
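
/*
 * E.g. 0x123 (unshifted) and 0x123000 (LSL #12) are valid arithmetic
 * immediates, while 0x123001 spans both halves and is rejected.
 */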

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
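
/*
 * Worked example: val = 0x0ff0 (form 0..01..10..0) has lowest set bit
 * 0x0010, so val += val & -val gives 0x1000, a power of two, and the
 * final test passes.  By contrast val = 0xf0f0 becomes 0xf100, which
 * still has several bits set and is correctly rejected.
 */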

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}
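
/*
 * For example, v32 = 0x00ab0000 has its only non-zero byte in bits
 * 16-23 and matches the third case: cmode = 0x4, imm8 = 0xab, i.e. a
 * MOVI of 0xab shifted left by 16.
 */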

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}
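
/*
 * Concrete float32 case: 1.0f is 0x3f800000; the low 19 bits are zero
 * and bits 25-30 are 0x1f, so it qualifies with imm8 = 0x70, which the
 * FMOV-style immediate expansion turns back into 1.0.
 */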

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
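
/*
 * Example: v32 = 0x00ab00cd.  Masking the byte at bits 16-23 (i = 4)
 * leaves 0x000000cd, a valid shifted immediate, so the pair becomes
 * MOVI #0xcd (cmode 0x0) plus ORR #0xab, LSL #16: the ORR cmode is the
 * returned i and its imm8 is extract32(v32, i * 4, 8).
 */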

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
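
/*
 * For instance, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands
 * to tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); a mismatched
 * format number fails to compile because the pasted names don't exist.
 */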

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}
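
/*
 * The imm7 field encodes the offset divided by the 8-byte access size
 * forced by the ext bit above; e.g. ofs = -16 deposits 0x7e (-2) into
 * bits 15-21, and the assert bounds ofs to [-512, 504] in steps of 8.
 */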

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
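
/*
 * E.g. aimm = 0x345000 becomes 0x345 with bit 12 set, selecting the
 * "LSL #12" form of the 12-bit immediate; aimm = 0x345 is emitted
 * unshifted.  Values mixing both halves are rejected by the asserts.
 */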

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for both 3.5.2 (Add/subtract shifted register)
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
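
/*
 * Worked example: limm = 0x0ff0 has l = 4, h = 52, so r = 60 and c = 7.
 * DecodeBitMasks reconstructs it as a run of imms + 1 = 8 ones rotated
 * right by immr = 60 (i.e. left by 4), giving back 0x0ff0.
 */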

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
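
/*
 * For instance, v64 = 0x00ff00ff00ff00ff (a MO_16 dup of 0x00ff) fails
 * the all-bytes-equal test but passes the 0x00-or-0xff scan with
 * imm8 = 0x55 (one mask bit per 0xff byte), so a single MOVI with
 * op = 1, cmode = 0xe suffices.
 */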

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
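
/*
 * Two-insn example: value = 0x123400005678 has fewer than 32 bits set,
 * so opc = MOVZ with t0 = value; s0 = 0 strips the low halfword and
 * s1 = 32 the next non-zero one, leaving t2 = 0.  The result is
 * "MOVZ rd, #0x5678; MOVK rd, #0x1234, LSL #32".
 */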

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
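
/*
 * E.g. for an 8-byte access (lgsize = 3): offset 0x7ff8 scales to the
 * maximal uimm12 of 0xfff; offset -8 takes the unscaled signed 9-bit
 * form; offset 0x8000 fits neither and falls back to movi plus the
 * register-offset form.
 */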

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond,
                     TCGReg a, TCGReg b)
{
    if (is_tst_cond(cond)) {
        tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond,
                      TCGReg a, tcg_target_long b)
{
    if (is_tst_cond(cond)) {
        tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
    } else if (b >= 0) {
        tcg_debug_assert(is_aimm(b));
        tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_debug_assert(is_aimm(-b));
        tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
    }
}

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        tgen_cmpi(s, ext, cond, a, b);
    } else {
        tgen_cmp(s, ext, cond, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg a, TCGReg b, TCGLabel *l)
{
    tgen_cmp(s, type, c, a, b);
    tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
    tcg_out_insn(s, 3202, B_C, c, 0);
}

static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c,
                         TCGReg a, tcg_target_long b, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tgen_cmpi(s, ext, c, a, b);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
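
/*
 * The table entries OR together cleanly because DMB_ISH supplies the
 * inner-shareable domain with an empty access-type field, and DMB_LD /
 * DMB_ST set the two type bits in CRm: LD|ST yields the full barrier
 * "dmb ish" (0xd5033bbf), LD alone "dmb ishld" (0xd50339bf), and
 * ST alone "dmb ishst" (0xd5033abf).
 */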
1637
1638typedef struct {
1639    TCGReg base;
1640    TCGReg index;
1641    TCGType index_ext;
1642    TCGAtomAlign aa;
1643} HostAddress;
1644
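/* No MO_BSWAP support: the middle-end byte-swaps with separate ops. */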
1645bool tcg_target_has_memory_bswap(MemOp memop)
1646{
1647    return false;
1648}
1649
1650static const TCGLdstHelperParam ldst_helper_param = {
1651    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1652};
1653
1654static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1655{
1656    MemOp opc = get_memop(lb->oi);
1657
1658    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1659        return false;
1660    }
1661
1662    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1663    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1664    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1665    tcg_out_goto(s, lb->raddr);
1666    return true;
1667}
1668
1669static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1670{
1671    MemOp opc = get_memop(lb->oi);
1672
1673    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1674        return false;
1675    }
1676
1677    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1678    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1679    tcg_out_goto(s, lb->raddr);
1680    return true;
1681}
1682
1683/* We expect to use a 7-bit scaled negative offset from ENV.  */
1684#define MIN_TLB_MASK_TABLE_OFS  -512
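/* LDP's signed 7-bit immediate is scaled by 8, reaching -512..+504. */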
1685
1686/*
1687 * For system-mode, perform the TLB load and compare.
1688 * For user-mode, perform any required alignment tests.
1689 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1690 * is required and fill in @h with the host address for the fast path.
1691 */
1692static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1693                                           TCGReg addr_reg, MemOpIdx oi,
1694                                           bool is_ld)
1695{
1696    TCGType addr_type = s->addr_type;
1697    TCGLabelQemuLdst *ldst = NULL;
1698    MemOp opc = get_memop(oi);
1699    MemOp s_bits = opc & MO_SIZE;
1700    unsigned a_mask;
1701
1702    h->aa = atom_and_align_for_opc(s, opc,
1703                                   have_lse2 ? MO_ATOM_WITHIN16
1704                                             : MO_ATOM_IFALIGN,
1705                                   s_bits == MO_128);
1706    a_mask = (1 << h->aa.align) - 1;
1707
1708    if (tcg_use_softmmu) {
1709        unsigned s_mask = (1u << s_bits) - 1;
1710        unsigned mem_index = get_mmuidx(oi);
1711        TCGReg addr_adj;
1712        TCGType mask_type;
1713        uint64_t compare_mask;
1714
1715        ldst = new_ldst_label(s);
1716        ldst->is_ld = is_ld;
1717        ldst->oi = oi;
1718        ldst->addr_reg = addr_reg;
1719
1720        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1721                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1722
1723        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1724        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1725        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1726        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1727                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1728
1729        /* Extract the TLB index from the address into TMP0.  */
1730        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1731                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1732                     s->page_bits - CPU_TLB_ENTRY_BITS);
1733
1734        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1735        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1736
1737        /* Load the tlb comparator into TMP0, and the fast path addend. */
1738        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1739        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1740                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1741                         : offsetof(CPUTLBEntry, addr_write));
1742        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1743                   offsetof(CPUTLBEntry, addend));
1744
1745        /*
1746         * For aligned accesses, we check the first byte and include
1747         * the alignment bits within the address.  For unaligned accesses,
1748         * we check that we don't cross pages using the address of the
1749         * last byte of the access.
1750         */
1751        if (a_mask >= s_mask) {
1752            addr_adj = addr_reg;
1753        } else {
1754            addr_adj = TCG_REG_TMP2;
1755            tcg_out_insn(s, 3401, ADDI, addr_type,
1756                         addr_adj, addr_reg, s_mask - a_mask);
1757        }
1758        compare_mask = (uint64_t)s->page_mask | a_mask;
1759
1760        /* Store the page mask part of the address into TMP2.  */
1761        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1762                         addr_adj, compare_mask);
1763
1764        /* Perform the address comparison. */
1765        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1766
1767        /* If not equal, we jump to the slow path. */
1768        ldst->label_ptr[0] = s->code_ptr;
1769        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1770
1771        h->base = TCG_REG_TMP1;
1772        h->index = addr_reg;
1773        h->index_ext = addr_type;
1774    } else {
1775        if (a_mask) {
1776            ldst = new_ldst_label(s);
1777
1778            ldst->is_ld = is_ld;
1779            ldst->oi = oi;
1780            ldst->addr_reg = addr_reg;
1781
1782            /* tst addr, #mask */
1783            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1784
1785            /* b.ne slow_path */
1786            ldst->label_ptr[0] = s->code_ptr;
1787            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1788        }
1789
1790        if (guest_base || addr_type == TCG_TYPE_I32) {
1791            h->base = TCG_REG_GUEST_BASE;
1792            h->index = addr_reg;
1793            h->index_ext = addr_type;
1794        } else {
1795            h->base = addr_reg;
1796            h->index = TCG_REG_XZR;
1797            h->index_ext = TCG_TYPE_I64;
1798        }
1799    }
1800
1801    return ldst;
1802}
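
/*
 * For reference, the softmmu fast path emitted above is, schematically
 * (offsets and the addr_adj step vary with the operation):
 *    ldp  tmp0, tmp1, [env, #mask_table_ofs]    // mask, table
 *    and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *    add  tmp1, tmp1, tmp0                      // CPUTLBEntry address
 *    ldr  tmp0, [tmp1, #addr_read/addr_write]   // comparator
 *    ldr  tmp1, [tmp1, #addend]
 *    and  tmp2, addr_adj, #page_mask | a_mask
 *    cmp  tmp0, tmp2
 *    b.ne slow_path
 */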
1803
1804static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1805                                   TCGReg data_r, HostAddress h)
1806{
1807    switch (memop & MO_SSIZE) {
1808    case MO_UB:
1809        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1810        break;
1811    case MO_SB:
1812        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1813                       data_r, h.base, h.index_ext, h.index);
1814        break;
1815    case MO_UW:
1816        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1817        break;
1818    case MO_SW:
1819        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1820                       data_r, h.base, h.index_ext, h.index);
1821        break;
1822    case MO_UL:
1823        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1824        break;
1825    case MO_SL:
1826        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1827        break;
1828    case MO_UQ:
1829        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1830        break;
1831    default:
1832        g_assert_not_reached();
1833    }
1834}
1835
1836static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1837                                   TCGReg data_r, HostAddress h)
1838{
1839    switch (memop & MO_SIZE) {
1840    case MO_8:
1841        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1842        break;
1843    case MO_16:
1844        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1845        break;
1846    case MO_32:
1847        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1848        break;
1849    case MO_64:
1850        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1851        break;
1852    default:
1853        g_assert_not_reached();
1854    }
1855}
1856
1857static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1858                            MemOpIdx oi, TCGType data_type)
1859{
1860    TCGLabelQemuLdst *ldst;
1861    HostAddress h;
1862
1863    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1864    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1865
1866    if (ldst) {
1867        ldst->type = data_type;
1868        ldst->datalo_reg = data_reg;
1869        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1870    }
1871}
1872
1873static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1874                            MemOpIdx oi, TCGType data_type)
1875{
1876    TCGLabelQemuLdst *ldst;
1877    HostAddress h;
1878
1879    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1880    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1881
1882    if (ldst) {
1883        ldst->type = data_type;
1884        ldst->datalo_reg = data_reg;
1885        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1886    }
1887}
1888
1889static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1890                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1891{
1892    TCGLabelQemuLdst *ldst;
1893    HostAddress h;
1894    TCGReg base;
1895    bool use_pair;
1896
1897    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1898
1899    /* Compose the final address, as LDP/STP have no indexing. */
1900    if (h.index == TCG_REG_XZR) {
1901        base = h.base;
1902    } else {
1903        base = TCG_REG_TMP2;
1904        if (h.index_ext == TCG_TYPE_I32) {
1905            /* add base, base, index, uxtw */
1906            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1907                         h.base, h.index, MO_32, 0);
1908        } else {
1909            /* add base, base, index */
1910            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1911        }
1912    }
1913
1914    use_pair = h.aa.atom < MO_128 || have_lse2;
1915
1916    if (!use_pair) {
1917        tcg_insn_unit *branch = NULL;
1918        TCGReg ll, lh, sl, sh;
1919
1920        /*
1921         * If we have already checked for 16-byte alignment, that's all
1922         * we need. Otherwise we have determined that misaligned atomicity
1923         * may be handled with two 8-byte loads.
1924         */
1925        if (h.aa.align < MO_128) {
1926            /*
1927             * TODO: align should be MO_64, so we only need to test bit 3,
1928             * which means we could use TBNZ instead of ANDS+B_C.
1929             */
1930            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1931            branch = s->code_ptr;
1932            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1933            use_pair = true;
1934        }
1935
1936        if (is_ld) {
1937            /*
1938             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1939             *    ldxp lo, hi, [base]
1940             *    stxp t0, lo, hi, [base]
1941             *    cbnz t0, .-8
1942             * This requires that data{lo,hi} not overlap base.
1943             */
1944            if (datalo == base || datahi == base) {
1945                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1946                base = TCG_REG_TMP2;
1947            }
1948            ll = sl = datalo;
1949            lh = sh = datahi;
1950        } else {
1951            /*
1952             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1953             * 1: ldxp t0, t1, [base]
1954             *    stxp t0, lo, hi, [base]
1955             *    cbnz t0, 1b
1956             */
1957            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1958            ll = TCG_REG_TMP0;
1959            lh = TCG_REG_TMP1;
1960            sl = datalo;
1961            sh = datahi;
1962        }
1963
1964        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1965        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1966        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1967
1968        if (use_pair) {
1969            /* "b .+8", branching across the one insn of use_pair. */
1970            tcg_out_insn(s, 3206, B, 2);
1971            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1972        }
1973    }
1974
1975    if (use_pair) {
1976        if (is_ld) {
1977            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1978        } else {
1979            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1980        }
1981    }
1982
1983    if (ldst) {
1984        ldst->type = TCG_TYPE_I128;
1985        ldst->datalo_reg = datalo;
1986        ldst->datahi_reg = datahi;
1987        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1988    }
1989}
1990
1991static const tcg_insn_unit *tb_ret_addr;
1992
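/*
 * Return to the main loop with a0 in x0.  Schematically, the branch
 * is either a direct "b" when the target is within the +/-128MB range,
 * or a "mov tmp0, #target; br tmp0" fallback.
 */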
1993static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1994{
1995    const tcg_insn_unit *target;
1996    ptrdiff_t offset;
1997
1998    /* Reuse the zeroing that exists for goto_ptr.  */
1999    if (a0 == 0) {
2000        target = tcg_code_gen_epilogue;
2001    } else {
2002        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
2003        target = tb_ret_addr;
2004    }
2005
2006    offset = tcg_pcrel_diff(s, target) >> 2;
2007    if (offset == sextract64(offset, 0, 26)) {
2008        tcg_out_insn(s, 3206, B, offset);
2009    } else {
2010        /*
2011         * Only x16/x17 generate BTI type Jump (2),
2012         * other registers generate BTI type Jump|Call (3).
2013         */
2014        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
2015        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
2016        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2017    }
2018}
2019
2020static void tcg_out_goto_tb(TCGContext *s, int which)
2021{
2022    /*
2023     * The direct branch, or the indirect address load, will be patched
2024     * by tb_target_set_jmp_target.  Assert early that the indirect load
2025     * offset is in range, regardless of the direct branch distance.
2026     */
2027    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
2028    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
2029
2030    set_jmp_insn_offset(s, which);
2031    tcg_out32(s, I3206_B);
2032    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2033    set_jmp_reset_offset(s, which);
2034    tcg_out_bti(s, BTI_J);
2035}
2036
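/*
 * Patch the goto_tb site emitted above: write either a direct
 * "b target" when within the +/-128MB branch range, or rewrite the
 * first insn to "ldr x16, jmp_target_addr" so that the following
 * "br x16" takes the jump indirectly (TCG_REG_TMP0 is x16).
 */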
2037void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2038                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2039{
2040    uintptr_t d_addr = tb->jmp_target_addr[n];
2041    ptrdiff_t d_offset = d_addr - jmp_rx;
2042    tcg_insn_unit insn;
2043
2044    /* Either directly branch, or indirect branch load. */
2045    if (d_offset == sextract64(d_offset, 0, 28)) {
2046        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2047    } else {
2048        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2049        ptrdiff_t i_offset = i_addr - jmp_rx;
2050
2051        /* This offset was asserted to be in range in tcg_out_goto_tb. */
2052        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2053    }
2054    qatomic_set((uint32_t *)jmp_rw, insn);
2055    flush_idcache_range(jmp_rx, jmp_rw, 4);
2056}
2057
2058
2059static void tgen_add(TCGContext *s, TCGType type,
2060                     TCGReg a0, TCGReg a1, TCGReg a2)
2061{
2062    tcg_out_insn(s, 3502, ADD, type, a0, a1, a2);
2063}
2064
2065static void tgen_addi(TCGContext *s, TCGType type,
2066                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2067{
2068    if (a2 >= 0) {
2069        tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2);
2070    } else {
2071        tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2);
2072    }
2073}
2074
2075static const TCGOutOpBinary outop_add = {
2076    .base.static_constraint = C_O1_I2(r, r, rA),
2077    .out_rrr = tgen_add,
2078    .out_rri = tgen_addi,
2079};
2080
2081static void tgen_and(TCGContext *s, TCGType type,
2082                     TCGReg a0, TCGReg a1, TCGReg a2)
2083{
2084    tcg_out_insn(s, 3510, AND, type, a0, a1, a2);
2085}
2086
2087static void tgen_andi(TCGContext *s, TCGType type,
2088                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2089{
2090    tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2);
2091}
2092
2093static const TCGOutOpBinary outop_and = {
2094    .base.static_constraint = C_O1_I2(r, r, rL),
2095    .out_rrr = tgen_and,
2096    .out_rri = tgen_andi,
2097};
2098
2099static void tgen_andc(TCGContext *s, TCGType type,
2100                      TCGReg a0, TCGReg a1, TCGReg a2)
2101{
2102    tcg_out_insn(s, 3510, BIC, type, a0, a1, a2);
2103}
2104
2105static const TCGOutOpBinary outop_andc = {
2106    .base.static_constraint = C_O1_I2(r, r, r),
2107    .out_rrr = tgen_andc,
2108};
2109
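/*
 * TCG's clz takes a fallback value: the result is clz(a1) when
 * a1 != 0, else a2.  Roughly:
 *    cmp  a1, #0
 *    clz  tmp0, a1
 *    csel a0, tmp0, a2, ne
 */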
2110static void tgen_clz(TCGContext *s, TCGType type,
2111                     TCGReg a0, TCGReg a1, TCGReg a2)
2112{
2113    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2114    tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1);
2115    tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE);
2116}
2117
2118static void tgen_clzi(TCGContext *s, TCGType type,
2119                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2120{
2121    if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2122        tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2123        return;
2124    }
2125
2126    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2127    tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2128
2129    switch (a2) {
2130    case -1:
2131        tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2132        break;
2133    case 0:
2134        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2135        break;
2136    default:
2137        tcg_out_movi(s, type, TCG_REG_TMP0, a2);
2138        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE);
2139        break;
2140    }
2141}
2142
2143static const TCGOutOpBinary outop_clz = {
2144    .base.static_constraint = C_O1_I2(r, r, rAL),
2145    .out_rrr = tgen_clz,
2146    .out_rri = tgen_clzi,
2147};
2148
2149static const TCGOutOpUnary outop_ctpop = {
2150    .base.static_constraint = C_NotImplemented,
2151};
2152
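/* AArch64 has no ctz insn: reverse the bits and reuse the clz path. */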
2153static void tgen_ctz(TCGContext *s, TCGType type,
2154                     TCGReg a0, TCGReg a1, TCGReg a2)
2155{
2156    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2157    tgen_clz(s, type, a0, TCG_REG_TMP0, a2);
2158}
2159
2160static void tgen_ctzi(TCGContext *s, TCGType type,
2161                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2162{
2163    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2164    tgen_clzi(s, type, a0, TCG_REG_TMP0, a2);
2165}
2166
2167static const TCGOutOpBinary outop_ctz = {
2168    .base.static_constraint = C_O1_I2(r, r, rAL),
2169    .out_rrr = tgen_ctz,
2170    .out_rri = tgen_ctzi,
2171};
2172
2173static void tgen_divs(TCGContext *s, TCGType type,
2174                      TCGReg a0, TCGReg a1, TCGReg a2)
2175{
2176    tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2);
2177}
2178
2179static const TCGOutOpBinary outop_divs = {
2180    .base.static_constraint = C_O1_I2(r, r, r),
2181    .out_rrr = tgen_divs,
2182};
2183
2184static const TCGOutOpDivRem outop_divs2 = {
2185    .base.static_constraint = C_NotImplemented,
2186};
2187
2188static void tgen_divu(TCGContext *s, TCGType type,
2189                      TCGReg a0, TCGReg a1, TCGReg a2)
2190{
2191    tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2);
2192}
2193
2194static const TCGOutOpBinary outop_divu = {
2195    .base.static_constraint = C_O1_I2(r, r, r),
2196    .out_rrr = tgen_divu,
2197};
2198
2199static const TCGOutOpDivRem outop_divu2 = {
2200    .base.static_constraint = C_NotImplemented,
2201};
2202
2203static void tgen_eqv(TCGContext *s, TCGType type,
2204                     TCGReg a0, TCGReg a1, TCGReg a2)
2205{
2206    tcg_out_insn(s, 3510, EON, type, a0, a1, a2);
2207}
2208
2209static const TCGOutOpBinary outop_eqv = {
2210    .base.static_constraint = C_O1_I2(r, r, r),
2211    .out_rrr = tgen_eqv,
2212};
2213
2214static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
2215{
2216    tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63);
2217}
2218
2219static const TCGOutOpUnary outop_extrh_i64_i32 = {
2220    .base.static_constraint = C_O1_I1(r, r),
2221    .out_rr = tgen_extrh_i64_i32,
2222};
2223
2224static void tgen_mul(TCGContext *s, TCGType type,
2225                     TCGReg a0, TCGReg a1, TCGReg a2)
2226{
2227    tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR);
2228}
2229
2230static const TCGOutOpBinary outop_mul = {
2231    .base.static_constraint = C_O1_I2(r, r, r),
2232    .out_rrr = tgen_mul,
2233};
2234
2235static const TCGOutOpMul2 outop_muls2 = {
2236    .base.static_constraint = C_NotImplemented,
2237};
2238
2239static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags)
2240{
2241    return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented;
2242}
2243
2244static void tgen_mulsh(TCGContext *s, TCGType type,
2245                       TCGReg a0, TCGReg a1, TCGReg a2)
2246{
2247    tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2248}
2249
2250static const TCGOutOpBinary outop_mulsh = {
2251    .base.static_constraint = C_Dynamic,
2252    .base.dynamic_constraint = cset_mulh,
2253    .out_rrr = tgen_mulsh,
2254};
2255
2256static const TCGOutOpMul2 outop_mulu2 = {
2257    .base.static_constraint = C_NotImplemented,
2258};
2259
2260static void tgen_muluh(TCGContext *s, TCGType type,
2261                       TCGReg a0, TCGReg a1, TCGReg a2)
2262{
2263    tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2264}
2265
2266static const TCGOutOpBinary outop_muluh = {
2267    .base.static_constraint = C_Dynamic,
2268    .base.dynamic_constraint = cset_mulh,
2269    .out_rrr = tgen_muluh,
2270};
2271
2272static const TCGOutOpBinary outop_nand = {
2273    .base.static_constraint = C_NotImplemented,
2274};
2275
2276static const TCGOutOpBinary outop_nor = {
2277    .base.static_constraint = C_NotImplemented,
2278};
2279
2280static void tgen_or(TCGContext *s, TCGType type,
2281                    TCGReg a0, TCGReg a1, TCGReg a2)
2282{
2283    tcg_out_insn(s, 3510, ORR, type, a0, a1, a2);
2284}
2285
2286static void tgen_ori(TCGContext *s, TCGType type,
2287                     TCGReg a0, TCGReg a1, tcg_target_long a2)
2288{
2289    tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2);
2290}
2291
2292static const TCGOutOpBinary outop_or = {
2293    .base.static_constraint = C_O1_I2(r, r, rL),
2294    .out_rrr = tgen_or,
2295    .out_rri = tgen_ori,
2296};
2297
2298static void tgen_orc(TCGContext *s, TCGType type,
2299                     TCGReg a0, TCGReg a1, TCGReg a2)
2300{
2301    tcg_out_insn(s, 3510, ORN, type, a0, a1, a2);
2302}
2303
2304static const TCGOutOpBinary outop_orc = {
2305    .base.static_constraint = C_O1_I2(r, r, r),
2306    .out_rrr = tgen_orc,
2307};
2308
2309static void tgen_rems(TCGContext *s, TCGType type,
2310                      TCGReg a0, TCGReg a1, TCGReg a2)
2311{
2312    tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2);
2313    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2314}
2315
2316static const TCGOutOpBinary outop_rems = {
2317    .base.static_constraint = C_O1_I2(r, r, r),
2318    .out_rrr = tgen_rems,
2319};
2320
2321static void tgen_remu(TCGContext *s, TCGType type,
2322                      TCGReg a0, TCGReg a1, TCGReg a2)
2323{
2324    tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2);
2325    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2326}
2327
2328static const TCGOutOpBinary outop_remu = {
2329    .base.static_constraint = C_O1_I2(r, r, r),
2330    .out_rrr = tgen_remu,
2331};
2332
2333static const TCGOutOpBinary outop_rotl = {
2334    .base.static_constraint = C_NotImplemented,
2335};
2336
2337static void tgen_rotr(TCGContext *s, TCGType type,
2338                      TCGReg a0, TCGReg a1, TCGReg a2)
2339{
2340    tcg_out_insn(s, 3508, RORV, type, a0, a1, a2);
2341}
2342
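/* ROR with immediate is the EXTR alias: extr a0, a1, a1, #(a2 & max). */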
2343static void tgen_rotri(TCGContext *s, TCGType type,
2344                       TCGReg a0, TCGReg a1, tcg_target_long a2)
2345{
2346    int max = type == TCG_TYPE_I32 ? 31 : 63;
2347    tcg_out_extr(s, type, a0, a1, a1, a2 & max);
2348}
2349
2350static const TCGOutOpBinary outop_rotr = {
2351    .base.static_constraint = C_O1_I2(r, r, ri),
2352    .out_rrr = tgen_rotr,
2353    .out_rri = tgen_rotri,
2354};
2355
2356static void tgen_sar(TCGContext *s, TCGType type,
2357                     TCGReg a0, TCGReg a1, TCGReg a2)
2358{
2359    tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2);
2360}
2361
2362static void tgen_sari(TCGContext *s, TCGType type,
2363                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2364{
2365    int max = type == TCG_TYPE_I32 ? 31 : 63;
2366    tcg_out_sbfm(s, type, a0, a1, a2 & max, max);
2367}
2368
2369static const TCGOutOpBinary outop_sar = {
2370    .base.static_constraint = C_O1_I2(r, r, ri),
2371    .out_rrr = tgen_sar,
2372    .out_rri = tgen_sari,
2373};
2374
2375static void tgen_shl(TCGContext *s, TCGType type,
2376                     TCGReg a0, TCGReg a1, TCGReg a2)
2377{
2378    tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2);
2379}
2380
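/*
 * LSL by immediate is a UBFM alias:
 *    lsl a0, a1, #sh  ==  ubfm a0, a1, #(-sh & max), #(max - sh)
 * and for in-range sh, ~sh & max equals max - sh.
 */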
2381static void tgen_shli(TCGContext *s, TCGType type,
2382                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2383{
2384    int max = type == TCG_TYPE_I32 ? 31 : 63;
2385    tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max);
2386}
2387
2388static const TCGOutOpBinary outop_shl = {
2389    .base.static_constraint = C_O1_I2(r, r, ri),
2390    .out_rrr = tgen_shl,
2391    .out_rri = tgen_shli,
2392};
2393
2394static void tgen_shr(TCGContext *s, TCGType type,
2395                     TCGReg a0, TCGReg a1, TCGReg a2)
2396{
2397    tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2);
2398}
2399
2400static void tgen_shri(TCGContext *s, TCGType type,
2401                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2402{
2403    int max = type == TCG_TYPE_I32 ? 31 : 63;
2404    tcg_out_ubfm(s, type, a0, a1, a2 & max, max);
2405}
2406
2407static const TCGOutOpBinary outop_shr = {
2408    .base.static_constraint = C_O1_I2(r, r, ri),
2409    .out_rrr = tgen_shr,
2410    .out_rri = tgen_shri,
2411};
2412
2413static void tgen_sub(TCGContext *s, TCGType type,
2414                     TCGReg a0, TCGReg a1, TCGReg a2)
2415{
2416    tcg_out_insn(s, 3502, SUB, type, a0, a1, a2);
2417}
2418
2419static const TCGOutOpSubtract outop_sub = {
2420    .base.static_constraint = C_O1_I2(r, r, r),
2421    .out_rrr = tgen_sub,
2422};
2423
2424static void tgen_xor(TCGContext *s, TCGType type,
2425                     TCGReg a0, TCGReg a1, TCGReg a2)
2426{
2427    tcg_out_insn(s, 3510, EOR, type, a0, a1, a2);
2428}
2429
2430static void tgen_xori(TCGContext *s, TCGType type,
2431                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2432{
2433    tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2);
2434}
2435
2436static const TCGOutOpBinary outop_xor = {
2437    .base.static_constraint = C_O1_I2(r, r, rL),
2438    .out_rrr = tgen_xor,
2439    .out_rri = tgen_xori,
2440};
2441
2442static void tgen_bswap16(TCGContext *s, TCGType type,
2443                         TCGReg a0, TCGReg a1, unsigned flags)
2444{
2445    tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2446    if (flags & TCG_BSWAP_OS) {
2447        /* Output must be sign-extended. */
2448        tcg_out_ext16s(s, type, a0, a0);
2449    } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2450        /* Output must be zero-extended, but input isn't. */
2451        tcg_out_ext16u(s, a0, a0);
2452    }
2453}
2454
2455static const TCGOutOpBswap outop_bswap16 = {
2456    .base.static_constraint = C_O1_I1(r, r),
2457    .out_rr = tgen_bswap16,
2458};
2459
2460static void tgen_bswap32(TCGContext *s, TCGType type,
2461                         TCGReg a0, TCGReg a1, unsigned flags)
2462{
2463    tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2464    if (flags & TCG_BSWAP_OS) {
2465        tcg_out_ext32s(s, a0, a0);
2466    }
2467}
2468
2469static const TCGOutOpBswap outop_bswap32 = {
2470    .base.static_constraint = C_O1_I1(r, r),
2471    .out_rr = tgen_bswap32,
2472};
2473
2474static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2475{
2476    tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2477}
2478
2479static const TCGOutOpUnary outop_bswap64 = {
2480    .base.static_constraint = C_O1_I1(r, r),
2481    .out_rr = tgen_bswap64,
2482};
2483
2484static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2485{
2486    tgen_sub(s, type, a0, TCG_REG_XZR, a1);
2487}
2488
2489static const TCGOutOpUnary outop_neg = {
2490    .base.static_constraint = C_O1_I1(r, r),
2491    .out_rr = tgen_neg,
2492};
2493
2494static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2495{
2496    tgen_orc(s, type, a0, TCG_REG_XZR, a1);
2497}
2498
2499static const TCGOutOpUnary outop_not = {
2500    .base.static_constraint = C_O1_I1(r, r),
2501    .out_rr = tgen_not,
2502};
2503
2504static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret)
2505{
2506    /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2507    tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR,
2508                 TCG_REG_XZR, tcg_invert_cond(cond));
2509}
2510
2511static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
2512                         TCGReg a0, TCGReg a1, TCGReg a2)
2513{
2514    tgen_cmp(s, type, cond, a1, a2);
2515    tgen_cset(s, cond, a0);
2516}
2517
2518static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
2519                          TCGReg a0, TCGReg a1, tcg_target_long a2)
2520{
2521    tgen_cmpi(s, type, cond, a1, a2);
2522    tgen_cset(s, cond, a0);
2523}
2524
2525static const TCGOutOpSetcond outop_setcond = {
2526    .base.static_constraint = C_O1_I2(r, r, rC),
2527    .out_rrr = tgen_setcond,
2528    .out_rri = tgen_setcondi,
2529};
2530
2531static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret)
2532{
2533    /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2534    tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR,
2535                 TCG_REG_XZR, tcg_invert_cond(cond));
2536}
2537
2538static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
2539                            TCGReg a0, TCGReg a1, TCGReg a2)
2540{
2541    tgen_cmp(s, type, cond, a1, a2);
2542    tgen_csetm(s, type, cond, a0);
2543}
2544
2545static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
2546                             TCGReg a0, TCGReg a1, tcg_target_long a2)
2547{
2548    tgen_cmpi(s, type, cond, a1, a2);
2549    tgen_csetm(s, type, cond, a0);
2550}
2551
2552static const TCGOutOpSetcond outop_negsetcond = {
2553    .base.static_constraint = C_O1_I2(r, r, rC),
2554    .out_rrr = tgen_negsetcond,
2555    .out_rri = tgen_negsetcondi,
2556};
2557
2558static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2559                         TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
2560                         TCGArg vt, bool const_vt, TCGArg vf, bool const_vf)
2561{
2562    tcg_out_cmp(s, type, cond, c1, c2, const_c2);
2563    tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond);
2564}
2565
2566static const TCGOutOpMovcond outop_movcond = {
2567    .base.static_constraint = C_O1_I4(r, r, rC, rz, rz),
2568    .out = tgen_movcond,
2569};
2570
2571static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2572                         TCGReg a2, unsigned ofs, unsigned len)
2573{
2574    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2575
2576    /*
2577     * Since we can't support "0Z" as a constraint, we allow a1 in
2578     * any register.  Fix things up as if it were a matching constraint.
2579     */
2580    if (a0 != a1) {
2581        if (a0 == a2) {
2582            tcg_out_mov(s, type, TCG_REG_TMP0, a2);
2583            a2 = TCG_REG_TMP0;
2584        }
2585        tcg_out_mov(s, type, a0, a1);
2586    }
2587    tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1);
2588}
2589
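/*
 * The rZ constraint admits only the constant zero, so a constant
 * deposit reduces to clearing the field: AND with the inverted mask.
 */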
2590static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2591                          tcg_target_long a2, unsigned ofs, unsigned len)
2592{
2593    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
2594}
2595
2596static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2,
2597                          unsigned ofs, unsigned len)
2598{
2599    int max = type == TCG_TYPE_I32 ? 31 : 63;
2600    tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1);
2601}
2602
2603static const TCGOutOpDeposit outop_deposit = {
2604    .base.static_constraint = C_O1_I2(r, rZ, rZ),
2605    .out_rrr = tgen_deposit,
2606    .out_rri = tgen_depositi,
2607    .out_rzr = tgen_depositz,
2608};
2609
2610static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2611                         unsigned ofs, unsigned len)
2612{
2613    if (ofs == 0) {
2614        uint64_t mask = MAKE_64BIT_MASK(0, len);
2615        tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask);
2616    } else {
2617        tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1);
2618    }
2619}
2620
2621static const TCGOutOpExtract outop_extract = {
2622    .base.static_constraint = C_O1_I1(r, r),
2623    .out_rr = tgen_extract,
2624};
2625
2626static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2627                          unsigned ofs, unsigned len)
2628{
2629    tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1);
2630}
2631
2632static const TCGOutOpExtract outop_sextract = {
2633    .base.static_constraint = C_O1_I1(r, r),
2634    .out_rr = tgen_sextract,
2635};
2636
2637static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
2638                       const TCGArg args[TCG_MAX_OP_ARGS],
2639                       const int const_args[TCG_MAX_OP_ARGS])
2640{
2641    /* Hoist the loads of the most common arguments.  */
2642    TCGArg a0 = args[0];
2643    TCGArg a1 = args[1];
2644    TCGArg a2 = args[2];
2645
2646    switch (opc) {
2647    case INDEX_op_goto_ptr:
2648        tcg_out_insn(s, 3207, BR, a0);
2649        break;
2650
2651    case INDEX_op_br:
2652        tcg_out_goto_label(s, arg_label(a0));
2653        break;
2654
2655    case INDEX_op_ld8u_i32:
2656    case INDEX_op_ld8u_i64:
2657        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2658        break;
2659    case INDEX_op_ld8s_i32:
2660        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2661        break;
2662    case INDEX_op_ld8s_i64:
2663        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2664        break;
2665    case INDEX_op_ld16u_i32:
2666    case INDEX_op_ld16u_i64:
2667        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2668        break;
2669    case INDEX_op_ld16s_i32:
2670        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2671        break;
2672    case INDEX_op_ld16s_i64:
2673        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2674        break;
2675    case INDEX_op_ld_i32:
2676    case INDEX_op_ld32u_i64:
2677        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2678        break;
2679    case INDEX_op_ld32s_i64:
2680        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2681        break;
2682    case INDEX_op_ld_i64:
2683        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2684        break;
2685
2686    case INDEX_op_st8_i32:
2687    case INDEX_op_st8_i64:
2688        tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0);
2689        break;
2690    case INDEX_op_st16_i32:
2691    case INDEX_op_st16_i64:
2692        tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1);
2693        break;
2694    case INDEX_op_st_i32:
2695    case INDEX_op_st32_i64:
2696        tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2);
2697        break;
2698    case INDEX_op_st_i64:
2699        tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3);
2700        break;
2701
2702    case INDEX_op_qemu_ld_i32:
2703    case INDEX_op_qemu_ld_i64:
2704        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2705        break;
2706    case INDEX_op_qemu_st_i32:
2707    case INDEX_op_qemu_st_i64:
2708        tcg_out_qemu_st(s, a0, a1, a2, ext);
2709        break;
2710    case INDEX_op_qemu_ld_i128:
2711        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2712        break;
2713    case INDEX_op_qemu_st_i128:
2714        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false);
2715        break;
2716
2717    case INDEX_op_extract2_i64:
2718    case INDEX_op_extract2_i32:
2719        tcg_out_extr(s, ext, a0, a2, a1, args[3]);
2720        break;
2721
2722    case INDEX_op_add2_i32:
2723        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
2724                        (int32_t)args[4], args[5], const_args[4],
2725                        const_args[5], false);
2726        break;
2727    case INDEX_op_add2_i64:
2728        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
2729                        args[5], const_args[4], const_args[5], false);
2730        break;
2731    case INDEX_op_sub2_i32:
2732        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
2733                        (int32_t)args[4], args[5], const_args[4],
2734                        const_args[5], true);
2735        break;
2736    case INDEX_op_sub2_i64:
2737        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
2738                        args[5], const_args[4], const_args[5], true);
2739        break;
2740
2741    case INDEX_op_mb:
2742        tcg_out_mb(s, a0);
2743        break;
2744
2745    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2746    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2747    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2748    default:
2749        g_assert_not_reached();
2750    }
2751}
2752
2753static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2754                           unsigned vecl, unsigned vece,
2755                           const TCGArg args[TCG_MAX_OP_ARGS],
2756                           const int const_args[TCG_MAX_OP_ARGS])
2757{
2758    static const AArch64Insn cmp_vec_insn[16] = {
2759        [TCG_COND_EQ] = I3616_CMEQ,
2760        [TCG_COND_GT] = I3616_CMGT,
2761        [TCG_COND_GE] = I3616_CMGE,
2762        [TCG_COND_GTU] = I3616_CMHI,
2763        [TCG_COND_GEU] = I3616_CMHS,
2764    };
2765    static const AArch64Insn cmp_scalar_insn[16] = {
2766        [TCG_COND_EQ] = I3611_CMEQ,
2767        [TCG_COND_GT] = I3611_CMGT,
2768        [TCG_COND_GE] = I3611_CMGE,
2769        [TCG_COND_GTU] = I3611_CMHI,
2770        [TCG_COND_GEU] = I3611_CMHS,
2771    };
2772    static const AArch64Insn cmp0_vec_insn[16] = {
2773        [TCG_COND_EQ] = I3617_CMEQ0,
2774        [TCG_COND_GT] = I3617_CMGT0,
2775        [TCG_COND_GE] = I3617_CMGE0,
2776        [TCG_COND_LT] = I3617_CMLT0,
2777        [TCG_COND_LE] = I3617_CMLE0,
2778    };
2779    static const AArch64Insn cmp0_scalar_insn[16] = {
2780        [TCG_COND_EQ] = I3612_CMEQ0,
2781        [TCG_COND_GT] = I3612_CMGT0,
2782        [TCG_COND_GE] = I3612_CMGE0,
2783        [TCG_COND_LT] = I3612_CMLT0,
2784        [TCG_COND_LE] = I3612_CMLE0,
2785    };
2786
2787    TCGType type = vecl + TCG_TYPE_V64;
2788    unsigned is_q = vecl;
2789    bool is_scalar = !is_q && vece == MO_64;
2790    TCGArg a0, a1, a2, a3;
2791    int cmode, imm8;
2792
2793    a0 = args[0];
2794    a1 = args[1];
2795    a2 = args[2];
2796
2797    switch (opc) {
2798    case INDEX_op_ld_vec:
2799        tcg_out_ld(s, type, a0, a1, a2);
2800        break;
2801    case INDEX_op_st_vec:
2802        tcg_out_st(s, type, a0, a1, a2);
2803        break;
2804    case INDEX_op_dupm_vec:
2805        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2806        break;
2807    case INDEX_op_add_vec:
2808        if (is_scalar) {
2809            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2810        } else {
2811            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2812        }
2813        break;
2814    case INDEX_op_sub_vec:
2815        if (is_scalar) {
2816            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2817        } else {
2818            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2819        }
2820        break;
2821    case INDEX_op_mul_vec:
2822        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2823        break;
2824    case INDEX_op_neg_vec:
2825        if (is_scalar) {
2826            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2827        } else {
2828            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2829        }
2830        break;
2831    case INDEX_op_abs_vec:
2832        if (is_scalar) {
2833            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2834        } else {
2835            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2836        }
2837        break;
2838    case INDEX_op_and_vec:
2839        if (const_args[2]) {
2840            is_shimm1632(~a2, &cmode, &imm8);
2841            if (a0 == a1) {
2842                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2843                return;
2844            }
2845            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2846            a2 = a0;
2847        }
2848        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2849        break;
2850    case INDEX_op_or_vec:
2851        if (const_args[2]) {
2852            is_shimm1632(a2, &cmode, &imm8);
2853            if (a0 == a1) {
2854                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2855                return;
2856            }
2857            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2858            a2 = a0;
2859        }
2860        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2861        break;
2862    case INDEX_op_andc_vec:
2863        if (const_args[2]) {
2864            is_shimm1632(a2, &cmode, &imm8);
2865            if (a0 == a1) {
2866                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2867                return;
2868            }
2869            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2870            a2 = a0;
2871        }
2872        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2873        break;
2874    case INDEX_op_orc_vec:
2875        if (const_args[2]) {
2876            is_shimm1632(~a2, &cmode, &imm8);
2877            if (a0 == a1) {
2878                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2879                return;
2880            }
2881            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2882            a2 = a0;
2883        }
2884        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2885        break;
2886    case INDEX_op_xor_vec:
2887        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2888        break;
2889    case INDEX_op_ssadd_vec:
2890        if (is_scalar) {
2891            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2892        } else {
2893            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2894        }
2895        break;
2896    case INDEX_op_sssub_vec:
2897        if (is_scalar) {
2898            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2899        } else {
2900            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2901        }
2902        break;
2903    case INDEX_op_usadd_vec:
2904        if (is_scalar) {
2905            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2906        } else {
2907            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2908        }
2909        break;
2910    case INDEX_op_ussub_vec:
2911        if (is_scalar) {
2912            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2913        } else {
2914            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2915        }
2916        break;
2917    case INDEX_op_smax_vec:
2918        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2919        break;
2920    case INDEX_op_smin_vec:
2921        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2922        break;
2923    case INDEX_op_umax_vec:
2924        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2925        break;
2926    case INDEX_op_umin_vec:
2927        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2928        break;
2929    case INDEX_op_not_vec:
2930        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2931        break;
2932    case INDEX_op_shli_vec:
2933        if (is_scalar) {
2934            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2935        } else {
2936            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2937        }
2938        break;
2939    case INDEX_op_shri_vec:
2940        if (is_scalar) {
2941            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2942        } else {
2943            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2944        }
2945        break;
2946    case INDEX_op_sari_vec:
2947        if (is_scalar) {
2948            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2949        } else {
2950            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2951        }
2952        break;
2953    case INDEX_op_aa64_sli_vec:
2954        if (is_scalar) {
2955            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2956        } else {
2957            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2958        }
2959        break;
2960    case INDEX_op_shlv_vec:
2961        if (is_scalar) {
2962            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2963        } else {
2964            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2965        }
2966        break;
2967    case INDEX_op_aa64_sshl_vec:
2968        if (is_scalar) {
2969            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2970        } else {
2971            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2972        }
2973        break;
2974    case INDEX_op_cmp_vec:
2975        {
2976            TCGCond cond = args[3];
2977            AArch64Insn insn;
2978
2979            switch (cond) {
2980            case TCG_COND_NE:
2981                if (const_args[2]) {
2982                    if (is_scalar) {
2983                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2984                    } else {
2985                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2986                    }
2987                } else {
2988                    if (is_scalar) {
2989                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2990                    } else {
2991                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2992                    }
2993                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2994                }
2995                break;
2996
2997            case TCG_COND_TSTNE:
2998            case TCG_COND_TSTEQ:
2999                if (const_args[2]) {
3000                    /* (x & 0) == 0 */
3001                    tcg_out_dupi_vec(s, type, MO_8, a0,
3002                                     -(cond == TCG_COND_TSTEQ));
3003                    break;
3004                }
3005                if (is_scalar) {
3006                    tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2);
3007                } else {
3008                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2);
3009                }
3010                if (cond == TCG_COND_TSTEQ) {
3011                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
3012                }
3013                break;
3014
3015            default:
3016                if (const_args[2]) {
3017                    if (is_scalar) {
3018                        insn = cmp0_scalar_insn[cond];
3019                        if (insn) {
3020                            tcg_out_insn_3612(s, insn, vece, a0, a1);
3021                            break;
3022                        }
3023                    } else {
3024                        insn = cmp0_vec_insn[cond];
3025                        if (insn) {
3026                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
3027                            break;
3028                        }
3029                    }
3030                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
3031                    a2 = TCG_VEC_TMP0;
3032                }
3033                if (is_scalar) {
3034                    insn = cmp_scalar_insn[cond];
3035                    if (insn == 0) {
3036                        TCGArg t;
3037                        t = a1, a1 = a2, a2 = t;
3038                        cond = tcg_swap_cond(cond);
3039                        insn = cmp_scalar_insn[cond];
3040                        tcg_debug_assert(insn != 0);
3041                    }
3042                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
3043                } else {
3044                    insn = cmp_vec_insn[cond];
3045                    if (insn == 0) {
3046                        TCGArg t;
3047                        t = a1, a1 = a2, a2 = t;
3048                        cond = tcg_swap_cond(cond);
3049                        insn = cmp_vec_insn[cond];
3050                        tcg_debug_assert(insn != 0);
3051                    }
3052                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
3053                }
3054                break;
3055            }
3056        }
3057        break;
3058
3059    case INDEX_op_bitsel_vec:
3060        a3 = args[3];
3061        if (a0 == a3) {
3062            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
3063        } else if (a0 == a2) {
3064            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
3065        } else {
3066            if (a0 != a1) {
3067                tcg_out_mov(s, type, a0, a1);
3068            }
3069            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
3070        }
3071        break;
3072
3073    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3074    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3075    default:
3076        g_assert_not_reached();
3077    }
3078}
3079
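/*
 * Report whether an opcode is supported directly (1), not at all (0),
 * or via expansion in tcg_expand_vec_op below (-1).
 */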
3080int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3081{
3082    switch (opc) {
3083    case INDEX_op_add_vec:
3084    case INDEX_op_sub_vec:
3085    case INDEX_op_and_vec:
3086    case INDEX_op_or_vec:
3087    case INDEX_op_xor_vec:
3088    case INDEX_op_andc_vec:
3089    case INDEX_op_orc_vec:
3090    case INDEX_op_neg_vec:
3091    case INDEX_op_abs_vec:
3092    case INDEX_op_not_vec:
3093    case INDEX_op_cmp_vec:
3094    case INDEX_op_shli_vec:
3095    case INDEX_op_shri_vec:
3096    case INDEX_op_sari_vec:
3097    case INDEX_op_ssadd_vec:
3098    case INDEX_op_sssub_vec:
3099    case INDEX_op_usadd_vec:
3100    case INDEX_op_ussub_vec:
3101    case INDEX_op_shlv_vec:
3102    case INDEX_op_bitsel_vec:
3103        return 1;
3104    case INDEX_op_rotli_vec:
3105    case INDEX_op_shrv_vec:
3106    case INDEX_op_sarv_vec:
3107    case INDEX_op_rotlv_vec:
3108    case INDEX_op_rotrv_vec:
3109        return -1;
3110    case INDEX_op_mul_vec:
3111    case INDEX_op_smax_vec:
3112    case INDEX_op_smin_vec:
3113    case INDEX_op_umax_vec:
3114    case INDEX_op_umin_vec:
3115        return vece < MO_64;
3116
3117    default:
3118        return 0;
3119    }
3120}
3121
3122void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3123                       TCGArg a0, ...)
3124{
3125    va_list va;
3126    TCGv_vec v0, v1, v2, t1, t2, c1;
3127    TCGArg a2;
3128
3129    va_start(va, a0);
3130    v0 = temp_tcgv_vec(arg_temp(a0));
3131    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3132    a2 = va_arg(va, TCGArg);
3133    va_end(va);
3134
3135    switch (opc) {
3136    case INDEX_op_rotli_vec:
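        /*
         * rotl(v1, a2) == (v1 << a2) | (v1 >> (esize - a2)): shift the
         * high bits down into a temp, then SLI the left-shifted source
         * over it; SLI preserves the low a2 bits of the destination.
         */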
3137        t1 = tcg_temp_new_vec(type);
3138        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
3139        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
3140                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
3141        tcg_temp_free_vec(t1);
3142        break;
3143
3144    case INDEX_op_shrv_vec:
3145    case INDEX_op_sarv_vec:
3146        /* Right shifts are negative left shifts for AArch64.  */
3147        v2 = temp_tcgv_vec(arg_temp(a2));
3148        t1 = tcg_temp_new_vec(type);
3149        tcg_gen_neg_vec(vece, t1, v2);
3150        opc = (opc == INDEX_op_shrv_vec
3151               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
3152        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
3153                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3154        tcg_temp_free_vec(t1);
3155        break;
3156
3157    case INDEX_op_rotlv_vec:
3158        v2 = temp_tcgv_vec(arg_temp(a2));
3159        t1 = tcg_temp_new_vec(type);
3160        c1 = tcg_constant_vec(type, vece, 8 << vece);
3161        tcg_gen_sub_vec(vece, t1, v2, c1);
3162        /* Right shifts are negative left shifts for AArch64.  */
3163        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
3164                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3165        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
3166                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3167        tcg_gen_or_vec(vece, v0, v0, t1);
3168        tcg_temp_free_vec(t1);
3169        break;
3170
3171    case INDEX_op_rotrv_vec:
3172        v2 = temp_tcgv_vec(arg_temp(a2));
3173        t1 = tcg_temp_new_vec(type);
3174        t2 = tcg_temp_new_vec(type);
3175        c1 = tcg_constant_vec(type, vece, 8 << vece);
3176        tcg_gen_neg_vec(vece, t1, v2);
3177        tcg_gen_sub_vec(vece, t2, c1, v2);
3178        /* Right shifts are negative left shifts for AArch64.  */
3179        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
3180                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3181        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
3182                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
3183        tcg_gen_or_vec(vece, v0, t1, t2);
3184        tcg_temp_free_vec(t1);
3185        tcg_temp_free_vec(t2);
3186        break;
3187
3188    default:
3189        g_assert_not_reached();
3190    }
3191}
3192
3193static TCGConstraintSetIndex
3194tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
3195{
3196    switch (op) {
3197    case INDEX_op_goto_ptr:
3198        return C_O0_I1(r);
3199
3200    case INDEX_op_ld8u_i32:
3201    case INDEX_op_ld8s_i32:
3202    case INDEX_op_ld16u_i32:
3203    case INDEX_op_ld16s_i32:
3204    case INDEX_op_ld_i32:
3205    case INDEX_op_ld8u_i64:
3206    case INDEX_op_ld8s_i64:
3207    case INDEX_op_ld16u_i64:
3208    case INDEX_op_ld16s_i64:
3209    case INDEX_op_ld32u_i64:
3210    case INDEX_op_ld32s_i64:
3211    case INDEX_op_ld_i64:
3212        return C_O1_I1(r, r);
3213
3214    case INDEX_op_st8_i32:
3215    case INDEX_op_st16_i32:
3216    case INDEX_op_st_i32:
3217    case INDEX_op_st8_i64:
3218    case INDEX_op_st16_i64:
3219    case INDEX_op_st32_i64:
3220    case INDEX_op_st_i64:
3221        return C_O0_I2(rz, r);
3222
3223    case INDEX_op_qemu_ld_i32:
3224    case INDEX_op_qemu_ld_i64:
3225        return C_O1_I1(r, r);
3226    case INDEX_op_qemu_ld_i128:
3227        return C_O2_I1(r, r, r);
3228    case INDEX_op_qemu_st_i32:
3229    case INDEX_op_qemu_st_i64:
3230        return C_O0_I2(rz, r);
3231    case INDEX_op_qemu_st_i128:
3232        return C_O0_I3(rz, rz, r);
3233
3234    case INDEX_op_extract2_i32:
3235    case INDEX_op_extract2_i64:
3236        return C_O1_I2(r, rz, rz);
3237
3238    case INDEX_op_add2_i32:
3239    case INDEX_op_add2_i64:
3240    case INDEX_op_sub2_i32:
3241    case INDEX_op_sub2_i64:
3242        return C_O2_I4(r, r, rz, rz, rA, rMZ);
3243
3244    case INDEX_op_add_vec:
3245    case INDEX_op_sub_vec:
3246    case INDEX_op_mul_vec:
3247    case INDEX_op_xor_vec:
3248    case INDEX_op_ssadd_vec:
3249    case INDEX_op_sssub_vec:
3250    case INDEX_op_usadd_vec:
3251    case INDEX_op_ussub_vec:
3252    case INDEX_op_smax_vec:
3253    case INDEX_op_smin_vec:
3254    case INDEX_op_umax_vec:
3255    case INDEX_op_umin_vec:
3256    case INDEX_op_shlv_vec:
3257    case INDEX_op_shrv_vec:
3258    case INDEX_op_sarv_vec:
3259    case INDEX_op_aa64_sshl_vec:
3260        return C_O1_I2(w, w, w);
3261    case INDEX_op_not_vec:
3262    case INDEX_op_neg_vec:
3263    case INDEX_op_abs_vec:
3264    case INDEX_op_shli_vec:
3265    case INDEX_op_shri_vec:
3266    case INDEX_op_sari_vec:
3267        return C_O1_I1(w, w);
3268    case INDEX_op_ld_vec:
3269    case INDEX_op_dupm_vec:
3270        return C_O1_I1(w, r);
3271    case INDEX_op_st_vec:
3272        return C_O0_I2(w, r);
3273    case INDEX_op_dup_vec:
3274        return C_O1_I1(w, wr);
3275    case INDEX_op_or_vec:
3276    case INDEX_op_andc_vec:
3277        return C_O1_I2(w, w, wO);
3278    case INDEX_op_and_vec:
3279    case INDEX_op_orc_vec:
3280        return C_O1_I2(w, w, wN);
3281    case INDEX_op_cmp_vec:
3282        return C_O1_I2(w, w, wZ);
3283    case INDEX_op_bitsel_vec:
3284        return C_O1_I3(w, w, w, w);
3285    case INDEX_op_aa64_sli_vec:
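        /* SLI also reads its destination: tie input 1 to the output. */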
3286        return C_O1_I2(w, 0, w);
3287
3288    default:
3289        return C_NotImplemented;
3290    }
3291}
3292
3293static void tcg_target_init(TCGContext *s)
3294{
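    /* Regset bits 0..31 are the general registers, 32..63 the vector regs. */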
3295    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3296    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3297    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3298    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3299
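    /*
     * Start with everything call-clobbered, then clear the AAPCS64
     * callee-saved registers: x19..x29 and v8..v15.
     */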
3300    tcg_target_call_clobber_regs = -1ull;
3301    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3302    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3303    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3304    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3305    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3306    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3307    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3308    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3309    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3310    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3311    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3312    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3313    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3314    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3315    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3316    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3317    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3318    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3319    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3320
3321    s->reserved_regs = 0;
3322    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3323    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3324    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3325    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3326    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3327    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3328    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3329}
3330
3331/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3332#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3333
3334#define FRAME_SIZE \
3335    ((PUSH_SIZE \
3336      + TCG_STATIC_CALL_ARGS_SIZE \
3337      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3338      + TCG_TARGET_STACK_ALIGN - 1) \
3339     & ~(TCG_TARGET_STACK_ALIGN - 1))
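
/*
 * Resulting frame layout, from the SP established by the prologue:
 *
 *   sp + 0                              outgoing call arguments
 *   sp + TCG_STATIC_CALL_ARGS_SIZE      TCG temporary buffer
 *   ...                                 alignment padding
 *   sp + FRAME_SIZE - PUSH_SIZE         saved FP, LR
 *   sp + FRAME_SIZE - PUSH_SIZE + 16    saved x19..x28 (five pairs)
 *   sp + FRAME_SIZE                     caller's frame (the CFA)
 */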
3340
3341/* We're expecting a 2-byte uleb128-encoded value.  */
3342QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3343
3344/* We're expecting to use a single ADDI insn.  */
3345QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3346
3347static void tcg_target_qemu_prologue(TCGContext *s)
3348{
3349    TCGReg r;
3350
3351    tcg_out_bti(s, BTI_C);
3352
3353    /* Push (FP, LR) and allocate space for all saved registers.  */
3354    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3355                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3356
3357    /* Set up frame pointer for canonical unwinding.  */
3358    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3359
3360    /* Store callee-preserved regs x19..x28.  */
3361    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
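        /* The +2 slots skip the (FP, LR) pair stored at offset 0. */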
3362        int ofs = (r - TCG_REG_X19 + 2) * 8;
3363        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3364    }
3365
3366    /* Make stack space for TCG locals.  */
3367    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3368                 FRAME_SIZE - PUSH_SIZE);
3369
3370    /* Tell TCG where its locals live: base register, offset, and size.  */
3371    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3372                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3373
3374    if (!tcg_use_softmmu) {
3375        /*
3376         * Note that XZR cannot be encoded in the address base register slot,
3377         * as that actually encodes SP.  Depending on the guest, we may need
3378         * to zero-extend the guest address via the address index register slot;
3379         * therefore we need to load even a zero guest base into a register.
3380         */
3381        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3382        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3383    }
3384
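    /* Enter the translated code: x0 is the CPU env, x1 the TB entry point. */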
3385    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3386    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3387
3388    /*
3389     * Return path for goto_ptr.  Set the return value to 0, as exit_tb does,
3390     * and fall through to the rest of the epilogue.
3391     */
3392    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3393    tcg_out_bti(s, BTI_J);
3394    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3395
3396    /* TB epilogue */
3397    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3398    tcg_out_bti(s, BTI_J);
3399
3400    /* Release the stack space for TCG locals.  */
3401    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3402                 FRAME_SIZE - PUSH_SIZE);
3403
3404    /* Restore registers x19..x28.  */
3405    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3406        int ofs = (r - TCG_REG_X19 + 2) * 8;
3407        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3408    }
3409
3410    /* Pop (FP, LR), restore SP to previous frame.  */
3411    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3412                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3413    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3414}
3415
3416static void tcg_out_tb_start(TCGContext *s)
3417{
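    /* Each TB may be entered indirectly; start with a BTI landing pad. */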
3418    tcg_out_bti(s, BTI_J);
3419}
3420
3421static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3422{
3423    int i;
3424    for (i = 0; i < count; ++i) {
3425        p[i] = NOP;
3426    }
3427}
3428
3429typedef struct {
3430    DebugFrameHeader h;
3431    uint8_t fde_def_cfa[4];
3432    uint8_t fde_reg_ofs[24];
3433} DebugFrame;
3434
3435#define ELF_HOST_MACHINE EM_AARCH64
3436
3437static const DebugFrame debug_frame = {
3438    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3439    .h.cie.id = -1,
3440    .h.cie.version = 1,
3441    .h.cie.code_align = 1,
3442    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3443    .h.cie.return_column = TCG_REG_LR,
3444
3445    /* Total FDE size does not include the "len" member.  */
3446    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3447
3448    .fde_def_cfa = {
3449        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3450        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3451        (FRAME_SIZE >> 7)
3452    },
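    /*
     * Each DW_CFA_offset entry is 0x80 | reg followed by a uleb128
     * factored offset, multiplied by data_align (-8) for the real offset.
     */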
3453    .fde_reg_ofs = {
3454        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3455        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3456        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3457        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3458        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3459        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3460        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3461        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3462        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3463        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3464        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3465        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3466    }
3467};
3468
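/* Expose the generated code and its unwind info to debuggers. */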
3469void tcg_register_jit(const void *buf, size_t buf_size)
3470{
3471    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3472}
3473