/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
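
/*
 * A note on the range check above: tcg_insn_unit is 4 bytes wide, so
 * the pointer subtraction already yields an offset in instruction
 * units.  A signed 26-bit word offset gives B and BL a reach of
 * +/-2^25 instructions, i.e. +/-128MB from the branch, which is the
 * architectural limit.
 */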

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
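
/*
 * A worked example of the test above: for val = 0x0ff0 (form
 * 0..01..10..0), val & -val = 0x0010 isolates the lowest set bit;
 * adding it carries through the contiguous run of ones, giving 0x1000,
 * which passes the power-of-two test.  For val = 0x0f0f the set bits
 * are not contiguous, the sum is 0x0f10, and the test correctly fails.
 */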

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}
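
/*
 * The cmode values above follow the AdvSIMD "modified immediate"
 * encoding: even cmodes 0/2/4/6 select a byte shifted left by
 * 0/8/16/24 bits.  E.g. v32 = 0x00ab0000 yields cmode 0x4, imm8 0xab,
 * i.e. MOVI Vd.4S, #0xab, LSL #16.
 */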

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}
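
/*
 * The test above matches the FMOV (vector, immediate) expansion: a
 * sign bit, an exponent of the form 0b100000 or 0b011111, 6 mantissa
 * bits, and zeros below.  E.g. 1.0f = 0x3f800000 has zero low 19 bits
 * and exponent field 0x1f, and packs to imm8 = 0x70.
 */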

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
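
/*
 * Example: v32 = 0x00ab00cd.  The i = 4 iteration masks out the 0xab
 * byte, leaving 0x000000cd, which matches is_shimm32 with cmode 0x0.
 * The caller then emits MOVI with (cmode 0x0, imm8 0xcd) and ORR with
 * cmode 4 and imm8 = extract32(v32, 16, 8) = 0xab.
 */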

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
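
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) pastes
 * together tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm), so a
 * mismatch between the format and the opcode name fails to compile.
 */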

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
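
/*
 * Example: aimm = 0x123000 has its low 12 bits clear, so the code
 * above rewrites it as 0x123 with the shift flag set, emitting e.g.
 * ADD rd, rn, #0x123, LSL #12.
 */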

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}
/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
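
/*
 * Worked example: limm = 0x0000000000ff0000 gives h = clz64 = 40 and
 * l = ctz64 = 16, taking the else branch: r = 64 - 16 = 48 and
 * c = 48 - 40 - 1 = 7.  DecodeBitMasks reads this as "8 consecutive
 * ones (imms = 7), rotated right by 48 (immr = 48)", and ROR(0xff, 48)
 * over 64 bits is indeed 0xff << 16.
 */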

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
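
/*
 * The "all bytes 0x00 or 0xff" case above uses the cmode 0xe, op 1
 * expansion, where each imm8 bit selects a whole byte of ones.  E.g.
 * v64 = 0x00ff00ff00ff00ff sets imm8 = 0b01010101 = 0x55 and loads in
 * a single MOVI.
 */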

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
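
/*
 * Example of the MOVZ/MOVK path above: value = 0x12345678 has fewer
 * than 32 set bits, so t0 = value and opc = MOVZ.  Then s0 = 0,
 * t1 = 0x12340000, s1 = 16, t2 = 0, emitting
 *   MOVZ wd, #0x5678
 *   MOVK wd, #0x1234, LSL #16
 * Anything that would need three or more moves comes from the pool.
 */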

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
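
/*
 * Example with an 8-byte access (lgsize = 3): offset 32760 is aligned
 * and scales to 4095, fitting the uimm12 form; offset -8 uses the
 * unscaled 9-bit form; offset 65536 fits neither, so it is
 * materialized in TMP0 and the register-offset form is used.
 */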

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
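
/*
 * Example: depositing an 8-bit field at lsb 8 of a 32-bit value gives
 * a = (32 - 8) & 31 = 24 and b = 7, i.e. BFM wd, wn, #24, #7, which is
 * the alias BFI wd, wn, #8, #8.
 */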
1358
1359static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond,
1360                     TCGReg a, TCGReg b)
1361{
1362    if (is_tst_cond(cond)) {
1363        tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
1364    } else {
1365        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1366    }
1367}
1368
1369static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond,
1370                      TCGReg a, tcg_target_long b)
1371{
1372    if (is_tst_cond(cond)) {
1373        tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
1374    } else if (b >= 0) {
1375        tcg_debug_assert(is_aimm(b));
1376        tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1377    } else {
1378        tcg_debug_assert(is_aimm(-b));
1379        tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1380    }
1381}
1382
1383static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
1384                        tcg_target_long b, bool const_b)
1385{
1386    if (const_b) {
1387        tgen_cmpi(s, ext, cond, a, b);
1388    } else {
1389        tgen_cmp(s, ext, cond, a, b);
1390    }
1391}
1392
1393static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1394{
1395    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1396    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1397    tcg_out_insn(s, 3206, B, offset);
1398}
1399
1400static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1401{
1402    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1403    if (offset == sextract64(offset, 0, 26)) {
1404        tcg_out_insn(s, 3206, BL, offset);
1405    } else {
1406        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1407        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1408    }
1409}
1410
1411static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1412                         const TCGHelperInfo *info)
1413{
1414    tcg_out_call_int(s, target);
1415}
1416
1417static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1418{
1419    if (!l->has_value) {
1420        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1421        tcg_out_insn(s, 3206, B, 0);
1422    } else {
1423        tcg_out_goto(s, l->u.value_ptr);
1424    }
1425}
1426
1427static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1428                        TCGReg a, TCGReg b, TCGLabel *l)
1429{
1430    tgen_cmp(s, type, c, a, b);
1431    tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1432    tcg_out_insn(s, 3202, B_C, c, 0);
1433}
1434
1435static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c,
1436                         TCGReg a, tcg_target_long b, TCGLabel *l)
1437{
1438    int tbit = -1;
1439    bool need_cmp = true;
1440
1441    switch (c) {
1442    case TCG_COND_EQ:
1443    case TCG_COND_NE:
1444        /* cmp xN,0; b.ne L -> cbnz xN,L */
1445        if (b == 0) {
1446            need_cmp = false;
1447        }
1448        break;
1449    case TCG_COND_LT:
1450    case TCG_COND_GE:
1451        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
1452        if (b == 0) {
1453            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
1454            tbit = ext ? 63 : 31;
1455            need_cmp = false;
1456        }
1457        break;
1458    case TCG_COND_TSTEQ:
1459    case TCG_COND_TSTNE:
1460        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
1461        if (b == UINT32_MAX) {
1462            c = tcg_tst_eqne_cond(c);
1463            ext = TCG_TYPE_I32;
1464            need_cmp = false;
1465            break;
1466        }
1467        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
1468        if (is_power_of_2(b)) {
1469            tbit = ctz64(b);
1470            need_cmp = false;
1471        }
1472        break;
1473    default:
1474        break;
1475    }
1476
1477    if (need_cmp) {
1478        tgen_cmpi(s, ext, c, a, b);
1479        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1480        tcg_out_insn(s, 3202, B_C, c, 0);
1481        return;
1482    }
1483
1484    if (tbit >= 0) {
1485        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
1486        switch (c) {
1487        case TCG_COND_TSTEQ:
1488            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
1489            break;
1490        case TCG_COND_TSTNE:
1491            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
1492            break;
1493        default:
1494            g_assert_not_reached();
1495        }
1496    } else {
1497        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1498        switch (c) {
1499        case TCG_COND_EQ:
1500            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
1501            break;
1502        case TCG_COND_NE:
1503            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
1504            break;
1505        default:
1506            g_assert_not_reached();
1507        }
1508    }
1509}
1510
1511static const TCGOutOpBrcond outop_brcond = {
1512    .base.static_constraint = C_O0_I2(r, rC),
1513    .out_rr = tgen_brcond,
1514    .out_ri = tgen_brcondi,
1515};
1516
1517static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1518                               TCGReg rd, TCGReg rn)
1519{
1520    /* REV, REV16, REV32 */
1521    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1522}
1523
1524static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1525                               TCGReg rd, TCGReg rn)
1526{
1527    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1528    int bits = (8 << s_bits) - 1;
1529    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1530}
1531
1532static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1533{
1534    tcg_out_sxt(s, type, MO_8, rd, rn);
1535}
1536
1537static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1538{
1539    tcg_out_sxt(s, type, MO_16, rd, rn);
1540}
1541
1542static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1543{
1544    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1545}
1546
1547static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1548{
1549    tcg_out_ext32s(s, rd, rn);
1550}
1551
1552static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1553                               TCGReg rd, TCGReg rn)
1554{
1555    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1556    int bits = (8 << s_bits) - 1;
1557    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1558}
1559
1560static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1561{
1562    tcg_out_uxt(s, MO_8, rd, rn);
1563}
1564
1565static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1566{
1567    tcg_out_uxt(s, MO_16, rd, rn);
1568}
1569
1570static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1571{
1572    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1573}
1574
1575static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1576{
1577    tcg_out_ext32u(s, rd, rn);
1578}
1579
1580static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1581{
1582    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1583}
1584
1585static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1586                            TCGReg rh, TCGReg al, TCGReg ah,
1587                            tcg_target_long bl, tcg_target_long bh,
1588                            bool const_bl, bool const_bh, bool sub)
1589{
1590    TCGReg orig_rl = rl;
1591    AArch64Insn insn;
1592
1593    if (rl == ah || (!const_bh && rl == bh)) {
1594        rl = TCG_REG_TMP0;
1595    }
1596
1597    if (const_bl) {
1598        if (bl < 0) {
1599            bl = -bl;
1600            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1601        } else {
1602            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1603        }
1604
1605        if (unlikely(al == TCG_REG_XZR)) {
1606            /* ??? We want to allow al to be zero for the benefit of
1607               negation via subtraction.  However, that leaves open the
1608               possibility of adding 0+const in the low part, and the
1609               immediate add instructions encode XSP not XZR.  Don't try
1610               anything more elaborate here than loading another zero.  */
1611            al = TCG_REG_TMP0;
1612            tcg_out_movi(s, ext, al, 0);
1613        }
1614        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1615    } else {
1616        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1617    }
1618
1619    insn = I3503_ADC;
1620    if (const_bh) {
1621        /* Note that the only two constants we support are 0 and -1, and
1622           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1623        if ((bh != 0) ^ sub) {
1624            insn = I3503_SBC;
1625        }
1626        bh = TCG_REG_XZR;
1627    } else if (sub) {
1628        insn = I3503_SBC;
1629    }
1630    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1631
1632    tcg_out_mov(s, ext, orig_rl, rl);
1633}
1634
1635static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1636{
1637    static const uint32_t sync[] = {
1638        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1639        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1640        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1641        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1642        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1643    };
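    /* Combinations not listed (e.g. any containing TCG_MO_ST_LD) fall back
       to the full barrier: the [0 ... TCG_MO_ALL] range initializer sets
       the default, which the later designated entries override.  */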
1644    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1645}
1646
1647typedef struct {
1648    TCGReg base;
1649    TCGReg index;
1650    TCGType index_ext;
1651    TCGAtomAlign aa;
1652} HostAddress;
1653
1654bool tcg_target_has_memory_bswap(MemOp memop)
1655{
1656    return false;
1657}
1658
1659static const TCGLdstHelperParam ldst_helper_param = {
1660    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1661};
1662
1663static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1664{
1665    MemOp opc = get_memop(lb->oi);
1666
1667    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1668        return false;
1669    }
1670
1671    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1672    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1673    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1674    tcg_out_goto(s, lb->raddr);
1675    return true;
1676}
1677
1678static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1679{
1680    MemOp opc = get_memop(lb->oi);
1681
1682    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1683        return false;
1684    }
1685
1686    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1687    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1688    tcg_out_goto(s, lb->raddr);
1689    return true;
1690}
1691
1692/* We expect to use a 7-bit scaled negative offset from ENV.  */
1693#define MIN_TLB_MASK_TABLE_OFS  -512
1694
1695/*
1696 * For system-mode, perform the TLB load and compare.
1697 * For user-mode, perform any required alignment tests.
1698 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1699 * is required and fill in @h with the host address for the fast path.
1700 */
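/*
 * Roughly, the softmmu fast path emitted below is:
 *    ldp  tmp0, tmp1, [env, #mask_table_ofs]
 *    and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *    add  tmp1, tmp1, tmp0
 *    ldr  tmp0, [tmp1, #offsetof(CPUTLBEntry, addr_read/addr_write)]
 *    ldr  tmp1, [tmp1, #offsetof(CPUTLBEntry, addend)]
 *    and  tmp2, addr_adj, #compare_mask
 *    cmp  tmp0, tmp2
 *    b.ne slow_path
 * leaving tmp1 + addr as the host address.
 */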
1701static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1702                                           TCGReg addr_reg, MemOpIdx oi,
1703                                           bool is_ld)
1704{
1705    TCGType addr_type = s->addr_type;
1706    TCGLabelQemuLdst *ldst = NULL;
1707    MemOp opc = get_memop(oi);
1708    MemOp s_bits = opc & MO_SIZE;
1709    unsigned a_mask;
1710
1711    h->aa = atom_and_align_for_opc(s, opc,
1712                                   have_lse2 ? MO_ATOM_WITHIN16
1713                                             : MO_ATOM_IFALIGN,
1714                                   s_bits == MO_128);
1715    a_mask = (1 << h->aa.align) - 1;
1716
1717    if (tcg_use_softmmu) {
1718        unsigned s_mask = (1u << s_bits) - 1;
1719        unsigned mem_index = get_mmuidx(oi);
1720        TCGReg addr_adj;
1721        TCGType mask_type;
1722        uint64_t compare_mask;
1723
1724        ldst = new_ldst_label(s);
1725        ldst->is_ld = is_ld;
1726        ldst->oi = oi;
1727        ldst->addr_reg = addr_reg;
1728
1729        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1730                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1731
1732        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1733        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1734        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1735        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1736                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1737
1738        /* Extract the TLB index from the address into TMP0.  */
1739        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1740                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1741                     s->page_bits - CPU_TLB_ENTRY_BITS);
1742
1743        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1744        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1745
1746        /* Load the tlb comparator into TMP0, and the fast path addend. */
1747        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1748        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1749                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1750                         : offsetof(CPUTLBEntry, addr_write));
1751        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1752                   offsetof(CPUTLBEntry, addend));
1753
1754        /*
1755         * For aligned accesses, we check the first byte and include
1756         * the alignment bits within the address.  For unaligned access,
1757         * we check that we don't cross pages using the address of the
1758         * last byte of the access.
1759         */
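        /* Worked example: an unaligned (a_mask == 0) 8-byte access has
           s_mask == 7, so we compare using addr + 7, the last byte of the
           access; an aligned access compares addr itself, with the
           alignment bits included in compare_mask.  */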
1760        if (a_mask >= s_mask) {
1761            addr_adj = addr_reg;
1762        } else {
1763            addr_adj = TCG_REG_TMP2;
1764            tcg_out_insn(s, 3401, ADDI, addr_type,
1765                         addr_adj, addr_reg, s_mask - a_mask);
1766        }
1767        compare_mask = (uint64_t)s->page_mask | a_mask;
1768
1769        /* Store the page mask part of the address into TMP2.  */
1770        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1771                         addr_adj, compare_mask);
1772
1773        /* Perform the address comparison. */
1774        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1775
1776        /* If not equal, we jump to the slow path. */
1777        ldst->label_ptr[0] = s->code_ptr;
1778        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1779
1780        h->base = TCG_REG_TMP1;
1781        h->index = addr_reg;
1782        h->index_ext = addr_type;
1783    } else {
1784        if (a_mask) {
1785            ldst = new_ldst_label(s);
1786
1787            ldst->is_ld = is_ld;
1788            ldst->oi = oi;
1789            ldst->addr_reg = addr_reg;
1790
1791            /* tst addr, #mask */
1792            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1793
1794            /* b.ne slow_path */
1795            ldst->label_ptr[0] = s->code_ptr;
1796            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1797        }
1798
1799        if (guest_base || addr_type == TCG_TYPE_I32) {
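            /* A base register is needed even when guest_base == 0, since a
               32-bit guest address must be zero-extended via the index
               slot; see the note in tcg_target_qemu_prologue.  */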
1800            h->base = TCG_REG_GUEST_BASE;
1801            h->index = addr_reg;
1802            h->index_ext = addr_type;
1803        } else {
1804            h->base = addr_reg;
1805            h->index = TCG_REG_XZR;
1806            h->index_ext = TCG_TYPE_I64;
1807        }
1808    }
1809
1810    return ldst;
1811}
1812
1813static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1814                                   TCGReg data_r, HostAddress h)
1815{
1816    switch (memop & MO_SSIZE) {
1817    case MO_UB:
1818        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1819        break;
1820    case MO_SB:
1821        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1822                       data_r, h.base, h.index_ext, h.index);
1823        break;
1824    case MO_UW:
1825        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1826        break;
1827    case MO_SW:
1828        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1829                       data_r, h.base, h.index_ext, h.index);
1830        break;
1831    case MO_UL:
1832        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1833        break;
1834    case MO_SL:
1835        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1836        break;
1837    case MO_UQ:
1838        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1839        break;
1840    default:
1841        g_assert_not_reached();
1842    }
1843}
1844
1845static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1846                                   TCGReg data_r, HostAddress h)
1847{
1848    switch (memop & MO_SIZE) {
1849    case MO_8:
1850        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1851        break;
1852    case MO_16:
1853        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1854        break;
1855    case MO_32:
1856        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1857        break;
1858    case MO_64:
1859        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1860        break;
1861    default:
1862        g_assert_not_reached();
1863    }
1864}
1865
1866static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1867                            MemOpIdx oi, TCGType data_type)
1868{
1869    TCGLabelQemuLdst *ldst;
1870    HostAddress h;
1871
1872    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1873    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1874
1875    if (ldst) {
1876        ldst->type = data_type;
1877        ldst->datalo_reg = data_reg;
1878        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1879    }
1880}
1881
1882static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1883                            MemOpIdx oi, TCGType data_type)
1884{
1885    TCGLabelQemuLdst *ldst;
1886    HostAddress h;
1887
1888    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1889    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1890
1891    if (ldst) {
1892        ldst->type = data_type;
1893        ldst->datalo_reg = data_reg;
1894        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1895    }
1896}
1897
1898static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1899                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1900{
1901    TCGLabelQemuLdst *ldst;
1902    HostAddress h;
1903    TCGReg base;
1904    bool use_pair;
1905
1906    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1907
1908    /* Compose the final address, as LDP/STP have no register-index mode. */
1909    if (h.index == TCG_REG_XZR) {
1910        base = h.base;
1911    } else {
1912        base = TCG_REG_TMP2;
1913        if (h.index_ext == TCG_TYPE_I32) {
1914            /* add base, base, index, uxtw */
1915            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1916                         h.base, h.index, MO_32, 0);
1917        } else {
1918            /* add base, base, index */
1919            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1920        }
1921    }
1922
1923    use_pair = h.aa.atom < MO_128 || have_lse2;
1924
1925    if (!use_pair) {
1926        tcg_insn_unit *branch = NULL;
1927        TCGReg ll, lh, sl, sh;
1928
1929        /*
1930         * If we have already checked for 16-byte alignment, that's all
1931         * we need. Otherwise we have determined that misaligned atomicity
1932         * may be handled with two 8-byte loads.
1933         */
1934        if (h.aa.align < MO_128) {
1935            /*
1936             * TODO: align should be MO_64, so we only need test bit 3,
1937             * which means we could use TBNZ instead of ANDS+B_C.
1938             */
1939            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1940            branch = s->code_ptr;
1941            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1942            use_pair = true;
1943        }
1944
1945        if (is_ld) {
1946            /*
1947             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1948             *    ldxp lo, hi, [base]
1949             *    stxp t0, lo, hi, [base]
1950             *    cbnz t0, .-8
1951             * Require no overlap between data{lo,hi} and base.
1952             */
1953            if (datalo == base || datahi == base) {
1954                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1955                base = TCG_REG_TMP2;
1956            }
1957            ll = sl = datalo;
1958            lh = sh = datahi;
1959        } else {
1960            /*
1961             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1962             * 1: ldxp t0, t1, [base]
1963             *    stxp t0, lo, hi, [base]
1964             *    cbnz t0, 1b
1965             */
1966            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1967            ll = TCG_REG_TMP0;
1968            lh = TCG_REG_TMP1;
1969            sl = datalo;
1970            sh = datahi;
1971        }
1972
1973        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1974        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1975        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1976
1977        if (use_pair) {
1978            /* "b .+8", branching across the one insn of use_pair. */
1979            tcg_out_insn(s, 3206, B, 2);
1980            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1981        }
1982    }
1983
1984    if (use_pair) {
1985        if (is_ld) {
1986            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1987        } else {
1988            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1989        }
1990    }
1991
1992    if (ldst) {
1993        ldst->type = TCG_TYPE_I128;
1994        ldst->datalo_reg = datalo;
1995        ldst->datahi_reg = datahi;
1996        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1997    }
1998}
1999
2000static const tcg_insn_unit *tb_ret_addr;
2001
2002static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2003{
2004    const tcg_insn_unit *target;
2005    ptrdiff_t offset;
2006
2007    /* Reuse the zeroing that exists for goto_ptr.  */
2008    if (a0 == 0) {
2009        target = tcg_code_gen_epilogue;
2010    } else {
2011        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
2012        target = tb_ret_addr;
2013    }
2014
2015    offset = tcg_pcrel_diff(s, target) >> 2;
2016    if (offset == sextract64(offset, 0, 26)) {
2017        tcg_out_insn(s, 3206, B, offset);
2018    } else {
2019        /*
2020         * Only x16/x17 generate BTI type Jump (2),
2021         * other registers generate BTI type Jump|Call (3).
2022         */
2023        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
2024        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
2025        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2026    }
2027}
2028
2029static void tcg_out_goto_tb(TCGContext *s, int which)
2030{
2031    /*
2032     * Direct branch, or indirect address load, will be patched
2033     * by tb_target_set_jmp_target.  Assert indirect load offset
2034     * in range early, regardless of direct branch distance.
2035     */
2036    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
2037    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
2038
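    /* The B below is patched by tb_target_set_jmp_target into either a
       direct branch or an LDR of the target into TMP0, which then falls
       through to the BR.  */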
2039    set_jmp_insn_offset(s, which);
2040    tcg_out32(s, I3206_B);
2041    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2042    set_jmp_reset_offset(s, which);
2043    tcg_out_bti(s, BTI_J);
2044}
2045
2046void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2047                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2048{
2049    uintptr_t d_addr = tb->jmp_target_addr[n];
2050    ptrdiff_t d_offset = d_addr - jmp_rx;
2051    tcg_insn_unit insn;
2052
2053    /* Either directly branch, or indirect branch load. */
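    /* A direct B reaches +/-128MiB: 26 bits of instruction offset,
       i.e. 28 bits of byte offset.  */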
2054    if (d_offset == sextract64(d_offset, 0, 28)) {
2055        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2056    } else {
2057        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2058        ptrdiff_t i_offset = i_addr - jmp_rx;
2059
2060        /* Note that we asserted this offset was in range in tcg_out_goto_tb. */
2061        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2062    }
2063    qatomic_set((uint32_t *)jmp_rw, insn);
2064    flush_idcache_range(jmp_rx, jmp_rw, 4);
2065}
2066
2067
2068static void tgen_add(TCGContext *s, TCGType type,
2069                     TCGReg a0, TCGReg a1, TCGReg a2)
2070{
2071    tcg_out_insn(s, 3502, ADD, type, a0, a1, a2);
2072}
2073
2074static void tgen_addi(TCGContext *s, TCGType type,
2075                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2076{
2077    if (a2 >= 0) {
2078        tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2);
2079    } else {
2080        tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2);
2081    }
2082}
2083
2084static const TCGOutOpBinary outop_add = {
2085    .base.static_constraint = C_O1_I2(r, r, rA),
2086    .out_rrr = tgen_add,
2087    .out_rri = tgen_addi,
2088};
2089
2090static void tgen_and(TCGContext *s, TCGType type,
2091                     TCGReg a0, TCGReg a1, TCGReg a2)
2092{
2093    tcg_out_insn(s, 3510, AND, type, a0, a1, a2);
2094}
2095
2096static void tgen_andi(TCGContext *s, TCGType type,
2097                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2098{
2099    tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2);
2100}
2101
2102static const TCGOutOpBinary outop_and = {
2103    .base.static_constraint = C_O1_I2(r, r, rL),
2104    .out_rrr = tgen_and,
2105    .out_rri = tgen_andi,
2106};
2107
2108static void tgen_andc(TCGContext *s, TCGType type,
2109                      TCGReg a0, TCGReg a1, TCGReg a2)
2110{
2111    tcg_out_insn(s, 3510, BIC, type, a0, a1, a2);
2112}
2113
2114static const TCGOutOpBinary outop_andc = {
2115    .base.static_constraint = C_O1_I2(r, r, r),
2116    .out_rrr = tgen_andc,
2117};
2118
2119static void tgen_clz(TCGContext *s, TCGType type,
2120                     TCGReg a0, TCGReg a1, TCGReg a2)
2121{
2122    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2123    tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1);
2124    tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE);
2125}
2126
2127static void tgen_clzi(TCGContext *s, TCGType type,
2128                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2129{
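    /* CLZ already yields the operand width for a zero input, so a
       fallback equal to the width needs no conditional select.  */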
2130    if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2131        tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2132        return;
2133    }
2134
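    /* a1 == 0 leaves NE false, selecting the fallback a2 below; for
       a2 == -1, CSINV materializes ~XZR without a scratch register.  */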
2135    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2136    tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2137
2138    switch (a2) {
2139    case -1:
2140        tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2141        break;
2142    case 0:
2143        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2144        break;
2145    default:
2146        tcg_out_movi(s, type, TCG_REG_TMP0, a2);
2147        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE);
2148        break;
2149    }
2150}
2151
2152static const TCGOutOpBinary outop_clz = {
2153    .base.static_constraint = C_O1_I2(r, r, rAL),
2154    .out_rrr = tgen_clz,
2155    .out_rri = tgen_clzi,
2156};
2157
2158static const TCGOutOpUnary outop_ctpop = {
2159    .base.static_constraint = C_NotImplemented,
2160};
2161
2162static void tgen_ctz(TCGContext *s, TCGType type,
2163                     TCGReg a0, TCGReg a1, TCGReg a2)
2164{
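    /* ctz(x) == clz(rbit(x)); the fallback for x == 0 is shared with clz.  */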
2165    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2166    tgen_clz(s, type, a0, TCG_REG_TMP0, a2);
2167}
2168
2169static void tgen_ctzi(TCGContext *s, TCGType type,
2170                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2171{
2172    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2173    tgen_clzi(s, type, a0, TCG_REG_TMP0, a2);
2174}
2175
2176static const TCGOutOpBinary outop_ctz = {
2177    .base.static_constraint = C_O1_I2(r, r, rAL),
2178    .out_rrr = tgen_ctz,
2179    .out_rri = tgen_ctzi,
2180};
2181
2182static void tgen_divs(TCGContext *s, TCGType type,
2183                      TCGReg a0, TCGReg a1, TCGReg a2)
2184{
2185    tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2);
2186}
2187
2188static const TCGOutOpBinary outop_divs = {
2189    .base.static_constraint = C_O1_I2(r, r, r),
2190    .out_rrr = tgen_divs,
2191};
2192
2193static const TCGOutOpDivRem outop_divs2 = {
2194    .base.static_constraint = C_NotImplemented,
2195};
2196
2197static void tgen_divu(TCGContext *s, TCGType type,
2198                      TCGReg a0, TCGReg a1, TCGReg a2)
2199{
2200    tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2);
2201}
2202
2203static const TCGOutOpBinary outop_divu = {
2204    .base.static_constraint = C_O1_I2(r, r, r),
2205    .out_rrr = tgen_divu,
2206};
2207
2208static const TCGOutOpDivRem outop_divu2 = {
2209    .base.static_constraint = C_NotImplemented,
2210};
2211
2212static void tgen_eqv(TCGContext *s, TCGType type,
2213                     TCGReg a0, TCGReg a1, TCGReg a2)
2214{
2215    tcg_out_insn(s, 3510, EON, type, a0, a1, a2);
2216}
2217
2218static const TCGOutOpBinary outop_eqv = {
2219    .base.static_constraint = C_O1_I2(r, r, r),
2220    .out_rrr = tgen_eqv,
2221};
2222
2223static void tgen_mul(TCGContext *s, TCGType type,
2224                     TCGReg a0, TCGReg a1, TCGReg a2)
2225{
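    /* MUL is MADD with XZR as the addend: a0 = a1 * a2 + 0.  */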
2226    tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR);
2227}
2228
2229static const TCGOutOpBinary outop_mul = {
2230    .base.static_constraint = C_O1_I2(r, r, r),
2231    .out_rrr = tgen_mul,
2232};
2233
2234static const TCGOutOpMul2 outop_muls2 = {
2235    .base.static_constraint = C_NotImplemented,
2236};
2237
2238static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags)
2239{
2240    return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented;
2241}
2242
2243static void tgen_mulsh(TCGContext *s, TCGType type,
2244                       TCGReg a0, TCGReg a1, TCGReg a2)
2245{
2246    tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2247}
2248
2249static const TCGOutOpBinary outop_mulsh = {
2250    .base.static_constraint = C_Dynamic,
2251    .base.dynamic_constraint = cset_mulh,
2252    .out_rrr = tgen_mulsh,
2253};
2254
2255static const TCGOutOpMul2 outop_mulu2 = {
2256    .base.static_constraint = C_NotImplemented,
2257};
2258
2259static void tgen_muluh(TCGContext *s, TCGType type,
2260                       TCGReg a0, TCGReg a1, TCGReg a2)
2261{
2262    tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2263}
2264
2265static const TCGOutOpBinary outop_muluh = {
2266    .base.static_constraint = C_Dynamic,
2267    .base.dynamic_constraint = cset_mulh,
2268    .out_rrr = tgen_muluh,
2269};
2270
2271static const TCGOutOpBinary outop_nand = {
2272    .base.static_constraint = C_NotImplemented,
2273};
2274
2275static const TCGOutOpBinary outop_nor = {
2276    .base.static_constraint = C_NotImplemented,
2277};
2278
2279static void tgen_or(TCGContext *s, TCGType type,
2280                    TCGReg a0, TCGReg a1, TCGReg a2)
2281{
2282    tcg_out_insn(s, 3510, ORR, type, a0, a1, a2);
2283}
2284
2285static void tgen_ori(TCGContext *s, TCGType type,
2286                     TCGReg a0, TCGReg a1, tcg_target_long a2)
2287{
2288    tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2);
2289}
2290
2291static const TCGOutOpBinary outop_or = {
2292    .base.static_constraint = C_O1_I2(r, r, rL),
2293    .out_rrr = tgen_or,
2294    .out_rri = tgen_ori,
2295};
2296
2297static void tgen_orc(TCGContext *s, TCGType type,
2298                     TCGReg a0, TCGReg a1, TCGReg a2)
2299{
2300    tcg_out_insn(s, 3510, ORN, type, a0, a1, a2);
2301}
2302
2303static const TCGOutOpBinary outop_orc = {
2304    .base.static_constraint = C_O1_I2(r, r, r),
2305    .out_rrr = tgen_orc,
2306};
2307
2308static void tgen_rems(TCGContext *s, TCGType type,
2309                      TCGReg a0, TCGReg a1, TCGReg a2)
2310{
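    /* a0 = a1 - (a1 / a2) * a2: SDIV for the quotient, then MSUB folds
       the multiply and subtract.  */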
2311    tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2);
2312    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2313}
2314
2315static const TCGOutOpBinary outop_rems = {
2316    .base.static_constraint = C_O1_I2(r, r, r),
2317    .out_rrr = tgen_rems,
2318};
2319
2320static void tgen_remu(TCGContext *s, TCGType type,
2321                      TCGReg a0, TCGReg a1, TCGReg a2)
2322{
2323    tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2);
2324    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2325}
2326
2327static const TCGOutOpBinary outop_remu = {
2328    .base.static_constraint = C_O1_I2(r, r, r),
2329    .out_rrr = tgen_remu,
2330};
2331
2332static const TCGOutOpBinary outop_rotl = {
2333    .base.static_constraint = C_NotImplemented,
2334};
2335
2336static void tgen_rotr(TCGContext *s, TCGType type,
2337                      TCGReg a0, TCGReg a1, TCGReg a2)
2338{
2339    tcg_out_insn(s, 3508, RORV, type, a0, a1, a2);
2340}
2341
2342static void tgen_rotri(TCGContext *s, TCGType type,
2343                       TCGReg a0, TCGReg a1, tcg_target_long a2)
2344{
2345    int max = type == TCG_TYPE_I32 ? 31 : 63;
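    /* ROR (immediate) is the EXTR alias with both sources equal to a1.  */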
2346    tcg_out_extr(s, type, a0, a1, a1, a2 & max);
2347}
2348
2349static const TCGOutOpBinary outop_rotr = {
2350    .base.static_constraint = C_O1_I2(r, r, ri),
2351    .out_rrr = tgen_rotr,
2352    .out_rri = tgen_rotri,
2353};
2354
2355static void tgen_sar(TCGContext *s, TCGType type,
2356                     TCGReg a0, TCGReg a1, TCGReg a2)
2357{
2358    tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2);
2359}
2360
2361static void tgen_sari(TCGContext *s, TCGType type,
2362                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2363{
2364    int max = type == TCG_TYPE_I32 ? 31 : 63;
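    /* ASR (immediate) is the SBFM alias with immr = shift, imms = max.  */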
2365    tcg_out_sbfm(s, type, a0, a1, a2 & max, max);
2366}
2367
2368static const TCGOutOpBinary outop_sar = {
2369    .base.static_constraint = C_O1_I2(r, r, ri),
2370    .out_rrr = tgen_sar,
2371    .out_rri = tgen_sari,
2372};
2373
2374static void tgen_shl(TCGContext *s, TCGType type,
2375                     TCGReg a0, TCGReg a1, TCGReg a2)
2376{
2377    tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2);
2378}
2379
2380static void tgen_shli(TCGContext *s, TCGType type,
2381                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2382{
2383    int max = type == TCG_TYPE_I32 ? 31 : 63;
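    /* LSL (immediate) is the UBFM alias with immr = -shift & max and
       imms = max - shift; e.g. a 64-bit shift by 3 is UBFM a0, a1, #61, #60.  */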
2384    tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max);
2385}
2386
2387static const TCGOutOpBinary outop_shl = {
2388    .base.static_constraint = C_O1_I2(r, r, ri),
2389    .out_rrr = tgen_shl,
2390    .out_rri = tgen_shli,
2391};
2392
2393static void tgen_shr(TCGContext *s, TCGType type,
2394                     TCGReg a0, TCGReg a1, TCGReg a2)
2395{
2396    tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2);
2397}
2398
2399static void tgen_shri(TCGContext *s, TCGType type,
2400                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2401{
2402    int max = type == TCG_TYPE_I32 ? 31 : 63;
2403    tcg_out_ubfm(s, type, a0, a1, a2 & max, max);
2404}
2405
2406static const TCGOutOpBinary outop_shr = {
2407    .base.static_constraint = C_O1_I2(r, r, ri),
2408    .out_rrr = tgen_shr,
2409    .out_rri = tgen_shri,
2410};
2411
2412static void tgen_sub(TCGContext *s, TCGType type,
2413                     TCGReg a0, TCGReg a1, TCGReg a2)
2414{
2415    tcg_out_insn(s, 3502, SUB, type, a0, a1, a2);
2416}
2417
2418static const TCGOutOpSubtract outop_sub = {
2419    .base.static_constraint = C_O1_I2(r, r, r),
2420    .out_rrr = tgen_sub,
2421};
2422
2423static void tgen_xor(TCGContext *s, TCGType type,
2424                     TCGReg a0, TCGReg a1, TCGReg a2)
2425{
2426    tcg_out_insn(s, 3510, EOR, type, a0, a1, a2);
2427}
2428
2429static void tgen_xori(TCGContext *s, TCGType type,
2430                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2431{
2432    tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2);
2433}
2434
2435static const TCGOutOpBinary outop_xor = {
2436    .base.static_constraint = C_O1_I2(r, r, rL),
2437    .out_rrr = tgen_xor,
2438    .out_rri = tgen_xori,
2439};
2440
2441static void tgen_bswap16(TCGContext *s, TCGType type,
2442                         TCGReg a0, TCGReg a1, unsigned flags)
2443{
2444    tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2445    if (flags & TCG_BSWAP_OS) {
2446        /* Output must be sign-extended. */
2447        tcg_out_ext16s(s, type, a0, a0);
2448    } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2449        /* Output must be zero-extended, but input isn't. */
2450        tcg_out_ext16u(s, a0, a0);
2451    }
2452}
2453
2454static const TCGOutOpBswap outop_bswap16 = {
2455    .base.static_constraint = C_O1_I1(r, r),
2456    .out_rr = tgen_bswap16,
2457};
2458
2459static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2460{
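    /* NEG is SUB from the zero register.  */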
2461    tgen_sub(s, type, a0, TCG_REG_XZR, a1);
2462}
2463
2464static const TCGOutOpUnary outop_neg = {
2465    .base.static_constraint = C_O1_I1(r, r),
2466    .out_rr = tgen_neg,
2467};
2468
2469static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2470{
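    /* MVN is ORN with the zero register as the first source.  */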
2471    tgen_orc(s, type, a0, TCG_REG_XZR, a1);
2472}
2473
2474static const TCGOutOpUnary outop_not = {
2475    .base.static_constraint = C_O1_I1(r, r),
2476    .out_rr = tgen_not,
2477};
2478
2479static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret)
2480{
2481    /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2482    tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR,
2483                 TCG_REG_XZR, tcg_invert_cond(cond));
2484}
2485
2486static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
2487                         TCGReg a0, TCGReg a1, TCGReg a2)
2488{
2489    tgen_cmp(s, type, cond, a1, a2);
2490    tgen_cset(s, cond, a0);
2491}
2492
2493static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
2494                          TCGReg a0, TCGReg a1, tcg_target_long a2)
2495{
2496    tgen_cmpi(s, type, cond, a1, a2);
2497    tgen_cset(s, cond, a0);
2498}
2499
2500static const TCGOutOpSetcond outop_setcond = {
2501    .base.static_constraint = C_O1_I2(r, r, rC),
2502    .out_rrr = tgen_setcond,
2503    .out_rri = tgen_setcondi,
2504};
2505
2506static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret)
2507{
2508    /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2509    tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR,
2510                 TCG_REG_XZR, tcg_invert_cond(cond));
2511}
2512
2513static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
2514                            TCGReg a0, TCGReg a1, TCGReg a2)
2515{
2516    tgen_cmp(s, type, cond, a1, a2);
2517    tgen_csetm(s, type, cond, a0);
2518}
2519
2520static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
2521                             TCGReg a0, TCGReg a1, tcg_target_long a2)
2522{
2523    tgen_cmpi(s, type, cond, a1, a2);
2524    tgen_csetm(s, type, cond, a0);
2525}
2526
2527static const TCGOutOpSetcond outop_negsetcond = {
2528    .base.static_constraint = C_O1_I2(r, r, rC),
2529    .out_rrr = tgen_negsetcond,
2530    .out_rri = tgen_negsetcondi,
2531};
2532
2533static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2534                         TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
2535                         TCGArg vt, bool const_vt, TCGArg vf, bool const_vf)
2536{
2537    tcg_out_cmp(s, type, cond, c1, c2, const_c2);
2538    tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond);
2539}
2540
2541static const TCGOutOpMovcond outop_movcond = {
2542    .base.static_constraint = C_O1_I4(r, r, rC, rz, rz),
2543    .out = tgen_movcond,
2544};
2545
2546static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
2547                       const TCGArg args[TCG_MAX_OP_ARGS],
2548                       const int const_args[TCG_MAX_OP_ARGS])
2549{
2550    /* Hoist the loads of the most common arguments.  */
2551    TCGArg a0 = args[0];
2552    TCGArg a1 = args[1];
2553    TCGArg a2 = args[2];
2554
2555    switch (opc) {
2556    case INDEX_op_goto_ptr:
2557        tcg_out_insn(s, 3207, BR, a0);
2558        break;
2559
2560    case INDEX_op_br:
2561        tcg_out_goto_label(s, arg_label(a0));
2562        break;
2563
2564    case INDEX_op_ld8u_i32:
2565    case INDEX_op_ld8u_i64:
2566        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2567        break;
2568    case INDEX_op_ld8s_i32:
2569        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2570        break;
2571    case INDEX_op_ld8s_i64:
2572        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2573        break;
2574    case INDEX_op_ld16u_i32:
2575    case INDEX_op_ld16u_i64:
2576        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2577        break;
2578    case INDEX_op_ld16s_i32:
2579        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2580        break;
2581    case INDEX_op_ld16s_i64:
2582        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2583        break;
2584    case INDEX_op_ld_i32:
2585    case INDEX_op_ld32u_i64:
2586        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2587        break;
2588    case INDEX_op_ld32s_i64:
2589        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2590        break;
2591    case INDEX_op_ld_i64:
2592        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2593        break;
2594
2595    case INDEX_op_st8_i32:
2596    case INDEX_op_st8_i64:
2597        tcg_out_ldst(s, I3312_STRB, a0, a1, a2, 0);
2598        break;
2599    case INDEX_op_st16_i32:
2600    case INDEX_op_st16_i64:
2601        tcg_out_ldst(s, I3312_STRH, a0, a1, a2, 1);
2602        break;
2603    case INDEX_op_st_i32:
2604    case INDEX_op_st32_i64:
2605        tcg_out_ldst(s, I3312_STRW, a0, a1, a2, 2);
2606        break;
2607    case INDEX_op_st_i64:
2608        tcg_out_ldst(s, I3312_STRX, a0, a1, a2, 3);
2609        break;
2610
2611    case INDEX_op_qemu_ld_i32:
2612    case INDEX_op_qemu_ld_i64:
2613        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2614        break;
2615    case INDEX_op_qemu_st_i32:
2616    case INDEX_op_qemu_st_i64:
2617        tcg_out_qemu_st(s, a0, a1, a2, ext);
2618        break;
2619    case INDEX_op_qemu_ld_i128:
2620        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2621        break;
2622    case INDEX_op_qemu_st_i128:
2623        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false);
2624        break;
2625
2626    case INDEX_op_bswap64_i64:
2627        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2628        break;
2629    case INDEX_op_bswap32_i64:
2630        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2631        if (a2 & TCG_BSWAP_OS) {
2632            tcg_out_ext32s(s, a0, a0);
2633        }
2634        break;
2635    case INDEX_op_bswap32_i32:
2636        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2637        break;
2638
2639    case INDEX_op_deposit_i64:
2640    case INDEX_op_deposit_i32:
2641        tcg_out_dep(s, ext, a0, a2, args[3], args[4]);
2642        break;
2643
2644    case INDEX_op_extract_i64:
2645    case INDEX_op_extract_i32:
2646        if (a2 == 0) {
2647            uint64_t mask = MAKE_64BIT_MASK(0, args[3]);
2648            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask);
2649        } else {
2650            tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2651        }
2652        break;
2653
2654    case INDEX_op_sextract_i64:
2655    case INDEX_op_sextract_i32:
2656        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2657        break;
2658
2659    case INDEX_op_extract2_i64:
2660    case INDEX_op_extract2_i32:
2661        tcg_out_extr(s, ext, a0, a2, a1, args[3]);
2662        break;
2663
2664    case INDEX_op_add2_i32:
2665        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
2666                        (int32_t)args[4], args[5], const_args[4],
2667                        const_args[5], false);
2668        break;
2669    case INDEX_op_add2_i64:
2670        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
2671                        args[5], const_args[4], const_args[5], false);
2672        break;
2673    case INDEX_op_sub2_i32:
2674        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, a2, args[3],
2675                        (int32_t)args[4], args[5], const_args[4],
2676                        const_args[5], true);
2677        break;
2678    case INDEX_op_sub2_i64:
2679        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, a2, args[3], args[4],
2680                        args[5], const_args[4], const_args[5], true);
2681        break;
2682
2683    case INDEX_op_mb:
2684        tcg_out_mb(s, a0);
2685        break;
2686
2687    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2688    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2689    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2690    case INDEX_op_ext_i32_i64:  /* Always emitted via tcg_reg_alloc_op.  */
2691    case INDEX_op_extu_i32_i64:
2692    case INDEX_op_extrl_i64_i32:
2693    default:
2694        g_assert_not_reached();
2695    }
2696}
2697
2698static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2699                           unsigned vecl, unsigned vece,
2700                           const TCGArg args[TCG_MAX_OP_ARGS],
2701                           const int const_args[TCG_MAX_OP_ARGS])
2702{
2703    static const AArch64Insn cmp_vec_insn[16] = {
2704        [TCG_COND_EQ] = I3616_CMEQ,
2705        [TCG_COND_GT] = I3616_CMGT,
2706        [TCG_COND_GE] = I3616_CMGE,
2707        [TCG_COND_GTU] = I3616_CMHI,
2708        [TCG_COND_GEU] = I3616_CMHS,
2709    };
2710    static const AArch64Insn cmp_scalar_insn[16] = {
2711        [TCG_COND_EQ] = I3611_CMEQ,
2712        [TCG_COND_GT] = I3611_CMGT,
2713        [TCG_COND_GE] = I3611_CMGE,
2714        [TCG_COND_GTU] = I3611_CMHI,
2715        [TCG_COND_GEU] = I3611_CMHS,
2716    };
2717    static const AArch64Insn cmp0_vec_insn[16] = {
2718        [TCG_COND_EQ] = I3617_CMEQ0,
2719        [TCG_COND_GT] = I3617_CMGT0,
2720        [TCG_COND_GE] = I3617_CMGE0,
2721        [TCG_COND_LT] = I3617_CMLT0,
2722        [TCG_COND_LE] = I3617_CMLE0,
2723    };
2724    static const AArch64Insn cmp0_scalar_insn[16] = {
2725        [TCG_COND_EQ] = I3612_CMEQ0,
2726        [TCG_COND_GT] = I3612_CMGT0,
2727        [TCG_COND_GE] = I3612_CMGE0,
2728        [TCG_COND_LT] = I3612_CMLT0,
2729        [TCG_COND_LE] = I3612_CMLE0,
2730    };
2731
2732    TCGType type = vecl + TCG_TYPE_V64;
2733    unsigned is_q = vecl;
2734    bool is_scalar = !is_q && vece == MO_64;
2735    TCGArg a0, a1, a2, a3;
2736    int cmode, imm8;
2737
2738    a0 = args[0];
2739    a1 = args[1];
2740    a2 = args[2];
2741
2742    switch (opc) {
2743    case INDEX_op_ld_vec:
2744        tcg_out_ld(s, type, a0, a1, a2);
2745        break;
2746    case INDEX_op_st_vec:
2747        tcg_out_st(s, type, a0, a1, a2);
2748        break;
2749    case INDEX_op_dupm_vec:
2750        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2751        break;
2752    case INDEX_op_add_vec:
2753        if (is_scalar) {
2754            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2755        } else {
2756            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2757        }
2758        break;
2759    case INDEX_op_sub_vec:
2760        if (is_scalar) {
2761            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2762        } else {
2763            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2764        }
2765        break;
2766    case INDEX_op_mul_vec:
2767        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2768        break;
2769    case INDEX_op_neg_vec:
2770        if (is_scalar) {
2771            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2772        } else {
2773            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2774        }
2775        break;
2776    case INDEX_op_abs_vec:
2777        if (is_scalar) {
2778            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2779        } else {
2780            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2781        }
2782        break;
2783    case INDEX_op_and_vec:
2784        if (const_args[2]) {
2785            is_shimm1632(~a2, &cmode, &imm8);
2786            if (a0 == a1) {
2787                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2788                return;
2789            }
2790            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2791            a2 = a0;
2792        }
2793        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2794        break;
2795    case INDEX_op_or_vec:
2796        if (const_args[2]) {
2797            is_shimm1632(a2, &cmode, &imm8);
2798            if (a0 == a1) {
2799                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2800                return;
2801            }
2802            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2803            a2 = a0;
2804        }
2805        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2806        break;
2807    case INDEX_op_andc_vec:
2808        if (const_args[2]) {
2809            is_shimm1632(a2, &cmode, &imm8);
2810            if (a0 == a1) {
2811                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2812                return;
2813            }
2814            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2815            a2 = a0;
2816        }
2817        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2818        break;
2819    case INDEX_op_orc_vec:
2820        if (const_args[2]) {
2821            is_shimm1632(~a2, &cmode, &imm8);
2822            if (a0 == a1) {
2823                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2824                return;
2825            }
2826            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2827            a2 = a0;
2828        }
2829        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2830        break;
2831    case INDEX_op_xor_vec:
2832        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2833        break;
2834    case INDEX_op_ssadd_vec:
2835        if (is_scalar) {
2836            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2837        } else {
2838            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2839        }
2840        break;
2841    case INDEX_op_sssub_vec:
2842        if (is_scalar) {
2843            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2844        } else {
2845            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2846        }
2847        break;
2848    case INDEX_op_usadd_vec:
2849        if (is_scalar) {
2850            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2851        } else {
2852            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2853        }
2854        break;
2855    case INDEX_op_ussub_vec:
2856        if (is_scalar) {
2857            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2858        } else {
2859            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2860        }
2861        break;
2862    case INDEX_op_smax_vec:
2863        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2864        break;
2865    case INDEX_op_smin_vec:
2866        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2867        break;
2868    case INDEX_op_umax_vec:
2869        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2870        break;
2871    case INDEX_op_umin_vec:
2872        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2873        break;
2874    case INDEX_op_not_vec:
2875        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2876        break;
2877    case INDEX_op_shli_vec:
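        /* AdvSIMD shift immediates encode in immh:immb as esize + shift
           for left shifts and 2 * esize - shift for right shifts, hence
           the adjustments to a2 in the shift cases below.  */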
2878        if (is_scalar) {
2879            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2880        } else {
2881            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2882        }
2883        break;
2884    case INDEX_op_shri_vec:
2885        if (is_scalar) {
2886            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2887        } else {
2888            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2889        }
2890        break;
2891    case INDEX_op_sari_vec:
2892        if (is_scalar) {
2893            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2894        } else {
2895            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2896        }
2897        break;
2898    case INDEX_op_aa64_sli_vec:
2899        if (is_scalar) {
2900            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2901        } else {
2902            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2903        }
2904        break;
2905    case INDEX_op_shlv_vec:
2906        if (is_scalar) {
2907            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2908        } else {
2909            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2910        }
2911        break;
2912    case INDEX_op_aa64_sshl_vec:
2913        if (is_scalar) {
2914            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2915        } else {
2916            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2917        }
2918        break;
2919    case INDEX_op_cmp_vec:
2920        {
2921            TCGCond cond = args[3];
2922            AArch64Insn insn;
2923
2924            switch (cond) {
2925            case TCG_COND_NE:
2926                if (const_args[2]) {
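                    /* a2 is constrained to 0 (wZ), so x != 0 becomes
                       CMTST x, x: (x & x) != 0 per lane.  */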
2927                    if (is_scalar) {
2928                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2929                    } else {
2930                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2931                    }
2932                } else {
2933                    if (is_scalar) {
2934                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2935                    } else {
2936                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2937                    }
2938                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2939                }
2940                break;
2941
2942            case TCG_COND_TSTNE:
2943            case TCG_COND_TSTEQ:
2944                if (const_args[2]) {
2945                    /* (x & 0) == 0 */
2946                    tcg_out_dupi_vec(s, type, MO_8, a0,
2947                                     -(cond == TCG_COND_TSTEQ));
2948                    break;
2949                }
2950                if (is_scalar) {
2951                    tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2);
2952                } else {
2953                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2);
2954                }
2955                if (cond == TCG_COND_TSTEQ) {
2956                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2957                }
2958                break;
2959
2960            default:
2961                if (const_args[2]) {
2962                    if (is_scalar) {
2963                        insn = cmp0_scalar_insn[cond];
2964                        if (insn) {
2965                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2966                            break;
2967                        }
2968                    } else {
2969                        insn = cmp0_vec_insn[cond];
2970                        if (insn) {
2971                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2972                            break;
2973                        }
2974                    }
2975                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2976                    a2 = TCG_VEC_TMP0;
2977                }
2978                if (is_scalar) {
2979                    insn = cmp_scalar_insn[cond];
2980                    if (insn == 0) {
2981                        TCGArg t;
2982                        t = a1, a1 = a2, a2 = t;
2983                        cond = tcg_swap_cond(cond);
2984                        insn = cmp_scalar_insn[cond];
2985                        tcg_debug_assert(insn != 0);
2986                    }
2987                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2988                } else {
2989                    insn = cmp_vec_insn[cond];
2990                    if (insn == 0) {
2991                        TCGArg t;
2992                        t = a1, a1 = a2, a2 = t;
2993                        cond = tcg_swap_cond(cond);
2994                        insn = cmp_vec_insn[cond];
2995                        tcg_debug_assert(insn != 0);
2996                    }
2997                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2998                }
2999                break;
3000            }
3001        }
3002        break;
3003
3004    case INDEX_op_bitsel_vec:
3005        a3 = args[3];
3006        if (a0 == a3) {
3007            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
3008        } else if (a0 == a2) {
3009            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
3010        } else {
3011            if (a0 != a1) {
3012                tcg_out_mov(s, type, a0, a1);
3013            }
3014            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
3015        }
3016        break;
3017
3018    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3019    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3020    default:
3021        g_assert_not_reached();
3022    }
3023}
3024
3025int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3026{
3027    switch (opc) {
3028    case INDEX_op_add_vec:
3029    case INDEX_op_sub_vec:
3030    case INDEX_op_and_vec:
3031    case INDEX_op_or_vec:
3032    case INDEX_op_xor_vec:
3033    case INDEX_op_andc_vec:
3034    case INDEX_op_orc_vec:
3035    case INDEX_op_neg_vec:
3036    case INDEX_op_abs_vec:
3037    case INDEX_op_not_vec:
3038    case INDEX_op_cmp_vec:
3039    case INDEX_op_shli_vec:
3040    case INDEX_op_shri_vec:
3041    case INDEX_op_sari_vec:
3042    case INDEX_op_ssadd_vec:
3043    case INDEX_op_sssub_vec:
3044    case INDEX_op_usadd_vec:
3045    case INDEX_op_ussub_vec:
3046    case INDEX_op_shlv_vec:
3047    case INDEX_op_bitsel_vec:
3048        return 1;
3049    case INDEX_op_rotli_vec:
3050    case INDEX_op_shrv_vec:
3051    case INDEX_op_sarv_vec:
3052    case INDEX_op_rotlv_vec:
3053    case INDEX_op_rotrv_vec:
3054        return -1;
3055    case INDEX_op_mul_vec:
3056    case INDEX_op_smax_vec:
3057    case INDEX_op_smin_vec:
3058    case INDEX_op_umax_vec:
3059    case INDEX_op_umin_vec:
3060        return vece < MO_64;
3061
3062    default:
3063        return 0;
3064    }
3065}
3066
3067void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3068                       TCGArg a0, ...)
3069{
3070    va_list va;
3071    TCGv_vec v0, v1, v2, t1, t2, c1;
3072    TCGArg a2;
3073
3074    va_start(va, a0);
3075    v0 = temp_tcgv_vec(arg_temp(a0));
3076    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3077    a2 = va_arg(va, TCGArg);
3078    va_end(va);
3079
3080    switch (opc) {
3081    case INDEX_op_rotli_vec:
3082        t1 = tcg_temp_new_vec(type);
3083        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
3084        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
3085                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
3086        tcg_temp_free_vec(t1);
3087        break;
3088
3089    case INDEX_op_shrv_vec:
3090    case INDEX_op_sarv_vec:
3091        /* Right shifts are negative left shifts for AArch64.  */
3092        v2 = temp_tcgv_vec(arg_temp(a2));
3093        t1 = tcg_temp_new_vec(type);
3094        tcg_gen_neg_vec(vece, t1, v2);
3095        opc = (opc == INDEX_op_shrv_vec
3096               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
3097        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
3098                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3099        tcg_temp_free_vec(t1);
3100        break;
3101
3102    case INDEX_op_rotlv_vec:
3103        v2 = temp_tcgv_vec(arg_temp(a2));
3104        t1 = tcg_temp_new_vec(type);
3105        c1 = tcg_constant_vec(type, vece, 8 << vece);
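        /* rotl(v1, v2) == (v1 << v2) | (v1 >> (esize - v2)); the right
           shift is done as a left shift by (v2 - esize), which is negative.  */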
3106        tcg_gen_sub_vec(vece, t1, v2, c1);
3107        /* Right shifts are negative left shifts for AArch64.  */
3108        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
3109                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3110        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
3111                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
3112        tcg_gen_or_vec(vece, v0, v0, t1);
3113        tcg_temp_free_vec(t1);
3114        break;
3115
3116    case INDEX_op_rotrv_vec:
3117        v2 = temp_tcgv_vec(arg_temp(a2));
3118        t1 = tcg_temp_new_vec(type);
3119        t2 = tcg_temp_new_vec(type);
3120        c1 = tcg_constant_vec(type, vece, 8 << vece);
3121        tcg_gen_neg_vec(vece, t1, v2);
3122        tcg_gen_sub_vec(vece, t2, c1, v2);
3123        /* Right shifts are negative left shifts for AArch64.  */
3124        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
3125                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
3126        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
3127                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
3128        tcg_gen_or_vec(vece, v0, t1, t2);
3129        tcg_temp_free_vec(t1);
3130        tcg_temp_free_vec(t2);
3131        break;
3132
3133    default:
3134        g_assert_not_reached();
3135    }
3136}
3137
3138static TCGConstraintSetIndex
3139tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
3140{
3141    switch (op) {
3142    case INDEX_op_goto_ptr:
3143        return C_O0_I1(r);
3144
3145    case INDEX_op_ld8u_i32:
3146    case INDEX_op_ld8s_i32:
3147    case INDEX_op_ld16u_i32:
3148    case INDEX_op_ld16s_i32:
3149    case INDEX_op_ld_i32:
3150    case INDEX_op_ld8u_i64:
3151    case INDEX_op_ld8s_i64:
3152    case INDEX_op_ld16u_i64:
3153    case INDEX_op_ld16s_i64:
3154    case INDEX_op_ld32u_i64:
3155    case INDEX_op_ld32s_i64:
3156    case INDEX_op_ld_i64:
3157    case INDEX_op_bswap32_i32:
3158    case INDEX_op_bswap32_i64:
3159    case INDEX_op_bswap64_i64:
3160    case INDEX_op_ext_i32_i64:
3161    case INDEX_op_extu_i32_i64:
3162    case INDEX_op_extract_i32:
3163    case INDEX_op_extract_i64:
3164    case INDEX_op_sextract_i32:
3165    case INDEX_op_sextract_i64:
3166        return C_O1_I1(r, r);
3167
3168    case INDEX_op_st8_i32:
3169    case INDEX_op_st16_i32:
3170    case INDEX_op_st_i32:
3171    case INDEX_op_st8_i64:
3172    case INDEX_op_st16_i64:
3173    case INDEX_op_st32_i64:
3174    case INDEX_op_st_i64:
3175        return C_O0_I2(rz, r);
3176
3177    case INDEX_op_qemu_ld_i32:
3178    case INDEX_op_qemu_ld_i64:
3179        return C_O1_I1(r, r);
3180    case INDEX_op_qemu_ld_i128:
3181        return C_O2_I1(r, r, r);
3182    case INDEX_op_qemu_st_i32:
3183    case INDEX_op_qemu_st_i64:
3184        return C_O0_I2(rz, r);
3185    case INDEX_op_qemu_st_i128:
3186        return C_O0_I3(rz, rz, r);
3187
3188    case INDEX_op_deposit_i32:
3189    case INDEX_op_deposit_i64:
3190        return C_O1_I2(r, 0, rz);
3191
3192    case INDEX_op_extract2_i32:
3193    case INDEX_op_extract2_i64:
3194        return C_O1_I2(r, rz, rz);
3195
3196    case INDEX_op_add2_i32:
3197    case INDEX_op_add2_i64:
3198    case INDEX_op_sub2_i32:
3199    case INDEX_op_sub2_i64:
3200        return C_O2_I4(r, r, rz, rz, rA, rMZ);
3201
3202    case INDEX_op_add_vec:
3203    case INDEX_op_sub_vec:
3204    case INDEX_op_mul_vec:
3205    case INDEX_op_xor_vec:
3206    case INDEX_op_ssadd_vec:
3207    case INDEX_op_sssub_vec:
3208    case INDEX_op_usadd_vec:
3209    case INDEX_op_ussub_vec:
3210    case INDEX_op_smax_vec:
3211    case INDEX_op_smin_vec:
3212    case INDEX_op_umax_vec:
3213    case INDEX_op_umin_vec:
3214    case INDEX_op_shlv_vec:
3215    case INDEX_op_shrv_vec:
3216    case INDEX_op_sarv_vec:
3217    case INDEX_op_aa64_sshl_vec:
3218        return C_O1_I2(w, w, w);
3219    case INDEX_op_not_vec:
3220    case INDEX_op_neg_vec:
3221    case INDEX_op_abs_vec:
3222    case INDEX_op_shli_vec:
3223    case INDEX_op_shri_vec:
3224    case INDEX_op_sari_vec:
3225        return C_O1_I1(w, w);
3226    case INDEX_op_ld_vec:
3227    case INDEX_op_dupm_vec:
3228        return C_O1_I1(w, r);
3229    case INDEX_op_st_vec:
3230        return C_O0_I2(w, r);
3231    case INDEX_op_dup_vec:
3232        return C_O1_I1(w, wr);
3233    case INDEX_op_or_vec:
3234    case INDEX_op_andc_vec:
3235        return C_O1_I2(w, w, wO);
3236    case INDEX_op_and_vec:
3237    case INDEX_op_orc_vec:
3238        return C_O1_I2(w, w, wN);
3239    case INDEX_op_cmp_vec:
3240        return C_O1_I2(w, w, wZ);
3241    case INDEX_op_bitsel_vec:
3242        return C_O1_I3(w, w, w, w);
3243    case INDEX_op_aa64_sli_vec:
3244        return C_O1_I2(w, 0, w);
3245
3246    default:
3247        return C_NotImplemented;
3248    }
3249}

static void tcg_target_init(TCGContext *s)
{
    /*
     * Integer types may use the 32 general registers (regset bits 0..31),
     * vector types the 32 vector registers (bits 32..63); reserved
     * registers are removed from circulation below.
     */
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    /* Per AAPCS64, everything is call-clobbered except x19..x29, v8..v15. */
    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
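
/*
 * For illustration only: if TCG_STATIC_CALL_ARGS_SIZE were 128 and
 * CPU_TEMP_BUF_NLONGS were 128 (both are defined elsewhere in TCG;
 * treat these numbers as illustrative), FRAME_SIZE would be
 * 96 + 128 + 128 * 8 = 1248, already a multiple of the 16-byte stack
 * alignment, comfortably satisfying both assertions below.
 */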

/* We're expecting a 2-byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
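
/*
 * A sketch of the frame the prologue below builds.  Offsets are from the
 * post-prologue SP; the CFA (the SP at entry) sits at SP + FRAME_SIZE,
 * so the save-area slots line up with the DW_CFA_offset entries in the
 * debug_frame data at the end of this file:
 *
 *   FRAME_SIZE -  8:  x28                       (CFA -  8)
 *               ...:  x19..x28, saved in pairs
 *   FRAME_SIZE - 88:  lr (x30)                  (CFA - 88)
 *   FRAME_SIZE - 96:  fp (x29)                  (CFA - 96)
 *               ...:  CPU_TEMP_BUF_NLONGS * sizeof(long) bytes of TCG
 *                     temporaries (registered via tcg_set_frame)
 *                 0:  TCG_STATIC_CALL_ARGS_SIZE bytes of call arguments
 */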

static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
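    /*
     * Given PUSH_SIZE == 96, the pair store above should assemble to
     * "stp x29, x30, [sp, #-96]!"; the trailing 1, 1 arguments select
     * pre-index addressing with writeback (an assumption about the
     * 3314 insn format, named for the ARM ARM load/store-pair section).
     */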

    /* Set up the frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-preserved regs x19..x28, above the (FP, LR) pair.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;   /* +2 skips the FP/LR slots */
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG of how to find the TCG locals: base register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP.  Depending on the guest, we may need
         * to zero-extend the guest address via the address index register
         * slot; therefore we must load even a zero guest base into a register.
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

    /* Load env into AREG0 and jump to the TB entry point passed as arg 1.  */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, à la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /* Remove the TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}
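
/*
 * For reference, the epilogue above should disassemble roughly as:
 *     add  sp, sp, #(FRAME_SIZE - PUSH_SIZE)
 *     ldp  x19, x20, [sp, #16]
 *     ...
 *     ldp  x27, x28, [sp, #80]
 *     ldp  x29, x30, [sp], #96     // post-index: pops the whole save area
 *     ret
 */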

static void tcg_out_tb_start(TCGContext *s)
{
    /* TBs are entered via indirect branch, so open with a BTI landing pad. */
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};
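
/*
 * Example of the uleb128 split in .fde_def_cfa above: if FRAME_SIZE
 * worked out to 1248 (0x4e0), the two bytes would be 0xe0 (the low
 * seven bits with the continuation bit set) and 0x09 (the remaining
 * high bits).  QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)) earlier
 * guarantees that two bytes always suffice.
 */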

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}