/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
43    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
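
/*
 * The 26-bit field holds a signed word offset, so a B/BL patched
 * here can reach +/-128MB (2^25 instructions of 4 bytes each)
 * from the branch itself.
 */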

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}
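
/*
 * Likewise the 19-bit and 14-bit fields hold signed word offsets:
 * conditional branches patched via reloc_pc19 reach +/-1MB, and
 * TBZ/TBNZ patched via reloc_pc14 reach +/-32KB.
 */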

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}
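
/*
 * For example, 0xfff and 0xfff000 are both valid arithmetic
 * immediates (the latter as 0xfff with LSL 12), while 0x1001 is
 * not, since its set bits span both halves.
 */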

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
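
/*
 * Worked example: val = 0x0ff0 has the form 0..01..10..0.  Adding
 * val & -val (the lowest set bit, 0x010) carries through the run of
 * ones, giving 0x1000, a power of two, so the test passes.  By
 * contrast, val = 0x0f0f becomes 0x0f10, which still has multiple
 * bits set and is rejected.
 */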

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};
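
/*
 * The bit-test conditions map to plain EQ/NE because tgen_cmp below
 * emits ANDS (i.e. TST) for them, leaving the Z flag set according
 * to the result of the AND.
 */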

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_ADCS      = 0x3a000000,
    I3503_SBC       = 0x5a000000,
    I3503_SBCS      = 0x7a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for the
   rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
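
/*
 * Worked example: limm = 0x0ff0 (form 0..01..10..0) gives
 * h = clz64 = 52 and l = ctz64 = 4, hence r = 64 - 4 = 60 and
 * c = 60 - 52 - 1 = 7.  DecodeBitMasks then replicates an 8-bit
 * run of ones (imms = 7) and rotates it right by 60, i.e. left
 * by 4, reproducing 0x0ff0.
 */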

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
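
/*
 * As an example of the all-bytes test above, v64 = 0x00ff00ff00ff00ff
 * yields imm8 = 0x55; MOVI with op = 1, cmode = 0xe then expands each
 * bit of imm8 into a full 0x00 or 0xff byte of the result.
 */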

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
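
/*
 * For example, value = 0x123400005678 has fewer than 32 bits set,
 * so the MOVZ path is taken: s0 = 0, t1 = 0x123400000000, s1 = 32,
 * t2 = 0, and we emit MOVZ rd, #0x5678 then MOVK rd, #0x1234, lsl #32.
 */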

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
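
/*
 * For example, an 8-byte load at offset 0x7ff8 uses the scaled form
 * (scaled_uimm = 0xfff); offset -8 uses the unscaled signed form;
 * a misaligned or out-of-range offset falls back to tcg_out_movi
 * into TCG_REG_TMP0 plus the register-offset form.
 */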

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static void tgen_cmp(TCGContext *s, TCGType ext, TCGCond cond,
                     TCGReg a, TCGReg b)
{
    if (is_tst_cond(cond)) {
        tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
    }
}

static void tgen_cmpi(TCGContext *s, TCGType ext, TCGCond cond,
                      TCGReg a, tcg_target_long b)
{
    if (is_tst_cond(cond)) {
        tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
    } else if (b >= 0) {
        tcg_debug_assert(is_aimm(b));
        tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
    } else {
        tcg_debug_assert(is_aimm(-b));
        tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
    }
}
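
/*
 * A negative immediate is handled by negating it and flipping SUBS
 * to ADDS: the flags for a - b and a + (-b) are identical, which is
 * exactly the CMP/CMN aliasing.
 */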

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (const_b) {
        tgen_cmpi(s, ext, cond, a, b);
    } else {
        tgen_cmp(s, ext, cond, a, b);
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static void tcg_out_br(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg a, TCGReg b, TCGLabel *l)
{
    tgen_cmp(s, type, c, a, b);
    tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
    tcg_out_insn(s, 3202, B_C, c, 0);
}

static void tgen_brcondi(TCGContext *s, TCGType ext, TCGCond c,
                         TCGReg a, tcg_target_long b, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tgen_cmpi(s, ext, c, a, b);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static const TCGOutOpBrcond outop_brcond = {
    .base.static_constraint = C_O0_I2(r, rC),
    .out_rr = tgen_brcond,
    .out_ri = tgen_brcondi,
};

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_mb(TCGContext *s, unsigned a0)
{
    static const uint32_t sync[] = {
        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
    };
    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
}
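
/*
 * Note that the table above first defaults every combination to a
 * full barrier and then overrides the cases that can use a weaker
 * one; any ordering involving TCG_MO_ST_LD therefore keeps the
 * full DMB ISH.
 */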

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGType index_ext;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
    tcg_out_goto(s, lb->raddr);
    return true;
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
    MemOp opc = get_memop(lb->oi);

    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
    tcg_out_goto(s, lb->raddr);
    return true;
}

/* We expect to use a 7-bit scaled negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -512

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp s_bits = opc & MO_SIZE;
    unsigned a_mask;

    h->aa = atom_and_align_for_opc(s, opc,
                                   have_lse2 ? MO_ATOM_WITHIN16
                                             : MO_ATOM_IFALIGN,
                                   s_bits == MO_128);
    a_mask = (1 << h->aa.align) - 1;

    if (tcg_use_softmmu) {
        unsigned s_mask = (1u << s_bits) - 1;
        unsigned mem_index = get_mmuidx(oi);
        TCGReg addr_adj;
        TCGType mask_type;
        uint64_t compare_mask;

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addr_reg = addr_reg;

        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
                     ? TCG_TYPE_I64 : TCG_TYPE_I32);

        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
                     tlb_mask_table_ofs(s, mem_index), 1, 0);

1681        /* Extract the TLB index from the address into TMP0.  */
1682        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1683                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1684                     s->page_bits - CPU_TLB_ENTRY_BITS);
1685
1686        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1687        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1688
1689        /* Load the tlb comparator into TMP0, and the fast path addend. */
1690        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1691        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1692                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1693                         : offsetof(CPUTLBEntry, addr_write));
1694        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1695                   offsetof(CPUTLBEntry, addend));
1696
1697        /*
1698         * For aligned accesses, we check the first byte and include
1699         * the alignment bits within the address.  For unaligned accesses,
1700         * we check that we don't cross pages using the address of the
1701         * last byte of the access.
1702         */
1703        if (a_mask >= s_mask) {
1704            addr_adj = addr_reg;
1705        } else {
1706            addr_adj = TCG_REG_TMP2;
1707            tcg_out_insn(s, 3401, ADDI, addr_type,
1708                         addr_adj, addr_reg, s_mask - a_mask);
1709        }
1710        compare_mask = (uint64_t)s->page_mask | a_mask;
1711
1712        /* Store the page mask part of the address into TMP2.  */
1713        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1714                         addr_adj, compare_mask);
1715
1716        /* Perform the address comparison. */
1717        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1718
1719        /* If not equal, we jump to the slow path. */
1720        ldst->label_ptr[0] = s->code_ptr;
1721        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1722
1723        h->base = TCG_REG_TMP1;
1724        h->index = addr_reg;
1725        h->index_ext = addr_type;
1726    } else {
1727        if (a_mask) {
1728            ldst = new_ldst_label(s);
1729
1730            ldst->is_ld = is_ld;
1731            ldst->oi = oi;
1732            ldst->addr_reg = addr_reg;
1733
1734            /* tst addr, #mask */
1735            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1736
1737            /* b.ne slow_path */
1738            ldst->label_ptr[0] = s->code_ptr;
1739            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1740        }
1741
1742        if (guest_base || addr_type == TCG_TYPE_I32) {
1743            h->base = TCG_REG_GUEST_BASE;
1744            h->index = addr_reg;
1745            h->index_ext = addr_type;
1746        } else {
1747            h->base = addr_reg;
1748            h->index = TCG_REG_XZR;
1749            h->index_ext = TCG_TYPE_I64;
1750        }
1751    }
1752
1753    return ldst;
1754}
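/*
 * For the softmmu path above, the emitted fast path is roughly the
 * following (illustrative only; exact immediates depend on page_bits and
 * the dynamic TLB size, and the addr_adj ADD is elided when
 * a_mask >= s_mask):
 *
 *    ldp  tmp0, tmp1, [env, #mask_table_ofs]    // mask, table
 *    and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *    add  tmp1, tmp1, tmp0                      // &tlb entry
 *    ldr  tmp0, [tmp1, #addr_read/addr_write]   // comparator
 *    ldr  tmp1, [tmp1, #addend]
 *    add  tmp2, addr, #(s_mask - a_mask)
 *    and  tmp2, tmp2, #(page_mask | a_mask)
 *    cmp  tmp0, tmp2
 *    b.ne slow_path                             // else host addr = tmp1+addr
 */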
1755
1756static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1757                                   TCGReg data_r, HostAddress h)
1758{
1759    switch (memop & MO_SSIZE) {
1760    case MO_UB:
1761        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1762        break;
1763    case MO_SB:
1764        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1765                       data_r, h.base, h.index_ext, h.index);
1766        break;
1767    case MO_UW:
1768        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1769        break;
1770    case MO_SW:
1771        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1772                       data_r, h.base, h.index_ext, h.index);
1773        break;
1774    case MO_UL:
1775        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1776        break;
1777    case MO_SL:
1778        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1779        break;
1780    case MO_UQ:
1781        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    default:
1784        g_assert_not_reached();
1785    }
1786}
1787
1788static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1789                                   TCGReg data_r, HostAddress h)
1790{
1791    switch (memop & MO_SIZE) {
1792    case MO_8:
1793        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1794        break;
1795    case MO_16:
1796        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1797        break;
1798    case MO_32:
1799        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1800        break;
1801    case MO_64:
1802        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1803        break;
1804    default:
1805        g_assert_not_reached();
1806    }
1807}
1808
1809static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1810                            MemOpIdx oi, TCGType data_type)
1811{
1812    TCGLabelQemuLdst *ldst;
1813    HostAddress h;
1814
1815    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1816    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1817
1818    if (ldst) {
1819        ldst->type = data_type;
1820        ldst->datalo_reg = data_reg;
1821        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1822    }
1823}
1824
1825static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1826                            MemOpIdx oi, TCGType data_type)
1827{
1828    TCGLabelQemuLdst *ldst;
1829    HostAddress h;
1830
1831    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1832    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1833
1834    if (ldst) {
1835        ldst->type = data_type;
1836        ldst->datalo_reg = data_reg;
1837        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1838    }
1839}
1840
1841static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1842                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1843{
1844    TCGLabelQemuLdst *ldst;
1845    HostAddress h;
1846    TCGReg base;
1847    bool use_pair;
1848
1849    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1850
1851    /* Compose the final address, as LDP/STP have no indexing. */
1852    if (h.index == TCG_REG_XZR) {
1853        base = h.base;
1854    } else {
1855        base = TCG_REG_TMP2;
1856        if (h.index_ext == TCG_TYPE_I32) {
1857            /* add base, base, index, uxtw */
1858            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1859                         h.base, h.index, MO_32, 0);
1860        } else {
1861            /* add base, base, index */
1862            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1863        }
1864    }
1865
1866    use_pair = h.aa.atom < MO_128 || have_lse2;
1867
1868    if (!use_pair) {
1869        tcg_insn_unit *branch = NULL;
1870        TCGReg ll, lh, sl, sh;
1871
1872        /*
1873         * If we have already checked for 16-byte alignment, that's all
1874         * we need. Otherwise we have determined that misaligned atomicity
1875         * may be handled with two 8-byte loads.
1876         */
1877        if (h.aa.align < MO_128) {
1878            /*
1879             * TODO: align should be MO_64, so we only need test bit 3,
1880             * which means we could use TBNZ instead of ANDS+B_C.
1881             */
1882            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1883            branch = s->code_ptr;
1884            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1885            use_pair = true;
1886        }
1887
1888        if (is_ld) {
1889            /*
1890             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1891             *    ldxp lo, hi, [base]
1892             *    stxp t0, lo, hi, [base]
1893             *    cbnz t0, .-8
1894             * Require no overlap between data{lo,hi} and base.
1895             */
1896            if (datalo == base || datahi == base) {
1897                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1898                base = TCG_REG_TMP2;
1899            }
1900            ll = sl = datalo;
1901            lh = sh = datahi;
1902        } else {
1903            /*
1904             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1905             * 1: ldxp t0, t1, [base]
1906             *    stxp t0, lo, hi, [base]
1907             *    cbnz t0, 1b
1908             */
1909            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1910            ll = TCG_REG_TMP0;
1911            lh = TCG_REG_TMP1;
1912            sl = datalo;
1913            sh = datahi;
1914        }
1915
1916        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1917        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1918        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1919
1920        if (use_pair) {
1921            /* "b .+8", branching across the one insn of use_pair. */
1922            tcg_out_insn(s, 3206, B, 2);
1923            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1924        }
1925    }
1926
1927    if (use_pair) {
1928        if (is_ld) {
1929            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1930        } else {
1931            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1932        }
1933    }
1934
1935    if (ldst) {
1936        ldst->type = TCG_TYPE_I128;
1937        ldst->datalo_reg = datalo;
1938        ldst->datahi_reg = datahi;
1939        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1940    }
1941}
1942
1943static const tcg_insn_unit *tb_ret_addr;
1944
1945static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1946{
1947    const tcg_insn_unit *target;
1948    ptrdiff_t offset;
1949
1950    /* Reuse the zeroing that exists for goto_ptr.  */
1951    if (a0 == 0) {
1952        target = tcg_code_gen_epilogue;
1953    } else {
1954        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1955        target = tb_ret_addr;
1956    }
1957
1958    offset = tcg_pcrel_diff(s, target) >> 2;
1959    if (offset == sextract64(offset, 0, 26)) {
1960        tcg_out_insn(s, 3206, B, offset);
1961    } else {
1962        /*
1963         * Only x16/x17 generate BTI type Jump (2),
1964         * other registers generate BTI type Jump|Call (3).
1965         */
1966        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
1967        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1968        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1969    }
1970}
1971
1972static void tcg_out_goto_tb(TCGContext *s, int which)
1973{
1974    /*
1975     * Direct branch, or indirect address load, will be patched
1976     * by tb_target_set_jmp_target.  Assert indirect load offset
1977     * in range early, regardless of direct branch distance.
1978     */
1979    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1980    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1981
1982    set_jmp_insn_offset(s, which);
1983    tcg_out32(s, I3206_B);
1984    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1985    set_jmp_reset_offset(s, which);
1986    tcg_out_bti(s, BTI_J);
1987}
1988
1989static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0)
1990{
1991    tcg_out_insn(s, 3207, BR, a0);
1992}
1993
1994void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1995                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1996{
1997    uintptr_t d_addr = tb->jmp_target_addr[n];
1998    ptrdiff_t d_offset = d_addr - jmp_rx;
1999    tcg_insn_unit insn;
2000
2001    /* Either directly branch, or indirect branch load. */
2002    if (d_offset == sextract64(d_offset, 0, 28)) {
2003        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2004    } else {
2005        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2006        ptrdiff_t i_offset = i_addr - jmp_rx;
2007
2008        /* Note that we asserted this in range in tcg_out_goto_tb. */
2009        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2010    }
2011    qatomic_set((uint32_t *)jmp_rw, insn);
2012    flush_idcache_range(jmp_rx, jmp_rw, 4);
2013}
2014
2015
2016static void tgen_add(TCGContext *s, TCGType type,
2017                     TCGReg a0, TCGReg a1, TCGReg a2)
2018{
2019    tcg_out_insn(s, 3502, ADD, type, a0, a1, a2);
2020}
2021
2022static void tgen_addi(TCGContext *s, TCGType type,
2023                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2024{
2025    if (a2 >= 0) {
2026        tcg_out_insn(s, 3401, ADDI, type, a0, a1, a2);
2027    } else {
2028        tcg_out_insn(s, 3401, SUBI, type, a0, a1, -a2);
2029    }
2030}
2031
2032static const TCGOutOpBinary outop_add = {
2033    .base.static_constraint = C_O1_I2(r, r, rA),
2034    .out_rrr = tgen_add,
2035    .out_rri = tgen_addi,
2036};
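/*
 * The rA constraint accepts aarch64 arithmetic immediates (an unsigned
 * 12-bit value, optionally shifted left by 12) of either sign; negative
 * constants fold into the opposite opcode, so e.g. adding -16 emits
 * "sub rd, rn, #16".
 */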
2037
2038static void tgen_addco(TCGContext *s, TCGType type,
2039                       TCGReg a0, TCGReg a1, TCGReg a2)
2040{
2041    tcg_out_insn(s, 3502, ADDS, type, a0, a1, a2);
2042}
2043
2044static void tgen_addco_imm(TCGContext *s, TCGType type,
2045                           TCGReg a0, TCGReg a1, tcg_target_long a2)
2046{
2047    if (a2 >= 0) {
2048        tcg_out_insn(s, 3401, ADDSI, type, a0, a1, a2);
2049    } else {
2050        tcg_out_insn(s, 3401, SUBSI, type, a0, a1, -a2);
2051    }
2052}
2053
2054static const TCGOutOpBinary outop_addco = {
2055    .base.static_constraint = C_O1_I2(r, r, rA),
2056    .out_rrr = tgen_addco,
2057    .out_rri = tgen_addco_imm,
2058};
2059
2060static void tgen_addci_rrr(TCGContext *s, TCGType type,
2061                           TCGReg a0, TCGReg a1, TCGReg a2)
2062{
2063    tcg_out_insn(s, 3503, ADC, type, a0, a1, a2);
2064}
2065
2066static void tgen_addci_rri(TCGContext *s, TCGType type,
2067                           TCGReg a0, TCGReg a1, tcg_target_long a2)
2068{
2069    /*
2070     * Note that the only two constants we support are 0 and -1, and
2071     * that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.
2072     */
2073    if (a2) {
2074        tcg_out_insn(s, 3503, SBC, type, a0, a1, TCG_REG_XZR);
2075    } else {
2076        tcg_out_insn(s, 3503, ADC, type, a0, a1, TCG_REG_XZR);
2077    }
2078}
2079
2080static const TCGOutOpAddSubCarry outop_addci = {
2081    .base.static_constraint = C_O1_I2(r, rz, rMZ),
2082    .out_rrr = tgen_addci_rrr,
2083    .out_rri = tgen_addci_rri,
2084};
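/*
 * Spelled out: SBC computes rn + ~rm + C, so with rm = XZR it produces
 * rn + ~0 + C = rn + (-1) + C, which is exactly ADC of -1; conversely
 * ADC with XZR produces rn + 0 + C, which is SBC of -1.  Hence the
 * constant cases above need only swap ADC and SBC against XZR.
 */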
2085
2086static void tgen_addcio(TCGContext *s, TCGType type,
2087                        TCGReg a0, TCGReg a1, TCGReg a2)
2088{
2089    tcg_out_insn(s, 3503, ADCS, type, a0, a1, a2);
2090}
2091
2092static void tgen_addcio_imm(TCGContext *s, TCGType type,
2093                            TCGReg a0, TCGReg a1, tcg_target_long a2)
2094{
2095    /* Use SBCS w/0 for ADCS w/-1 -- see above. */
2096    if (a2) {
2097        tcg_out_insn(s, 3503, SBCS, type, a0, a1, TCG_REG_XZR);
2098    } else {
2099        tcg_out_insn(s, 3503, ADCS, type, a0, a1, TCG_REG_XZR);
2100    }
2101}
2102
2103static const TCGOutOpBinary outop_addcio = {
2104    .base.static_constraint = C_O1_I2(r, rz, rMZ),
2105    .out_rrr = tgen_addcio,
2106    .out_rri = tgen_addcio_imm,
2107};
2108
2109static void tcg_out_set_carry(TCGContext *s)
2110{
2111    tcg_out_insn(s, 3502, SUBS, TCG_TYPE_I32,
2112                 TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR);
2113}
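/*
 * AArch64 sets C to NOT-borrow on subtraction, so "subs xzr, xzr, xzr"
 * (0 - 0, no borrow) leaves C = 1: carry set.  The mirror-image
 * tcg_out_set_borrow below uses ADDS of zeros to leave C = 0, which in
 * the same convention means borrow set.
 */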
2114
2115static void tgen_and(TCGContext *s, TCGType type,
2116                     TCGReg a0, TCGReg a1, TCGReg a2)
2117{
2118    tcg_out_insn(s, 3510, AND, type, a0, a1, a2);
2119}
2120
2121static void tgen_andi(TCGContext *s, TCGType type,
2122                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2123{
2124    tcg_out_logicali(s, I3404_ANDI, type, a0, a1, a2);
2125}
2126
2127static const TCGOutOpBinary outop_and = {
2128    .base.static_constraint = C_O1_I2(r, r, rL),
2129    .out_rrr = tgen_and,
2130    .out_rri = tgen_andi,
2131};
2132
2133static void tgen_andc(TCGContext *s, TCGType type,
2134                      TCGReg a0, TCGReg a1, TCGReg a2)
2135{
2136    tcg_out_insn(s, 3510, BIC, type, a0, a1, a2);
2137}
2138
2139static const TCGOutOpBinary outop_andc = {
2140    .base.static_constraint = C_O1_I2(r, r, r),
2141    .out_rrr = tgen_andc,
2142};
2143
2144static void tgen_clz(TCGContext *s, TCGType type,
2145                     TCGReg a0, TCGReg a1, TCGReg a2)
2146{
2147    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2148    tcg_out_insn(s, 3507, CLZ, type, TCG_REG_TMP0, a1);
2149    tcg_out_insn(s, 3506, CSEL, type, a0, TCG_REG_TMP0, a2, TCG_COND_NE);
2150}
2151
2152static void tgen_clzi(TCGContext *s, TCGType type,
2153                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2154{
2155    if (a2 == (type == TCG_TYPE_I32 ? 32 : 64)) {
2156        tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2157        return;
2158    }
2159
2160    tcg_out_cmp(s, type, TCG_COND_NE, a1, 0, true);
2161    tcg_out_insn(s, 3507, CLZ, type, a0, a1);
2162
2163    switch (a2) {
2164    case -1:
2165        tcg_out_insn(s, 3506, CSINV, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2166        break;
2167    case 0:
2168        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_XZR, TCG_COND_NE);
2169        break;
2170    default:
2171        tcg_out_movi(s, type, TCG_REG_TMP0, a2);
2172        tcg_out_insn(s, 3506, CSEL, type, a0, a0, TCG_REG_TMP0, TCG_COND_NE);
2173        break;
2174    }
2175}
2176
2177static const TCGOutOpBinary outop_clz = {
2178    .base.static_constraint = C_O1_I2(r, r, rAL),
2179    .out_rrr = tgen_clz,
2180    .out_rri = tgen_clzi,
2181};
2182
2183static const TCGOutOpUnary outop_ctpop = {
2184    .base.static_constraint = C_NotImplemented,
2185};
2186
2187static void tgen_ctz(TCGContext *s, TCGType type,
2188                     TCGReg a0, TCGReg a1, TCGReg a2)
2189{
2190    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2191    tgen_clz(s, type, a0, TCG_REG_TMP0, a2);
2192}
2193
2194static void tgen_ctzi(TCGContext *s, TCGType type,
2195                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2196{
2197    tcg_out_insn(s, 3507, RBIT, type, TCG_REG_TMP0, a1);
2198    tgen_clzi(s, type, a0, TCG_REG_TMP0, a2);
2199}
2200
2201static const TCGOutOpBinary outop_ctz = {
2202    .base.static_constraint = C_O1_I2(r, r, rAL),
2203    .out_rrr = tgen_ctz,
2204    .out_rri = tgen_ctzi,
2205};
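/*
 * There is no hardware count-trailing-zeros; both helpers rely on the
 * identity ctz(x) == clz(bit_reverse(x)), so RBIT feeds the CLZ-based
 * code above, which also supplies the "value when zero" handling.
 */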
2206
2207static void tgen_divs(TCGContext *s, TCGType type,
2208                      TCGReg a0, TCGReg a1, TCGReg a2)
2209{
2210    tcg_out_insn(s, 3508, SDIV, type, a0, a1, a2);
2211}
2212
2213static const TCGOutOpBinary outop_divs = {
2214    .base.static_constraint = C_O1_I2(r, r, r),
2215    .out_rrr = tgen_divs,
2216};
2217
2218static const TCGOutOpDivRem outop_divs2 = {
2219    .base.static_constraint = C_NotImplemented,
2220};
2221
2222static void tgen_divu(TCGContext *s, TCGType type,
2223                      TCGReg a0, TCGReg a1, TCGReg a2)
2224{
2225    tcg_out_insn(s, 3508, UDIV, type, a0, a1, a2);
2226}
2227
2228static const TCGOutOpBinary outop_divu = {
2229    .base.static_constraint = C_O1_I2(r, r, r),
2230    .out_rrr = tgen_divu,
2231};
2232
2233static const TCGOutOpDivRem outop_divu2 = {
2234    .base.static_constraint = C_NotImplemented,
2235};
2236
2237static void tgen_eqv(TCGContext *s, TCGType type,
2238                     TCGReg a0, TCGReg a1, TCGReg a2)
2239{
2240    tcg_out_insn(s, 3510, EON, type, a0, a1, a2);
2241}
2242
2243static const TCGOutOpBinary outop_eqv = {
2244    .base.static_constraint = C_O1_I2(r, r, r),
2245    .out_rrr = tgen_eqv,
2246};
2247
2248static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
2249{
2250    tcg_out_ubfm(s, TCG_TYPE_I64, a0, a1, 32, 63);
2251}
2252
2253static const TCGOutOpUnary outop_extrh_i64_i32 = {
2254    .base.static_constraint = C_O1_I1(r, r),
2255    .out_rr = tgen_extrh_i64_i32,
2256};
2257
2258static void tgen_mul(TCGContext *s, TCGType type,
2259                     TCGReg a0, TCGReg a1, TCGReg a2)
2260{
2261    tcg_out_insn(s, 3509, MADD, type, a0, a1, a2, TCG_REG_XZR);
2262}
2263
2264static const TCGOutOpBinary outop_mul = {
2265    .base.static_constraint = C_O1_I2(r, r, r),
2266    .out_rrr = tgen_mul,
2267};
2268
2269static const TCGOutOpMul2 outop_muls2 = {
2270    .base.static_constraint = C_NotImplemented,
2271};
2272
2273static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags)
2274{
2275    return type == TCG_TYPE_I64 ? C_O1_I2(r, r, r) : C_NotImplemented;
2276}
2277
2278static void tgen_mulsh(TCGContext *s, TCGType type,
2279                       TCGReg a0, TCGReg a1, TCGReg a2)
2280{
2281    tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2282}
2283
2284static const TCGOutOpBinary outop_mulsh = {
2285    .base.static_constraint = C_Dynamic,
2286    .base.dynamic_constraint = cset_mulh,
2287    .out_rrr = tgen_mulsh,
2288};
2289
2290static const TCGOutOpMul2 outop_mulu2 = {
2291    .base.static_constraint = C_NotImplemented,
2292};
2293
2294static void tgen_muluh(TCGContext *s, TCGType type,
2295                       TCGReg a0, TCGReg a1, TCGReg a2)
2296{
2297    tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2298}
2299
2300static const TCGOutOpBinary outop_muluh = {
2301    .base.static_constraint = C_Dynamic,
2302    .base.dynamic_constraint = cset_mulh,
2303    .out_rrr = tgen_muluh,
2304};
2305
2306static const TCGOutOpBinary outop_nand = {
2307    .base.static_constraint = C_NotImplemented,
2308};
2309
2310static const TCGOutOpBinary outop_nor = {
2311    .base.static_constraint = C_NotImplemented,
2312};
2313
2314static void tgen_or(TCGContext *s, TCGType type,
2315                    TCGReg a0, TCGReg a1, TCGReg a2)
2316{
2317    tcg_out_insn(s, 3510, ORR, type, a0, a1, a2);
2318}
2319
2320static void tgen_ori(TCGContext *s, TCGType type,
2321                     TCGReg a0, TCGReg a1, tcg_target_long a2)
2322{
2323    tcg_out_logicali(s, I3404_ORRI, type, a0, a1, a2);
2324}
2325
2326static const TCGOutOpBinary outop_or = {
2327    .base.static_constraint = C_O1_I2(r, r, rL),
2328    .out_rrr = tgen_or,
2329    .out_rri = tgen_ori,
2330};
2331
2332static void tgen_orc(TCGContext *s, TCGType type,
2333                     TCGReg a0, TCGReg a1, TCGReg a2)
2334{
2335    tcg_out_insn(s, 3510, ORN, type, a0, a1, a2);
2336}
2337
2338static const TCGOutOpBinary outop_orc = {
2339    .base.static_constraint = C_O1_I2(r, r, r),
2340    .out_rrr = tgen_orc,
2341};
2342
2343static void tgen_rems(TCGContext *s, TCGType type,
2344                      TCGReg a0, TCGReg a1, TCGReg a2)
2345{
2346    tcg_out_insn(s, 3508, SDIV, type, TCG_REG_TMP0, a1, a2);
2347    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2348}
2349
2350static const TCGOutOpBinary outop_rems = {
2351    .base.static_constraint = C_O1_I2(r, r, r),
2352    .out_rrr = tgen_rems,
2353};
2354
2355static void tgen_remu(TCGContext *s, TCGType type,
2356                      TCGReg a0, TCGReg a1, TCGReg a2)
2357{
2358    tcg_out_insn(s, 3508, UDIV, type, TCG_REG_TMP0, a1, a2);
2359    tcg_out_insn(s, 3509, MSUB, type, a0, TCG_REG_TMP0, a2, a1);
2360}
2361
2362static const TCGOutOpBinary outop_remu = {
2363    .base.static_constraint = C_O1_I2(r, r, r),
2364    .out_rrr = tgen_remu,
2365};
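/*
 * SDIV/UDIV do not produce a remainder, so both helpers compute
 * a1 % a2 as a1 - (a1 / a2) * a2: the quotient lands in TMP0 and a
 * single MSUB finishes the job.
 */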
2366
2367static const TCGOutOpBinary outop_rotl = {
2368    .base.static_constraint = C_NotImplemented,
2369};
2370
2371static void tgen_rotr(TCGContext *s, TCGType type,
2372                      TCGReg a0, TCGReg a1, TCGReg a2)
2373{
2374    tcg_out_insn(s, 3508, RORV, type, a0, a1, a2);
2375}
2376
2377static void tgen_rotri(TCGContext *s, TCGType type,
2378                       TCGReg a0, TCGReg a1, tcg_target_long a2)
2379{
2380    int max = type == TCG_TYPE_I32 ? 31 : 63;
2381    tcg_out_extr(s, type, a0, a1, a1, a2 & max);
2382}
2383
2384static const TCGOutOpBinary outop_rotr = {
2385    .base.static_constraint = C_O1_I2(r, r, ri),
2386    .out_rrr = tgen_rotr,
2387    .out_rri = tgen_rotri,
2388};
2389
2390static void tgen_sar(TCGContext *s, TCGType type,
2391                     TCGReg a0, TCGReg a1, TCGReg a2)
2392{
2393    tcg_out_insn(s, 3508, ASRV, type, a0, a1, a2);
2394}
2395
2396static void tgen_sari(TCGContext *s, TCGType type,
2397                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2398{
2399    int max = type == TCG_TYPE_I32 ? 31 : 63;
2400    tcg_out_sbfm(s, type, a0, a1, a2 & max, max);
2401}
2402
2403static const TCGOutOpBinary outop_sar = {
2404    .base.static_constraint = C_O1_I2(r, r, ri),
2405    .out_rrr = tgen_sar,
2406    .out_rri = tgen_sari,
2407};
2408
2409static void tgen_shl(TCGContext *s, TCGType type,
2410                     TCGReg a0, TCGReg a1, TCGReg a2)
2411{
2412    tcg_out_insn(s, 3508, LSLV, type, a0, a1, a2);
2413}
2414
2415static void tgen_shli(TCGContext *s, TCGType type,
2416                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2417{
2418    int max = type == TCG_TYPE_I32 ? 31 : 63;
2419    tcg_out_ubfm(s, type, a0, a1, -a2 & max, ~a2 & max);
2420}
2421
2422static const TCGOutOpBinary outop_shl = {
2423    .base.static_constraint = C_O1_I2(r, r, ri),
2424    .out_rrr = tgen_shl,
2425    .out_rri = tgen_shli,
2426};
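/*
 * Immediate shifts are UBFM aliases: "lsl rd, rn, #n" is
 * ubfm rd, rn, #(-n & max), #(~n & max).  E.g. for I64 and n = 3 this
 * yields ubfm x0, x1, #61, #60, rotating the source right by 61
 * (equivalently left by 3) while keeping only source bits 0..60.
 */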
2427
2428static void tgen_shr(TCGContext *s, TCGType type,
2429                     TCGReg a0, TCGReg a1, TCGReg a2)
2430{
2431    tcg_out_insn(s, 3508, LSRV, type, a0, a1, a2);
2432}
2433
2434static void tgen_shri(TCGContext *s, TCGType type,
2435                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2436{
2437    int max = type == TCG_TYPE_I32 ? 31 : 63;
2438    tcg_out_ubfm(s, type, a0, a1, a2 & max, max);
2439}
2440
2441static const TCGOutOpBinary outop_shr = {
2442    .base.static_constraint = C_O1_I2(r, r, ri),
2443    .out_rrr = tgen_shr,
2444    .out_rri = tgen_shri,
2445};
2446
2447static void tgen_sub(TCGContext *s, TCGType type,
2448                     TCGReg a0, TCGReg a1, TCGReg a2)
2449{
2450    tcg_out_insn(s, 3502, SUB, type, a0, a1, a2);
2451}
2452
2453static const TCGOutOpSubtract outop_sub = {
2454    .base.static_constraint = C_O1_I2(r, r, r),
2455    .out_rrr = tgen_sub,
2456};
2457
2458static void tgen_subbo_rrr(TCGContext *s, TCGType type,
2459                           TCGReg a0, TCGReg a1, TCGReg a2)
2460{
2461    tcg_out_insn(s, 3502, SUBS, type, a0, a1, a2);
2462}
2463
2464static void tgen_subbo_rri(TCGContext *s, TCGType type,
2465                           TCGReg a0, TCGReg a1, tcg_target_long a2)
2466{
2467    if (a2 >= 0) {
2468        tcg_out_insn(s, 3401, SUBSI, type, a0, a1, a2);
2469    } else {
2470        tcg_out_insn(s, 3401, ADDSI, type, a0, a1, -a2);
2471    }
2472}
2473
2474static void tgen_subbo_rir(TCGContext *s, TCGType type,
2475                           TCGReg a0, tcg_target_long a1, TCGReg a2)
2476{
2477    tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, a2);
2478}
2479
2480static void tgen_subbo_rii(TCGContext *s, TCGType type,
2481                           TCGReg a0, tcg_target_long a1, tcg_target_long a2)
2482{
2483    if (a2 == 0) {
2484        tgen_subbo_rrr(s, type, a0, TCG_REG_XZR, TCG_REG_XZR);
2485        return;
2486    }
2487
2488    /*
2489     * We want to allow a1 to be zero for the benefit of negation via
2490     * subtraction.  However, that leaves open the possibility of
2491     * adding 0 +/- const, and the immediate add/sub instructions
2492     * encode XSP not XZR.  Since we have 0 - non-zero, borrow is
2493     * always set.
2494     */
2495    tcg_out_movi(s, type, a0, -a2);
2496    tcg_out_set_borrow(s);
2497}
2498
2499static const TCGOutOpAddSubCarry outop_subbo = {
2500    .base.static_constraint = C_O1_I2(r, rZ, rA),
2501    .out_rrr = tgen_subbo_rrr,
2502    .out_rri = tgen_subbo_rri,
2503    .out_rir = tgen_subbo_rir,
2504    .out_rii = tgen_subbo_rii,
2505};
2506
2507static void tgen_subbi_rrr(TCGContext *s, TCGType type,
2508                           TCGReg a0, TCGReg a1, TCGReg a2)
2509{
2510    tcg_out_insn(s, 3503, SBC, type, a0, a1, a2);
2511}
2512
2513static void tgen_subbi_rri(TCGContext *s, TCGType type,
2514                           TCGReg a0, TCGReg a1, tcg_target_long a2)
2515{
2516    tgen_addci_rri(s, type, a0, a1, ~a2);
2517}
2518
2519static const TCGOutOpAddSubCarry outop_subbi = {
2520    .base.static_constraint = C_O1_I2(r, rz, rMZ),
2521    .out_rrr = tgen_subbi_rrr,
2522    .out_rri = tgen_subbi_rri,
2523};
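/*
 * The immediate case reuses tgen_addci_rri via the complement identity
 * a1 - a2 - borrow == a1 + ~a2 + carry (with carry = !borrow in the
 * aarch64 convention), so subtracting a constant is adding its
 * complement with carry-in.
 */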
2524
2525static void tgen_subbio_rrr(TCGContext *s, TCGType type,
2526                            TCGReg a0, TCGReg a1, TCGReg a2)
2527{
2528    tcg_out_insn(s, 3503, SBCS, type, a0, a1, a2);
2529}
2530
2531static void tgen_subbio_rri(TCGContext *s, TCGType type,
2532                            TCGReg a0, TCGReg a1, tcg_target_long a2)
2533{
2534    tgen_addcio_imm(s, type, a0, a1, ~a2);
2535}
2536
2537static const TCGOutOpAddSubCarry outop_subbio = {
2538    .base.static_constraint = C_O1_I2(r, rz, rMZ),
2539    .out_rrr = tgen_subbio_rrr,
2540    .out_rri = tgen_subbio_rri,
2541};
2542
2543static void tcg_out_set_borrow(TCGContext *s)
2544{
2545    tcg_out_insn(s, 3502, ADDS, TCG_TYPE_I32,
2546                 TCG_REG_XZR, TCG_REG_XZR, TCG_REG_XZR);
2547}
2548
2549static void tgen_xor(TCGContext *s, TCGType type,
2550                     TCGReg a0, TCGReg a1, TCGReg a2)
2551{
2552    tcg_out_insn(s, 3510, EOR, type, a0, a1, a2);
2553}
2554
2555static void tgen_xori(TCGContext *s, TCGType type,
2556                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2557{
2558    tcg_out_logicali(s, I3404_EORI, type, a0, a1, a2);
2559}
2560
2561static const TCGOutOpBinary outop_xor = {
2562    .base.static_constraint = C_O1_I2(r, r, rL),
2563    .out_rrr = tgen_xor,
2564    .out_rri = tgen_xori,
2565};
2566
2567static void tgen_bswap16(TCGContext *s, TCGType type,
2568                         TCGReg a0, TCGReg a1, unsigned flags)
2569{
2570    tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2571    if (flags & TCG_BSWAP_OS) {
2572        /* Output must be sign-extended. */
2573        tcg_out_ext16s(s, type, a0, a0);
2574    } else if ((flags & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2575        /* Output must be zero-extended, but input isn't. */
2576        tcg_out_ext16u(s, a0, a0);
2577    }
2578}
2579
2580static const TCGOutOpBswap outop_bswap16 = {
2581    .base.static_constraint = C_O1_I1(r, r),
2582    .out_rr = tgen_bswap16,
2583};
2584
2585static void tgen_bswap32(TCGContext *s, TCGType type,
2586                         TCGReg a0, TCGReg a1, unsigned flags)
2587{
2588    tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2589    if (flags & TCG_BSWAP_OS) {
2590        tcg_out_ext32s(s, a0, a0);
2591    }
2592}
2593
2594static const TCGOutOpBswap outop_bswap32 = {
2595    .base.static_constraint = C_O1_I1(r, r),
2596    .out_rr = tgen_bswap32,
2597};
2598
2599static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2600{
2601    tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2602}
2603
2604static const TCGOutOpUnary outop_bswap64 = {
2605    .base.static_constraint = C_O1_I1(r, r),
2606    .out_rr = tgen_bswap64,
2607};
2608
2609static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2610{
2611    tgen_sub(s, type, a0, TCG_REG_XZR, a1);
2612}
2613
2614static const TCGOutOpUnary outop_neg = {
2615    .base.static_constraint = C_O1_I1(r, r),
2616    .out_rr = tgen_neg,
2617};
2618
2619static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2620{
2621    tgen_orc(s, type, a0, TCG_REG_XZR, a1);
2622}
2623
2624static const TCGOutOpUnary outop_not = {
2625    .base.static_constraint = C_O1_I1(r, r),
2626    .out_rr = tgen_not,
2627};
2628
2629static void tgen_cset(TCGContext *s, TCGCond cond, TCGReg ret)
2630{
2631    /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2632    tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, ret, TCG_REG_XZR,
2633                 TCG_REG_XZR, tcg_invert_cond(cond));
2634}
2635
2636static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
2637                         TCGReg a0, TCGReg a1, TCGReg a2)
2638{
2639    tgen_cmp(s, type, cond, a1, a2);
2640    tgen_cset(s, cond, a0);
2641}
2642
2643static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
2644                          TCGReg a0, TCGReg a1, tcg_target_long a2)
2645{
2646    tgen_cmpi(s, type, cond, a1, a2);
2647    tgen_cset(s, cond, a0);
2648}
2649
2650static const TCGOutOpSetcond outop_setcond = {
2651    .base.static_constraint = C_O1_I2(r, r, rC),
2652    .out_rrr = tgen_setcond,
2653    .out_rri = tgen_setcondi,
2654};
2655
2656static void tgen_csetm(TCGContext *s, TCGType ext, TCGCond cond, TCGReg ret)
2657{
2658    /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2659    tcg_out_insn(s, 3506, CSINV, ext, ret, TCG_REG_XZR,
2660                 TCG_REG_XZR, tcg_invert_cond(cond));
2661}
2662
2663static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
2664                            TCGReg a0, TCGReg a1, TCGReg a2)
2665{
2666    tgen_cmp(s, type, cond, a1, a2);
2667    tgen_csetm(s, type, cond, a0);
2668}
2669
2670static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
2671                             TCGReg a0, TCGReg a1, tcg_target_long a2)
2672{
2673    tgen_cmpi(s, type, cond, a1, a2);
2674    tgen_csetm(s, type, cond, a0);
2675}
2676
2677static const TCGOutOpSetcond outop_negsetcond = {
2678    .base.static_constraint = C_O1_I2(r, r, rC),
2679    .out_rrr = tgen_negsetcond,
2680    .out_rri = tgen_negsetcondi,
2681};
2682
2683static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond,
2684                         TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
2685                         TCGArg vt, bool const_vt, TCGArg vf, bool const_vf)
2686{
2687    tcg_out_cmp(s, type, cond, c1, c2, const_c2);
2688    tcg_out_insn(s, 3506, CSEL, type, ret, vt, vf, cond);
2689}
2690
2691static const TCGOutOpMovcond outop_movcond = {
2692    .base.static_constraint = C_O1_I4(r, r, rC, rz, rz),
2693    .out = tgen_movcond,
2694};
2695
2696static void tgen_deposit(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2697                         TCGReg a2, unsigned ofs, unsigned len)
2698{
2699    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2700
2701    /*
2702     * Since we can't support "0Z" as a constraint, we allow a1 in
2703     * any register.  Fix things up as if it were a matching constraint.
2704     */
2705    if (a0 != a1) {
2706        if (a0 == a2) {
2707            tcg_out_mov(s, type, TCG_REG_TMP0, a2);
2708            a2 = TCG_REG_TMP0;
2709        }
2710        tcg_out_mov(s, type, a0, a1);
2711    }
2712    tcg_out_bfm(s, type, a0, a2, -ofs & mask, len - 1);
2713}
2714
2715static void tgen_depositi(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2716                          tcg_target_long a2, unsigned ofs, unsigned len)
2717{
2718    tgen_andi(s, type, a0, a1, ~MAKE_64BIT_MASK(ofs, len));
2719}
2720
2721static void tgen_depositz(TCGContext *s, TCGType type, TCGReg a0, TCGReg a2,
2722                          unsigned ofs, unsigned len)
2723{
2724    int max = type == TCG_TYPE_I32 ? 31 : 63;
2725    tcg_out_ubfm(s, type, a0, a2, -ofs & max, len - 1);
2726}
2727
2728static const TCGOutOpDeposit outop_deposit = {
2729    .base.static_constraint = C_O1_I2(r, rZ, rZ),
2730    .out_rrr = tgen_deposit,
2731    .out_rri = tgen_depositi,
2732    .out_rzr = tgen_depositz,
2733};
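/*
 * All three forms reduce to (U)BFM with immr = -ofs & mask rotating the
 * inserted field into position.  E.g. depositing an 8-bit field at
 * offset 16 of an i32 is "bfi w0, w2, #16, #8", i.e. bfm w0, w2, #16, #7.
 */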
2734
2735static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2736                         unsigned ofs, unsigned len)
2737{
2738    if (ofs == 0) {
2739        uint64_t mask = MAKE_64BIT_MASK(0, len);
2740        tcg_out_logicali(s, I3404_ANDI, type, a0, a1, mask);
2741    } else {
2742        tcg_out_ubfm(s, type, a0, a1, ofs, ofs + len - 1);
2743    }
2744}
2745
2746static const TCGOutOpExtract outop_extract = {
2747    .base.static_constraint = C_O1_I1(r, r),
2748    .out_rr = tgen_extract,
2749};
2750
2751static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2752                          unsigned ofs, unsigned len)
2753{
2754    tcg_out_sbfm(s, type, a0, a1, ofs, ofs + len - 1);
2755}
2756
2757static const TCGOutOpExtract outop_sextract = {
2758    .base.static_constraint = C_O1_I1(r, r),
2759    .out_rr = tgen_sextract,
2760};
2761
2762static void tgen_extract2(TCGContext *s, TCGType type, TCGReg a0,
2763                          TCGReg a1, TCGReg a2, unsigned shr)
2764{
2765    tcg_out_extr(s, type, a0, a2, a1, shr);
2766}
2767
2768static const TCGOutOpExtract2 outop_extract2 = {
2769    .base.static_constraint = C_O1_I2(r, rz, rz),
2770    .out_rrr = tgen_extract2,
2771};
2772
2773static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest,
2774                      TCGReg base, ptrdiff_t offset)
2775{
2776    tcg_out_ldst(s, I3312_LDRB, dest, base, offset, 0);
2777}
2778
2779static const TCGOutOpLoad outop_ld8u = {
2780    .base.static_constraint = C_O1_I1(r, r),
2781    .out = tgen_ld8u,
2782};
2783
2784static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest,
2785                      TCGReg base, ptrdiff_t offset)
2786{
2787    AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSBW : I3312_LDRSBX;
2788    tcg_out_ldst(s, insn, dest, base, offset, 0);
2789}
2790
2791static const TCGOutOpLoad outop_ld8s = {
2792    .base.static_constraint = C_O1_I1(r, r),
2793    .out = tgen_ld8s,
2794};
2795
2796static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest,
2797                       TCGReg base, ptrdiff_t offset)
2798{
2799    tcg_out_ldst(s, I3312_LDRH, dest, base, offset, 1);
2800}
2801
2802static const TCGOutOpLoad outop_ld16u = {
2803    .base.static_constraint = C_O1_I1(r, r),
2804    .out = tgen_ld16u,
2805};
2806
2807static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest,
2808                       TCGReg base, ptrdiff_t offset)
2809{
2810    AArch64Insn insn = type == TCG_TYPE_I32 ? I3312_LDRSHW : I3312_LDRSHX;
2811    tcg_out_ldst(s, insn, dest, base, offset, 1);
2812}
2813
2814static const TCGOutOpLoad outop_ld16s = {
2815    .base.static_constraint = C_O1_I1(r, r),
2816    .out = tgen_ld16s,
2817};
2818
2819static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest,
2820                       TCGReg base, ptrdiff_t offset)
2821{
2822    tcg_out_ldst(s, I3312_LDRW, dest, base, offset, 2);
2823}
2824
2825static const TCGOutOpLoad outop_ld32u = {
2826    .base.static_constraint = C_O1_I1(r, r),
2827    .out = tgen_ld32u,
2828};
2829
2830static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest,
2831                       TCGReg base, ptrdiff_t offset)
2832{
2833    tcg_out_ldst(s, I3312_LDRSWX, dest, base, offset, 2);
2834}
2835
2836static const TCGOutOpLoad outop_ld32s = {
2837    .base.static_constraint = C_O1_I1(r, r),
2838    .out = tgen_ld32s,
2839};
2840
2841static void tgen_st8_r(TCGContext *s, TCGType type, TCGReg data,
2842                       TCGReg base, ptrdiff_t offset)
2843{
2844    tcg_out_ldst(s, I3312_STRB, data, base, offset, 0);
2845}
2846
2847static const TCGOutOpStore outop_st8 = {
2848    .base.static_constraint = C_O0_I2(rz, r),
2849    .out_r = tgen_st8_r,
2850};
2851
2852static void tgen_st16_r(TCGContext *s, TCGType type, TCGReg data,
2853                        TCGReg base, ptrdiff_t offset)
2854{
2855    tcg_out_ldst(s, I3312_STRH, data, base, offset, 1);
2856}
2857
2858static const TCGOutOpStore outop_st16 = {
2859    .base.static_constraint = C_O0_I2(rz, r),
2860    .out_r = tgen_st16_r,
2861};
2862
2863static const TCGOutOpStore outop_st = {
2864    .base.static_constraint = C_O0_I2(rz, r),
2865    .out_r = tcg_out_st,
2866};
2867
2868static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
2869                       const TCGArg args[TCG_MAX_OP_ARGS],
2870                       const int const_args[TCG_MAX_OP_ARGS])
2871{
2872    /* Hoist the loads of the most common arguments.  */
2873    TCGArg a0 = args[0];
2874    TCGArg a1 = args[1];
2875    TCGArg a2 = args[2];
2876
2877    switch (opc) {
2878    case INDEX_op_qemu_ld_i32:
2879    case INDEX_op_qemu_ld_i64:
2880        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2881        break;
2882    case INDEX_op_qemu_st_i32:
2883    case INDEX_op_qemu_st_i64:
2884        tcg_out_qemu_st(s, a0, a1, a2, ext);
2885        break;
2886    case INDEX_op_qemu_ld_i128:
2887        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2888        break;
2889    case INDEX_op_qemu_st_i128:
2890        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], false);
2891        break;
2892
2893    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2894    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2895    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2896    default:
2897        g_assert_not_reached();
2898    }
2899}
2900
2901static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2902                           unsigned vecl, unsigned vece,
2903                           const TCGArg args[TCG_MAX_OP_ARGS],
2904                           const int const_args[TCG_MAX_OP_ARGS])
2905{
2906    static const AArch64Insn cmp_vec_insn[16] = {
2907        [TCG_COND_EQ] = I3616_CMEQ,
2908        [TCG_COND_GT] = I3616_CMGT,
2909        [TCG_COND_GE] = I3616_CMGE,
2910        [TCG_COND_GTU] = I3616_CMHI,
2911        [TCG_COND_GEU] = I3616_CMHS,
2912    };
2913    static const AArch64Insn cmp_scalar_insn[16] = {
2914        [TCG_COND_EQ] = I3611_CMEQ,
2915        [TCG_COND_GT] = I3611_CMGT,
2916        [TCG_COND_GE] = I3611_CMGE,
2917        [TCG_COND_GTU] = I3611_CMHI,
2918        [TCG_COND_GEU] = I3611_CMHS,
2919    };
2920    static const AArch64Insn cmp0_vec_insn[16] = {
2921        [TCG_COND_EQ] = I3617_CMEQ0,
2922        [TCG_COND_GT] = I3617_CMGT0,
2923        [TCG_COND_GE] = I3617_CMGE0,
2924        [TCG_COND_LT] = I3617_CMLT0,
2925        [TCG_COND_LE] = I3617_CMLE0,
2926    };
2927    static const AArch64Insn cmp0_scalar_insn[16] = {
2928        [TCG_COND_EQ] = I3612_CMEQ0,
2929        [TCG_COND_GT] = I3612_CMGT0,
2930        [TCG_COND_GE] = I3612_CMGE0,
2931        [TCG_COND_LT] = I3612_CMLT0,
2932        [TCG_COND_LE] = I3612_CMLE0,
2933    };
2934
2935    TCGType type = vecl + TCG_TYPE_V64;
2936    unsigned is_q = vecl;
2937    bool is_scalar = !is_q && vece == MO_64;
2938    TCGArg a0, a1, a2, a3;
2939    int cmode, imm8;
2940
2941    a0 = args[0];
2942    a1 = args[1];
2943    a2 = args[2];
2944
2945    switch (opc) {
2946    case INDEX_op_ld_vec:
2947        tcg_out_ld(s, type, a0, a1, a2);
2948        break;
2949    case INDEX_op_st_vec:
2950        tcg_out_st(s, type, a0, a1, a2);
2951        break;
2952    case INDEX_op_dupm_vec:
2953        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2954        break;
2955    case INDEX_op_add_vec:
2956        if (is_scalar) {
2957            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2958        } else {
2959            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2960        }
2961        break;
2962    case INDEX_op_sub_vec:
2963        if (is_scalar) {
2964            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2965        } else {
2966            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2967        }
2968        break;
2969    case INDEX_op_mul_vec:
2970        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2971        break;
2972    case INDEX_op_neg_vec:
2973        if (is_scalar) {
2974            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2975        } else {
2976            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2977        }
2978        break;
2979    case INDEX_op_abs_vec:
2980        if (is_scalar) {
2981            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2982        } else {
2983            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2984        }
2985        break;
2986    case INDEX_op_and_vec:
2987        if (const_args[2]) {
2988            is_shimm1632(~a2, &cmode, &imm8);
2989            if (a0 == a1) {
2990                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2991                return;
2992            }
2993            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2994            a2 = a0;
2995        }
2996        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2997        break;
2998    case INDEX_op_or_vec:
2999        if (const_args[2]) {
3000            is_shimm1632(a2, &cmode, &imm8);
3001            if (a0 == a1) {
3002                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
3003                return;
3004            }
3005            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
3006            a2 = a0;
3007        }
3008        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
3009        break;
3010    case INDEX_op_andc_vec:
3011        if (const_args[2]) {
3012            is_shimm1632(a2, &cmode, &imm8);
3013            if (a0 == a1) {
3014                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
3015                return;
3016            }
3017            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
3018            a2 = a0;
3019        }
3020        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
3021        break;
3022    case INDEX_op_orc_vec:
3023        if (const_args[2]) {
3024            is_shimm1632(~a2, &cmode, &imm8);
3025            if (a0 == a1) {
3026                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
3027                return;
3028            }
3029            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
3030            a2 = a0;
3031        }
3032        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
3033        break;
3034    case INDEX_op_xor_vec:
3035        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
3036        break;
3037    case INDEX_op_ssadd_vec:
3038        if (is_scalar) {
3039            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
3040        } else {
3041            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
3042        }
3043        break;
3044    case INDEX_op_sssub_vec:
3045        if (is_scalar) {
3046            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
3047        } else {
3048            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
3049        }
3050        break;
3051    case INDEX_op_usadd_vec:
3052        if (is_scalar) {
3053            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
3054        } else {
3055            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
3056        }
3057        break;
3058    case INDEX_op_ussub_vec:
3059        if (is_scalar) {
3060            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
3061        } else {
3062            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
3063        }
3064        break;
3065    case INDEX_op_smax_vec:
3066        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
3067        break;
3068    case INDEX_op_smin_vec:
3069        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
3070        break;
3071    case INDEX_op_umax_vec:
3072        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
3073        break;
3074    case INDEX_op_umin_vec:
3075        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
3076        break;
3077    case INDEX_op_not_vec:
3078        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
3079        break;
3080    case INDEX_op_shli_vec:
3081        if (is_scalar) {
3082            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
3083        } else {
3084            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
3085        }
3086        break;
3087    case INDEX_op_shri_vec:
3088        if (is_scalar) {
3089            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
3090        } else {
3091            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
3092        }
3093        break;
3094    case INDEX_op_sari_vec:
3095        if (is_scalar) {
3096            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
3097        } else {
3098            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
3099        }
3100        break;
3101    case INDEX_op_aa64_sli_vec:
3102        if (is_scalar) {
3103            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
3104        } else {
3105            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
3106        }
3107        break;
3108    case INDEX_op_shlv_vec:
3109        if (is_scalar) {
3110            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
3111        } else {
3112            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
3113        }
3114        break;
3115    case INDEX_op_aa64_sshl_vec:
3116        if (is_scalar) {
3117            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
3118        } else {
3119            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
3120        }
3121        break;
3122    case INDEX_op_cmp_vec:
3123        {
3124            TCGCond cond = args[3];
3125            AArch64Insn insn;
3126
3127            switch (cond) {
3128            case TCG_COND_NE:
3129                if (const_args[2]) {
3130                    if (is_scalar) {
3131                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
3132                    } else {
3133                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
3134                    }
3135                } else {
3136                    if (is_scalar) {
3137                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
3138                    } else {
3139                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
3140                    }
3141                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
3142                }
3143                break;
3144
3145            case TCG_COND_TSTNE:
3146            case TCG_COND_TSTEQ:
3147                if (const_args[2]) {
3148                    /* (x & 0) == 0 */
3149                    tcg_out_dupi_vec(s, type, MO_8, a0,
3150                                     -(cond == TCG_COND_TSTEQ));
3151                    break;
3152                }
3153                if (is_scalar) {
3154                    tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2);
3155                } else {
3156                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2);
3157                }
3158                if (cond == TCG_COND_TSTEQ) {
3159                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
3160                }
3161                break;
3162
3163            default:
3164                if (const_args[2]) {
3165                    if (is_scalar) {
3166                        insn = cmp0_scalar_insn[cond];
3167                        if (insn) {
3168                            tcg_out_insn_3612(s, insn, vece, a0, a1);
3169                            break;
3170                        }
3171                    } else {
3172                        insn = cmp0_vec_insn[cond];
3173                        if (insn) {
3174                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
3175                            break;
3176                        }
3177                    }
3178                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
3179                    a2 = TCG_VEC_TMP0;
3180                }
3181                if (is_scalar) {
3182                    insn = cmp_scalar_insn[cond];
3183                    if (insn == 0) {
3184                        TCGArg t;
3185                        t = a1, a1 = a2, a2 = t;
3186                        cond = tcg_swap_cond(cond);
3187                        insn = cmp_scalar_insn[cond];
3188                        tcg_debug_assert(insn != 0);
3189                    }
3190                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
3191                } else {
3192                    insn = cmp_vec_insn[cond];
3193                    if (insn == 0) {
3194                        TCGArg t;
3195                        t = a1, a1 = a2, a2 = t;
3196                        cond = tcg_swap_cond(cond);
3197                        insn = cmp_vec_insn[cond];
3198                        tcg_debug_assert(insn != 0);
3199                    }
3200                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
3201                }
3202                break;
3203            }
3204        }
3205        break;
3206
3207    case INDEX_op_bitsel_vec:
3208        a3 = args[3];
3209        if (a0 == a3) {
3210            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
3211        } else if (a0 == a2) {
3212            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
3213        } else {
3214            if (a0 != a1) {
3215                tcg_out_mov(s, type, a0, a1);
3216            }
3217            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
3218        }
3219        break;
3220
3221    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
3222    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
3223    default:
3224        g_assert_not_reached();
3225    }
3226}
3227
3228int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3229{
3230    switch (opc) {
3231    case INDEX_op_add_vec:
3232    case INDEX_op_sub_vec:
3233    case INDEX_op_and_vec:
3234    case INDEX_op_or_vec:
3235    case INDEX_op_xor_vec:
3236    case INDEX_op_andc_vec:
3237    case INDEX_op_orc_vec:
3238    case INDEX_op_neg_vec:
3239    case INDEX_op_abs_vec:
3240    case INDEX_op_not_vec:
3241    case INDEX_op_cmp_vec:
3242    case INDEX_op_shli_vec:
3243    case INDEX_op_shri_vec:
3244    case INDEX_op_sari_vec:
3245    case INDEX_op_ssadd_vec:
3246    case INDEX_op_sssub_vec:
3247    case INDEX_op_usadd_vec:
3248    case INDEX_op_ussub_vec:
3249    case INDEX_op_shlv_vec:
3250    case INDEX_op_bitsel_vec:
3251        return 1;
3252    case INDEX_op_rotli_vec:
3253    case INDEX_op_shrv_vec:
3254    case INDEX_op_sarv_vec:
3255    case INDEX_op_rotlv_vec:
3256    case INDEX_op_rotrv_vec:
3257        return -1;
3258    case INDEX_op_mul_vec:
3259    case INDEX_op_smax_vec:
3260    case INDEX_op_smin_vec:
3261    case INDEX_op_umax_vec:
3262    case INDEX_op_umin_vec:
3263        return vece < MO_64;
3264
3265    default:
3266        return 0;
3267    }
3268}
3269
3270void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3271                       TCGArg a0, ...)
3272{
3273    va_list va;
3274    TCGv_vec v0, v1, v2, t1, t2, c1;
3275    TCGArg a2;
3276
3277    va_start(va, a0);
3278    v0 = temp_tcgv_vec(arg_temp(a0));
3279    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3280    a2 = va_arg(va, TCGArg);
3281    va_end(va);
3282
3283    switch (opc) {
3284    case INDEX_op_rotli_vec:
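        /*
         * rotl(x, n) == (x >> (bits - n)) | (x << n): emit the right
         * shift into a temp, then SLI inserts the left-shifted source
         * on top while preserving the temp's low n bits.
         */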
3285        t1 = tcg_temp_new_vec(type);
3286        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
3287        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
3288                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
3289        tcg_temp_free_vec(t1);
3290        break;
3291
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        /* Right shifts are negative left shifts for AArch64.  */
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        tcg_gen_neg_vec(vece, t1, v2);
        opc = (opc == INDEX_op_shrv_vec
               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        tcg_temp_free_vec(t1);
        break;

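    /*
     * rotl(x, v2) == (x << v2) | (x >> (width - v2)); the second term
     * is emitted as a left shift by v2 - width, which is negative and
     * therefore shifts right.
     */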
    case INDEX_op_rotlv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_sub_vec(vece, t1, v2, c1);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
        tcg_gen_or_vec(vece, v0, v0, t1);
        tcg_temp_free_vec(t1);
        break;

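    /*
     * rotr(x, v2) == (x >> v2) | (x << (width - v2)); both terms are
     * emitted as variable left shifts, by -v2 and by width - v2.
     */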
    case INDEX_op_rotrv_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        t1 = tcg_temp_new_vec(type);
        t2 = tcg_temp_new_vec(type);
        c1 = tcg_constant_vec(type, vece, 8 << vece);
        tcg_gen_neg_vec(vece, t1, v2);
        tcg_gen_sub_vec(vece, t2, c1, v2);
        /* Right shifts are negative left shifts for AArch64.  */
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
        tcg_gen_or_vec(vece, v0, t1, t2);
        tcg_temp_free_vec(t1);
        tcg_temp_free_vec(t2);
        break;

    default:
        g_assert_not_reached();
    }
}

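/*
 * Choose the register constraints for each opcode.  Roughly (see
 * tcg-target-con-str.h for the authoritative definitions): 'r' is a
 * general register, 'w' a vector register, 'z' also accepts the zero
 * register, and 'O', 'N', 'Z' accept constants encodable as vector
 * ORR immediates, BIC immediates, and zero, respectively.
 */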
static TCGConstraintSetIndex
tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
{
    switch (op) {
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_ld_i128:
        return C_O2_I1(r, r, r);
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(rz, r);
    case INDEX_op_qemu_st_i128:
        return C_O0_I3(rz, rz, r);

    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_aa64_sshl_vec:
        return C_O1_I2(w, w, w);
    case INDEX_op_not_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_abs_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return C_O1_I1(w, w);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(w, r);
    case INDEX_op_st_vec:
        return C_O0_I2(w, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(w, wr);
    case INDEX_op_or_vec:
    case INDEX_op_andc_vec:
        return C_O1_I2(w, w, wO);
    case INDEX_op_and_vec:
    case INDEX_op_orc_vec:
        return C_O1_I2(w, w, wN);
    case INDEX_op_cmp_vec:
        return C_O1_I2(w, w, wZ);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(w, w, w, w);
    case INDEX_op_aa64_sli_vec:
        return C_O1_I2(w, 0, w);

    default:
        return C_NotImplemented;
    }
}

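/*
 * A TCGRegSet maps the general registers to its low 32 bits and the
 * vector registers to its high 32 bits, hence the masks below.
 */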
static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;

    tcg_target_call_clobber_regs = -1ull;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

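/*
 * PUSH_SIZE works out to 12 * 8 = 96 bytes; FRAME_SIZE adds the static
 * call-argument area and the TCG temp buffer, rounded up to the 16-byte
 * stack alignment.
 */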
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
#define PUSH_SIZE  ((30 - 19 + 1) * 8)

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* We're expecting to use a single ADDI insn.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);

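/*
 * Frame layout relative to the CFA (the SP value on entry):
 *   [CFA-96]  fp, lr          <- SP after the initial STP
 *   [CFA-80]  x19, x20, ... x27, x28 ending at [CFA-8]
 *   below     FRAME_SIZE - PUSH_SIZE bytes of outgoing call arguments
 *             and TCG temporaries   <- SP after the SUBI
 * This matches the DW_CFA_offset entries in debug_frame below.
 */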
static void tcg_target_qemu_prologue(TCGContext *s)
{
    TCGReg r;

    tcg_out_bti(s, BTI_C);

    /* Push (FP, LR) and allocate space for all saved registers.  */
    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, -PUSH_SIZE, 1, 1);

    /* Set up frame pointer for canonical unwinding.  */
    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);

    /* Store callee-saved regs x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Make stack space for TCG locals.  */
    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Inform TCG about how to find TCG locals with register, offset, size.  */
    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    if (!tcg_use_softmmu) {
        /*
         * Note that XZR cannot be encoded in the address base register slot,
         * as that actually encodes SP.  Depending on the guest, we may need
         * to zero-extend the guest address via the address index register slot,
         * therefore we need to load even a zero guest base into a register.
         */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
    }

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_bti(s, BTI_J);

    /* Remove TCG locals stack space.  */
    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
                 FRAME_SIZE - PUSH_SIZE);

    /* Restore registers x19..x28.  */
    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
        int ofs = (r - TCG_REG_X19 + 2) * 8;
        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
    }

    /* Pop (FP, LR), restore SP to previous frame.  */
    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
                 TCG_REG_SP, PUSH_SIZE, 0, 1);
    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
}

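/*
 * TBs may be entered with an indirect branch (BR from the prologue or
 * the goto_ptr path), so when BTI is in use each TB must start with a
 * BTI J landing pad.
 */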
static void tcg_out_tb_start(TCGContext *s)
{
    tcg_out_bti(s, BTI_J);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = NOP;
    }
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[24];
} DebugFrame;

#define ELF_HOST_MACHINE EM_AARCH64

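/*
 * A sketch of the encoding: DW_CFA_def_cfa is opcode 0x0c (12), taking
 * a register and a uleb128 offset, where FRAME_SIZE needs two uleb128
 * bytes (low 7 bits with the continuation bit set, then the high bits).
 * DW_CFA_offset is 0x80 | regno, followed by the register's save slot
 * as a uleb128 offset from the CFA, scaled by data_align (-8).
 */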
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = TCG_REG_LR,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
