/*
 * Initial TCG Implementation for aarch64
 *
 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
 * Written by Claudio Fontana
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * (at your option) any later version.
 *
 * See the COPYING file in the top-level directory for details.
 */

#include "qemu/bitops.h"

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#ifdef CONFIG_DARWIN
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_NORMAL
#else
# define TCG_TARGET_CALL_ARG_I128       TCG_CALL_ARG_EVEN
#endif
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

/* We're going to re-use TCGType in setting of the SF bit, which controls
   the size of the operation performed.  If we know the values match, it
   makes things much cleaner.  */
QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
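
/*
 * For illustration: thanks to the identity checked above, the emitters
 * below can feed a TCGType straight into bit 31 (SF) of an instruction
 * word, as in "insn | ext << 31"; SF=0 selects the 32-bit form of an
 * operation and SF=1 the 64-bit form.
 */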

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",

    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
43    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
};
#endif /* CONFIG_DEBUG_TCG */

static const int tcg_target_reg_alloc_order[] = {
    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
    TCG_REG_X28, /* we will reserve this for guest_base if configured */

    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,

    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,

    /* X16 reserved as temporary */
    /* X17 reserved as temporary */
    /* X18 reserved by system */
    /* X19 reserved for AREG0 */
    /* X29 reserved as fp */
    /* X30 reserved as temporary */

    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    /* V8 - V15 are call-saved, and skipped.  */
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[8] = {
    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_X0 + slot;
}

#define TCG_REG_TMP0 TCG_REG_X16
#define TCG_REG_TMP1 TCG_REG_X17
#define TCG_REG_TMP2 TCG_REG_X30
#define TCG_VEC_TMP0 TCG_REG_V31

#define TCG_REG_GUEST_BASE TCG_REG_X28

static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 26)) {
        /* read instruction, mask away previous PC_REL26 parameter contents,
           set the proper offset, then write back the instruction. */
        *src_rw = deposit32(*src_rw, 0, 26, offset);
        return true;
    }
    return false;
}
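
/*
 * Worked example: a B/BL at rx address P reaching P + 0x100 has
 * offset = 0x40 instruction words, which passes the 26-bit sextract64
 * test and is deposited into bits [25:0] of the branch insn.  A target
 * beyond +-128MB fails the test and the relocation fails.
 */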

static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 19)) {
        *src_rw = deposit32(*src_rw, 5, 19, offset);
        return true;
    }
    return false;
}

static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    ptrdiff_t offset = target - src_rx;

    if (offset == sextract64(offset, 0, 14)) {
        *src_rw = deposit32(*src_rw, 5, 14, offset);
        return true;
    }
    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_AARCH64_JUMP26:
    case R_AARCH64_CALL26:
        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_CONDBR19:
        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
    case R_AARCH64_TSTBR14:
        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#define TCG_CT_CONST_AIMM 0x100
#define TCG_CT_CONST_LIMM 0x200
#define TCG_CT_CONST_ZERO 0x400
#define TCG_CT_CONST_MONE 0x800
#define TCG_CT_CONST_ORRI 0x1000
#define TCG_CT_CONST_ANDI 0x2000
#define TCG_CT_CONST_CMP  0x4000

#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/* Match a constant valid for addition (12-bit, optionally shifted).  */
static inline bool is_aimm(uint64_t val)
{
    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
}

/* Match a constant valid for logical operations.  */
static inline bool is_limm(uint64_t val)
{
    /* Taking a simplified view of the logical immediates for now, ignoring
       the replication that can happen across the field.  Match bit patterns
       of the forms
           0....01....1
           0..01..10..0
       and their inverses.  */

    /* Make things easier below, by testing the form with msb clear. */
    if ((int64_t)val < 0) {
        val = ~val;
    }
    if (val == 0) {
        return false;
    }
    val += val & -val;
    return (val & (val - 1)) == 0;
}
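
/*
 * Worked example: val = 0x0ff0 (form 0..01..10..0).  The lowest set bit
 * is val & -val = 0x0010; adding it yields 0x1000, a power of two, so
 * the final test accepts.  The simplification above does reject some
 * encodable immediates, e.g. 0x0f0f0f0f0f0f0f0f, which DecodeBitMasks
 * can express by replication but which leaves multiple bits set here.
 */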

/* Return true if v16 is a valid 16-bit shifted immediate.  */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if (v16 == (v16 & 0xff)) {
        *cmode = 0x8;
        *imm8 = v16 & 0xff;
        return true;
    } else if (v16 == (v16 & 0xff00)) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid 32-bit shifted immediate.  */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == (v32 & 0xff)) {
        *cmode = 0x0;
        *imm8 = v32 & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff00)) {
        *cmode = 0x2;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff0000)) {
        *cmode = 0x4;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    } else if (v32 == (v32 & 0xff000000)) {
        *cmode = 0x6;
        *imm8 = v32 >> 24;
        return true;
    }
    return false;
}
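
/*
 * For example, v32 = 0x00ab0000 matches with *cmode = 0x4, *imm8 = 0xab
 * ("imm8, LSL #16" in the modified-immediate encoding), while
 * v32 = 0x00ab00cd has two non-zero bytes and is not a shifted immediate.
 */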

/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    } else if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}

/* Return true if v32 is a valid float32 immediate.  */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    if (extract32(v32, 0, 19) == 0
        && (extract32(v32, 25, 6) == 0x20
            || extract32(v32, 25, 6) == 0x1f)) {
        *cmode = 0xf;
        *imm8 = (extract32(v32, 31, 1) << 7)
              | (extract32(v32, 25, 1) << 6)
              | extract32(v32, 19, 6);
        return true;
    }
    return false;
}

/* Return true if v64 is a valid float64 immediate.  */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    if (extract64(v64, 0, 48) == 0
        && (extract64(v64, 54, 9) == 0x100
            || extract64(v64, 54, 9) == 0x0ff)) {
        *cmode = 0xf;
        *imm8 = (extract64(v64, 63, 1) << 7)
              | (extract64(v64, 54, 1) << 6)
              | extract64(v64, 48, 6);
        return true;
    }
    return false;
}

/*
 * Return non-zero if v32 can be formed by MOVI+ORR.
 * Place the parameters for MOVI in (cmode, imm8).
 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int i;

    for (i = 6; i > 0; i -= 2) {
        /* Mask out one byte we can add with ORR.  */
        uint32_t tmp = v32 & ~(0xffu << (i * 4));
        if (is_shimm32(tmp, cmode, imm8) ||
            is_soimm32(tmp, cmode, imm8)) {
            break;
        }
    }
    return i;
}
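
/*
 * Worked example: for v32 = 0x00ab00cd, the iteration at i = 4 masks
 * out the 0xab byte leaving 0x000000cd, which is_shimm32 accepts with
 * cmode 0x0, imm8 0xcd; we return 4, and the caller emits that MOVI
 * plus an ORR with cmode 4 and imm8 = extract32(v32, 16, 8) = 0xab.
 * A return of 0 means no two-insn decomposition exists.
 */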

/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    if (v32 == deposit32(v32, 16, 16, v32)) {
        return is_shimm16(v32, cmode, imm8);
    } else {
        return is_shimm32(v32, cmode, imm8);
    }
}

static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            ct |= TCG_CT_CONST_LIMM;
        } else {
            ct |= TCG_CT_CONST_AIMM;
        }
    }

    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
        return 1;
    }

    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
    case 0:
        break;
    case TCG_CT_CONST_ANDI:
        val = ~val;
        /* fallthru */
    case TCG_CT_CONST_ORRI:
        if (val == deposit64(val, 32, 32, val)) {
            int cmode, imm8;
            return is_shimm1632(val, &cmode, &imm8);
        }
        break;
    default:
        /* Both bits should not be set for the same insn.  */
        g_assert_not_reached();
    }

    return 0;
}

enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,     /* Unsigned greater or equal */
    COND_HS = COND_CS, /* ALIAS greater or equal */
    COND_CC = 0x3,     /* Unsigned less than */
    COND_LO = COND_CC, /* ALIAS Lower */
    COND_MI = 0x4,     /* Negative */
    COND_PL = 0x5,     /* Zero or greater */
    COND_VS = 0x6,     /* Overflow */
    COND_VC = 0x7,     /* No overflow */
    COND_HI = 0x8,     /* Unsigned greater than */
    COND_LS = 0x9,     /* Unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf, /* behaves like COND_AL here */
};

static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
    [TCG_COND_EQ] = COND_EQ,
    [TCG_COND_NE] = COND_NE,
    [TCG_COND_LT] = COND_LT,
    [TCG_COND_GE] = COND_GE,
    [TCG_COND_LE] = COND_LE,
    [TCG_COND_GT] = COND_GT,
    /* unsigned */
    [TCG_COND_LTU] = COND_LO,
    [TCG_COND_GTU] = COND_HI,
    [TCG_COND_GEU] = COND_HS,
    [TCG_COND_LEU] = COND_LS,
    /* bit test */
    [TCG_COND_TSTEQ] = COND_EQ,
    [TCG_COND_TSTNE] = COND_NE,
};

typedef enum {
    LDST_ST = 0,    /* store */
    LDST_LD = 1,    /* load */
    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
} AArch64LdstType;

/* We encode the format of the insn into the beginning of the name, so that
   we can have the preprocessor help "typecheck" the insn vs the output
   function.  Arm didn't provide us with nice names for the formats, so we
   use the section number of the architecture reference manual in which the
   instruction group is described.  */
typedef enum {
    /* Compare and branch (immediate).  */
    I3201_CBZ       = 0x34000000,
    I3201_CBNZ      = 0x35000000,

    /* Conditional branch (immediate).  */
    I3202_B_C       = 0x54000000,

    /* Test and branch (immediate).  */
    I3205_TBZ       = 0x36000000,
    I3205_TBNZ      = 0x37000000,

    /* Unconditional branch (immediate).  */
    I3206_B         = 0x14000000,
    I3206_BL        = 0x94000000,

    /* Unconditional branch (register).  */
    I3207_BR        = 0xd61f0000,
    I3207_BLR       = 0xd63f0000,
    I3207_RET       = 0xd65f0000,

    /* AdvSIMD load/store single structure.  */
    I3303_LD1R      = 0x0d40c000,

    /* Load literal for loading the address at pc-relative offset */
    I3305_LDR       = 0x58000000,
    I3305_LDR_v64   = 0x5c000000,
    I3305_LDR_v128  = 0x9c000000,

    /* Load/store exclusive. */
    I3306_LDXP      = 0xc8600000,
    I3306_STXP      = 0xc8200000,

    /* Load/store register.  Described here as 3.3.12, but the helper
       that emits them can transform to 3.3.10 or 3.3.13.  */
    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,

    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,

    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,

    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,

    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,

    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,

    I3312_TO_I3310  = 0x00200800,
    I3312_TO_I3313  = 0x01000000,

    /* Load/store register pair instructions.  */
    I3314_LDP       = 0x28400000,
    I3314_STP       = 0x28000000,

    /* Add/subtract immediate instructions.  */
    I3401_ADDI      = 0x11000000,
    I3401_ADDSI     = 0x31000000,
    I3401_SUBI      = 0x51000000,
    I3401_SUBSI     = 0x71000000,

    /* Bitfield instructions.  */
    I3402_BFM       = 0x33000000,
    I3402_SBFM      = 0x13000000,
    I3402_UBFM      = 0x53000000,

    /* Extract instruction.  */
    I3403_EXTR      = 0x13800000,

    /* Logical immediate instructions.  */
    I3404_ANDI      = 0x12000000,
    I3404_ORRI      = 0x32000000,
    I3404_EORI      = 0x52000000,
    I3404_ANDSI     = 0x72000000,

    /* Move wide immediate instructions.  */
    I3405_MOVN      = 0x12800000,
    I3405_MOVZ      = 0x52800000,
    I3405_MOVK      = 0x72800000,

    /* PC relative addressing instructions.  */
    I3406_ADR       = 0x10000000,
    I3406_ADRP      = 0x90000000,

    /* Add/subtract extended register instructions. */
    I3501_ADD       = 0x0b200000,

    /* Add/subtract shifted register instructions (without a shift).  */
    I3502_ADD       = 0x0b000000,
    I3502_ADDS      = 0x2b000000,
    I3502_SUB       = 0x4b000000,
    I3502_SUBS      = 0x6b000000,

    /* Add/subtract shifted register instructions (with a shift).  */
    I3502S_ADD_LSL  = I3502_ADD,

    /* Add/subtract with carry instructions.  */
    I3503_ADC       = 0x1a000000,
    I3503_SBC       = 0x5a000000,

    /* Conditional select instructions.  */
    I3506_CSEL      = 0x1a800000,
    I3506_CSINC     = 0x1a800400,
    I3506_CSINV     = 0x5a800000,
    I3506_CSNEG     = 0x5a800400,

    /* Data-processing (1 source) instructions.  */
    I3507_CLZ       = 0x5ac01000,
    I3507_RBIT      = 0x5ac00000,
    I3507_REV       = 0x5ac00000, /* + size << 10 */

    /* Data-processing (2 source) instructions.  */
    I3508_LSLV      = 0x1ac02000,
    I3508_LSRV      = 0x1ac02400,
    I3508_ASRV      = 0x1ac02800,
    I3508_RORV      = 0x1ac02c00,
    I3508_SMULH     = 0x9b407c00,
    I3508_UMULH     = 0x9bc07c00,
    I3508_UDIV      = 0x1ac00800,
    I3508_SDIV      = 0x1ac00c00,

    /* Data-processing (3 source) instructions.  */
    I3509_MADD      = 0x1b000000,
    I3509_MSUB      = 0x1b008000,

    /* Logical shifted register instructions (without a shift).  */
    I3510_AND       = 0x0a000000,
    I3510_BIC       = 0x0a200000,
    I3510_ORR       = 0x2a000000,
    I3510_ORN       = 0x2a200000,
    I3510_EOR       = 0x4a000000,
    I3510_EON       = 0x4a200000,
    I3510_ANDS      = 0x6a000000,

    /* Logical shifted register instructions (with a shift).  */
    I3502S_AND_LSR  = I3510_AND | (1 << 22),

    /* AdvSIMD copy */
    I3605_DUP      = 0x0e000400,
    I3605_INS      = 0x4e001c00,
    I3605_UMOV     = 0x0e003c00,

    /* AdvSIMD modified immediate */
    I3606_MOVI      = 0x0f000400,
    I3606_MVNI      = 0x2f000400,
    I3606_BIC       = 0x2f001400,
    I3606_ORR       = 0x0f001400,

    /* AdvSIMD scalar shift by immediate */
    I3609_SSHR      = 0x5f000400,
    I3609_SSRA      = 0x5f001400,
    I3609_SHL       = 0x5f005400,
    I3609_USHR      = 0x7f000400,
    I3609_USRA      = 0x7f001400,
    I3609_SLI       = 0x7f005400,

    /* AdvSIMD scalar three same */
    I3611_SQADD     = 0x5e200c00,
    I3611_SQSUB     = 0x5e202c00,
    I3611_CMGT      = 0x5e203400,
    I3611_CMGE      = 0x5e203c00,
    I3611_SSHL      = 0x5e204400,
    I3611_ADD       = 0x5e208400,
    I3611_CMTST     = 0x5e208c00,
    I3611_UQADD     = 0x7e200c00,
    I3611_UQSUB     = 0x7e202c00,
    I3611_CMHI      = 0x7e203400,
    I3611_CMHS      = 0x7e203c00,
    I3611_USHL      = 0x7e204400,
    I3611_SUB       = 0x7e208400,
    I3611_CMEQ      = 0x7e208c00,

    /* AdvSIMD scalar two-reg misc */
    I3612_CMGT0     = 0x5e208800,
    I3612_CMEQ0     = 0x5e209800,
    I3612_CMLT0     = 0x5e20a800,
    I3612_ABS       = 0x5e20b800,
    I3612_CMGE0     = 0x7e208800,
    I3612_CMLE0     = 0x7e209800,
    I3612_NEG       = 0x7e20b800,

    /* AdvSIMD shift by immediate */
    I3614_SSHR      = 0x0f000400,
    I3614_SSRA      = 0x0f001400,
    I3614_SHL       = 0x0f005400,
    I3614_SLI       = 0x2f005400,
    I3614_USHR      = 0x2f000400,
    I3614_USRA      = 0x2f001400,

    /* AdvSIMD three same.  */
    I3616_ADD       = 0x0e208400,
    I3616_AND       = 0x0e201c00,
    I3616_BIC       = 0x0e601c00,
    I3616_BIF       = 0x2ee01c00,
    I3616_BIT       = 0x2ea01c00,
    I3616_BSL       = 0x2e601c00,
    I3616_EOR       = 0x2e201c00,
    I3616_MUL       = 0x0e209c00,
    I3616_ORR       = 0x0ea01c00,
    I3616_ORN       = 0x0ee01c00,
    I3616_SUB       = 0x2e208400,
    I3616_CMGT      = 0x0e203400,
    I3616_CMGE      = 0x0e203c00,
    I3616_CMTST     = 0x0e208c00,
    I3616_CMHI      = 0x2e203400,
    I3616_CMHS      = 0x2e203c00,
    I3616_CMEQ      = 0x2e208c00,
    I3616_SMAX      = 0x0e206400,
    I3616_SMIN      = 0x0e206c00,
    I3616_SSHL      = 0x0e204400,
    I3616_SQADD     = 0x0e200c00,
    I3616_SQSUB     = 0x0e202c00,
    I3616_UMAX      = 0x2e206400,
    I3616_UMIN      = 0x2e206c00,
    I3616_UQADD     = 0x2e200c00,
    I3616_UQSUB     = 0x2e202c00,
    I3616_USHL      = 0x2e204400,

    /* AdvSIMD two-reg misc.  */
    I3617_CMGT0     = 0x0e208800,
    I3617_CMEQ0     = 0x0e209800,
    I3617_CMLT0     = 0x0e20a800,
    I3617_CMGE0     = 0x2e208800,
    I3617_CMLE0     = 0x2e209800,
    I3617_NOT       = 0x2e205800,
    I3617_ABS       = 0x0e20b800,
    I3617_NEG       = 0x2e20b800,

    /* System instructions.  */
    NOP             = 0xd503201f,
    DMB_ISH         = 0xd50338bf,
    DMB_LD          = 0x00000100,
    DMB_ST          = 0x00000200,

    BTI_C           = 0xd503245f,
    BTI_J           = 0xd503249f,
    BTI_JC          = 0xd50324df,
} AArch64Insn;

static inline uint32_t tcg_in32(TCGContext *s)
{
    uint32_t v = *(uint32_t *)s->code_ptr;
    return v;
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
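
/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing an opcode
 * with an emitter of the wrong format, say tcg_out_insn(s, 3402, ADDI, ...),
 * fails to compile because I3402_ADDI does not exist.
 */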

static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rt, TCGReg rn, unsigned size)
{
    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
}

static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
                              int imm19, TCGReg rt)
{
    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
                              TCGReg rt, TCGReg rt2, TCGReg rn)
{
    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
}

static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rt, int imm19)
{
    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
}

static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
                              TCGCond c, int imm19)
{
    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
}

static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
                              TCGReg rt, int imm6, int imm14)
{
    insn |= (imm6 & 0x20) << (31 - 5);
    insn |= (imm6 & 0x1f) << 19;
    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
}

static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
{
    tcg_out32(s, insn | (imm26 & 0x03ffffff));
}

static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
{
    tcg_out32(s, insn | rn << 5);
}

static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
                              TCGReg r1, TCGReg r2, TCGReg rn,
                              tcg_target_long ofs, bool pre, bool w)
{
    insn |= 1u << 31; /* ext */
    insn |= pre << 24;
    insn |= w << 23;

    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
    insn |= (ofs & (0x7f << 3)) << (15 - 3);

    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
}

static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, uint64_t aimm)
{
    if (aimm > 0xfff) {
        tcg_debug_assert((aimm & 0xfff) == 0);
        aimm >>= 12;
        tcg_debug_assert(aimm <= 0xfff);
        aimm |= 1 << 12;  /* apply LSL 12 */
    }
    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
}
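
/*
 * For example, aimm = 0x5000 takes the shifted path: it becomes
 * imm12 = 5 with bit 12 set ("LSL #12").  aimm = 0x5001 would trip the
 * debug assert, being representable neither unshifted nor shifted by 12.
 */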

/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
   that feed the DecodeBitMasks pseudo function.  */
static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
{
    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
              | rn << 5 | rd);
}

#define tcg_out_insn_3404  tcg_out_insn_3402

static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
{
    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
              | rn << 5 | rd);
}

/* This function is used for the Move (wide immediate) instruction group.
   Note that SHIFT is a full shift count, not the 2 bit HW field. */
static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, uint16_t half, unsigned shift)
{
    tcg_debug_assert((shift & ~0x30) == 0);
    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
}

static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, int64_t disp)
{
    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
}

static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
                                     TCGType sf, TCGReg rd, TCGReg rn,
                                     TCGReg rm, int opt, int imm3)
{
    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
              imm3 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register), for
   the rare occasion when we actually want to supply a shift amount.  */
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
                                      TCGType ext, TCGReg rd, TCGReg rn,
                                      TCGReg rm, int imm6)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
}

/* This function is for 3.5.2 (Add/subtract shifted register),
   and 3.5.10 (Logical shifted register), for the vast majority of cases
   when we don't want to apply a shift.  Thus it can also be used for
   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
}

#define tcg_out_insn_3503  tcg_out_insn_3502
#define tcg_out_insn_3508  tcg_out_insn_3502
#define tcg_out_insn_3510  tcg_out_insn_3502

static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
              | tcg_cond_to_aarch64[c] << 12);
}

static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
}

static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
{
    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
}

static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
{
    /* Note that bit 11 set means general register input.  Therefore
       we can handle both register sets with one function.  */
    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
}

static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, bool op, int cmode, uint8_t imm8)
{
    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
}

static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
                              TCGReg rd, TCGReg rn, unsigned immhb)
{
    tcg_out32(s, insn | q << 30 | immhb << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
{
    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
                              unsigned size, TCGReg rd, TCGReg rn)
{
    tcg_out32(s, insn | q << 30 | (size << 22)
              | (rn & 0x1f) << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg base, TCGType ext,
                              TCGReg regoff)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, intptr_t offset)
{
    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
}

static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
{
    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
              | rn << 5 | (rd & 0x1f));
}

static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
{
    /*
     * While BTI insns are nops on hosts without FEAT_BTI,
     * there is no point in emitting them in that case either.
     */
    if (cpuinfo & CPUINFO_BTI) {
        tcg_out32(s, insn);
    }
}

/* Register to register move using ORR (shifted register with no shift). */
static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
{
    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
}

/* Register to register move using ADDI (move to/from SP).  */
static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
{
    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
}

/* This function is used for the Logical (immediate) instruction group.
   The value of LIMM must satisfy IS_LIMM.  See the comment above about
   only supporting simplified logical immediates.  */
static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
                             TCGReg rd, TCGReg rn, uint64_t limm)
{
    unsigned h, l, r, c;

    tcg_debug_assert(is_limm(limm));

    h = clz64(limm);
    l = ctz64(limm);
    if (l == 0) {
        r = 0;                  /* form 0....01....1 */
        c = ctz64(~limm) - 1;
        if (h == 0) {
            r = clz64(~limm);   /* form 1..10..01..1 */
            c += r;
        }
    } else {
        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
        c = r - h - 1;
    }
    if (ext == TCG_TYPE_I32) {
        r &= 31;
        c &= 31;
    }

    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
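
/*
 * Worked example: limm = 0xff (form 0....01....1) gives l = 0, hence
 * r = 0 and c = ctz64(~limm) - 1 = 7, i.e. IMMR = 0, IMMS = 7, which
 * DecodeBitMasks expands back into a run of eight low ones.
 */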

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg rd, int64_t v64)
{
    bool q = type == TCG_TYPE_V128;
    int cmode, imm8, i;

    /* Test all bytes equal first.  */
    if (vece == MO_8) {
        imm8 = (uint8_t)v64;
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
        return;
    }

    /*
     * Test all bytes 0x00 or 0xff second.  This can match cases that
     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
     */
    for (i = imm8 = 0; i < 8; i++) {
        uint8_t byte = v64 >> (i * 8);
        if (byte == 0xff) {
            imm8 |= 1 << i;
        } else if (byte != 0) {
            goto fail_bytes;
        }
    }
    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
    return;
 fail_bytes:

    /*
     * Tests for various replications.  For each element width, if we
     * cannot find an expansion there's no point checking a larger
     * width because we already know by replication it cannot match.
     */
    if (vece == MO_16) {
        uint16_t v16 = v64;

        if (is_shimm16(v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm16(~v16, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Otherwise, all remaining constants can be loaded in two insns:
         * rd = v16 & 0xff, rd |= v16 & 0xff00.
         */
        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
        return;
    } else if (vece == MO_32) {
        uint32_t v32 = v64;
        uint32_t n32 = ~v32;

        if (is_shimm32(v32, &cmode, &imm8) ||
            is_soimm32(v32, &cmode, &imm8) ||
            is_fimm32(v32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            return;
        }
        if (is_shimm32(n32, &cmode, &imm8) ||
            is_soimm32(n32, &cmode, &imm8)) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            return;
        }

        /*
         * Restrict the set of constants to those we can load with
         * two instructions.  Others we load from the pool.
         */
        i = is_shimm32_pair(v32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
            return;
        }
        i = is_shimm32_pair(n32, &cmode, &imm8);
        if (i) {
            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
            return;
        }
    } else if (is_fimm64(v64, &cmode, &imm8)) {
        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
        return;
    }

    /*
     * As a last resort, load from the constant pool.  Sadly there
     * is no LD1R (literal), so store the full 16-byte vector.
     */
    if (type == TCG_TYPE_V128) {
        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
    } else {
        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
    }
}
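
/*
 * Example of the all-0x00/0xff test above: v64 = 0x00ff00ff00ff00ff
 * yields imm8 = 0x55, one mask bit per 0xff byte, so the constant is
 * materialized with a single MOVI (cmode 0xe, op 1).
 */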

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    int is_q = type - TCG_TYPE_V64;
    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    TCGReg temp = TCG_REG_TMP0;

    if (offset < -0xffffff || offset > 0xffffff) {
        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
        base = temp;
    } else {
        AArch64Insn add_insn = I3401_ADDI;

        if (offset < 0) {
            add_insn = I3401_SUBI;
            offset = -offset;
        }
        if (offset & 0xfff000) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
            base = temp;
        }
        if (offset & 0xfff) {
            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
            base = temp;
        }
    }
    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
    return true;
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long value)
{
    tcg_target_long svalue = value;
    tcg_target_long ivalue = ~value;
    tcg_target_long t0, t1, t2;
    int s0, s1;
    AArch64Insn opc;

    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_debug_assert(rd < 32);
        break;
    default:
        g_assert_not_reached();
    }

    /* For 32-bit values, discard potential garbage in value.  For 64-bit
       values within [2**31, 2**32-1], we can create smaller sequences by
       interpreting this as a negative 32-bit number, while ensuring that
       the high 32 bits are cleared by setting SF=0.  */
    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
        svalue = (int32_t)value;
        value = (uint32_t)value;
        ivalue = (uint32_t)ivalue;
        type = TCG_TYPE_I32;
    }

    /* Speed things up by handling the common case of small positive
       and negative values specially.  */
    if ((value & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
        return;
    } else if ((ivalue & ~0xffffull) == 0) {
        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
        return;
    }

    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
       use the sign-extended value.  That lets us match rotated values such
       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
    if (is_limm(svalue)) {
        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
        return;
    }

    /* Look for host pointer values within 4G of the PC.  This happens
       often when loading pointers to QEMU's own data structures.  */
    if (type == TCG_TYPE_I64) {
        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
        tcg_target_long disp = value - src_rx;
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADR, rd, disp);
            return;
        }
        disp = (value >> 12) - (src_rx >> 12);
        if (disp == sextract64(disp, 0, 21)) {
            tcg_out_insn(s, 3406, ADRP, rd, disp);
            if (value & 0xfff) {
                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
            }
            return;
        }
    }

    /* Would it take fewer insns to begin with MOVN?  */
    if (ctpop64(value) >= 32) {
        t0 = ivalue;
        opc = I3405_MOVN;
    } else {
        t0 = value;
        opc = I3405_MOVZ;
    }
    s0 = ctz64(t0) & (63 & -16);
    t1 = t0 & ~(0xffffull << s0);
    s1 = ctz64(t1) & (63 & -16);
    t2 = t1 & ~(0xffffull << s1);
    if (t2 == 0) {
        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
        if (t1 != 0) {
            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
        }
        return;
    }

    /* For more than 2 insns, dump it into the constant pool.  */
    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
    tcg_out_insn(s, 3305, LDR, 0, rd);
}
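
/*
 * Worked example of the MOVZ/MOVK path: value = 0x1234000000005678 has
 * fewer than 32 set bits, so t0 = value and opc = MOVZ; s0 = 0 selects
 * 0x5678, t1 = 0x1234000000000000 gives s1 = 48 and t2 == 0, so we emit
 *     movz rd, #0x5678
 *     movk rd, #0x1234, lsl #48
 * Constants needing three or more moves come from the pool instead.
 */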

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

/* Define something more legible for general use.  */
#define tcg_out_ldst_r  tcg_out_insn_3310

static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
                         TCGReg rn, intptr_t offset, int lgsize)
{
    /* If the offset is naturally aligned and in range, then we can
       use the scaled uimm12 encoding */
    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
        uintptr_t scaled_uimm = offset >> lgsize;
        if (scaled_uimm <= 0xfff) {
            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
            return;
        }
    }

    /* Small signed offsets can use the unscaled encoding.  */
    if (offset >= -256 && offset < 256) {
        tcg_out_insn_3312(s, insn, rd, rn, offset);
        return;
    }

    /* Worst-case scenario, move offset to temp register, use reg offset.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
}
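
/*
 * For example, with insn = I3312_LDRX (lgsize 3): offset 8 is emitted
 * as scaled uimm12 = 1, offset -16 uses the unscaled signed 9-bit form,
 * and an unaligned or distant offset such as 0x123456 is first built
 * in TCG_REG_TMP0 and then used via the register-offset form.
 */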

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < 32 && arg < 32) {
            tcg_out_movr(s, type, ret, arg);
            break;
        } else if (ret < 32) {
            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
            break;
        } else if (arg < 32) {
            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
            break;
        }
        /* FALLTHRU */

    case TCG_TYPE_V64:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
        break;
    case TCG_TYPE_V128:
        tcg_debug_assert(ret >= 32 && arg >= 32);
        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_LDRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_LDRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t ofs)
{
    AArch64Insn insn;
    int lgsz;

    switch (type) {
    case TCG_TYPE_I32:
        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
        lgsz = 2;
        break;
    case TCG_TYPE_I64:
        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
        lgsz = 3;
        break;
    case TCG_TYPE_V64:
        insn = I3312_STRVD;
        lgsz = 3;
        break;
    case TCG_TYPE_V128:
        insn = I3312_STRVQ;
        lgsz = 4;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    if (type <= TCG_TYPE_I64 && val == 0) {
        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
        return true;
    }
    return false;
}

static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, unsigned int a, unsigned int b)
{
    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
}

static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
                                TCGReg rn, TCGReg rm, unsigned int a)
{
    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
}

static inline void tcg_out_shl(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int bits = ext ? 64 : 32;
    int max = bits - 1;
    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
}

static inline void tcg_out_shr(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_sar(TCGContext *s, TCGType ext,
                               TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
}

static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, m & max);
}

static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
                                TCGReg rd, TCGReg rn, unsigned int m)
{
    int max = ext ? 63 : 31;
    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
}

static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
                               TCGReg rn, unsigned lsb, unsigned width)
{
    unsigned size = ext ? 64 : 32;
    unsigned a = (size - lsb) & (size - 1);
    unsigned b = width - 1;
    tcg_out_bfm(s, ext, rd, rn, a, b);
}
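
/*
 * For example, depositing an 8-bit field at lsb 8 of a 32-bit value
 * gives a = (32 - 8) & 31 = 24 and b = 7, i.e. BFM wd, wn, #24, #7,
 * which is the alias BFI wd, wn, #8, #8.
 */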

static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
                        tcg_target_long b, bool const_b)
{
    if (is_tst_cond(cond)) {
        if (!const_b) {
            tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_out_logicali(s, I3404_ANDSI, ext, TCG_REG_XZR, a, b);
        }
    } else {
        if (!const_b) {
            tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
        } else if (b >= 0) {
            tcg_debug_assert(is_aimm(b));
            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
        } else {
            tcg_debug_assert(is_aimm(-b));
            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
        }
    }
}

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    tcg_debug_assert(offset == sextract64(offset, 0, 26));
    tcg_out_insn(s, 3206, B, offset);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
{
    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
    if (offset == sextract64(offset, 0, 26)) {
        tcg_out_insn(s, 3206, BL, offset);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, target);
}

static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
        tcg_out_insn(s, 3206, B, 0);
    } else {
        tcg_out_goto(s, l->u.value_ptr);
    }
}

static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
                           TCGArg b, bool b_const, TCGLabel *l)
{
    int tbit = -1;
    bool need_cmp = true;

    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_NE:
        /* cmp xN,0; b.ne L -> cbnz xN,L */
        if (b_const && b == 0) {
            need_cmp = false;
        }
        break;
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
        if (b_const && b == 0) {
            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
            tbit = ext ? 63 : 31;
            need_cmp = false;
        }
        break;
    case TCG_COND_TSTEQ:
    case TCG_COND_TSTNE:
        /* tst xN,0xffffffff; b.ne L -> cbnz wN,L */
        if (b_const && b == UINT32_MAX) {
            c = tcg_tst_eqne_cond(c);
            ext = TCG_TYPE_I32;
            need_cmp = false;
            break;
        }
        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
        if (b_const && is_power_of_2(b)) {
            tbit = ctz64(b);
            need_cmp = false;
        }
        break;
    default:
        break;
    }

    if (need_cmp) {
        tcg_out_cmp(s, ext, c, a, b, b_const);
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        tcg_out_insn(s, 3202, B_C, c, 0);
        return;
    }

    if (tbit >= 0) {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
        switch (c) {
        case TCG_COND_TSTEQ:
            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
            break;
        case TCG_COND_TSTNE:
            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
            break;
        default:
            g_assert_not_reached();
        }
    } else {
        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
        switch (c) {
        case TCG_COND_EQ:
            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
            break;
        case TCG_COND_NE:
            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* REV, REV16, REV32 */
    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
}

static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
    int bits = (8 << s_bits) - 1;
    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_8, rd, rn);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, type, MO_16, rd, rn);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32s(s, rd, rn);
}

static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
                               TCGReg rd, TCGReg rn)
{
    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
    int bits = (8 << s_bits) - 1;
    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_8, rd, rn);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_uxt(s, MO_16, rd, rn);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_ext32u(s, rd, rn);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
{
    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
}

static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
                            TCGReg rn, int64_t aimm)
{
    if (aimm >= 0) {
        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
    } else {
        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
    }
}

static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
                            TCGReg rh, TCGReg al, TCGReg ah,
                            tcg_target_long bl, tcg_target_long bh,
                            bool const_bl, bool const_bh, bool sub)
{
    TCGReg orig_rl = rl;
    AArch64Insn insn;

    if (rl == ah || (!const_bh && rl == bh)) {
        rl = TCG_REG_TMP0;
    }

    if (const_bl) {
        if (bl < 0) {
            bl = -bl;
            insn = sub ? I3401_ADDSI : I3401_SUBSI;
        } else {
            insn = sub ? I3401_SUBSI : I3401_ADDSI;
        }

        if (unlikely(al == TCG_REG_XZR)) {
            /* ??? We want to allow al to be zero for the benefit of
               negation via subtraction.  However, that leaves open the
               possibility of adding 0+const in the low part, and the
               immediate add instructions encode XSP not XZR.  Don't try
               anything more elaborate here than loading another zero.  */
            al = TCG_REG_TMP0;
            tcg_out_movi(s, ext, al, 0);
        }
        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
    } else {
        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
    }

    insn = I3503_ADC;
    if (const_bh) {
        /* Note that the only two constants we support are 0 and -1, and
           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
        if ((bh != 0) ^ sub) {
            insn = I3503_SBC;
        }
        bh = TCG_REG_XZR;
    } else if (sub) {
        insn = I3503_SBC;
    }
    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);

    tcg_out_mov(s, ext, orig_rl, rl);
}
1654
1655static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1656{
1657    static const uint32_t sync[] = {
1658        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1659        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1660        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1661        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1662        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1663    };
1664    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1665}
1666
1667static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1668                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1669{
1670    TCGReg a1 = a0;
1671    if (is_ctz) {
1672        a1 = TCG_REG_TMP0;
1673        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1674    }
1675    if (const_b && b == (ext ? 64 : 32)) {
1676        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1677    } else {
1678        AArch64Insn sel = I3506_CSEL;
1679
1680        tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1);
1681        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1682
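        /*
         * Fold the constant "not found" value into the select:
         * CSINV of XZR yields -1 and CSEL of XZR yields 0; any other
         * constant is materialized into the destination beforehand.
         */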
1683        if (const_b) {
1684            if (b == -1) {
1685                b = TCG_REG_XZR;
1686                sel = I3506_CSINV;
1687            } else if (b == 0) {
1688                b = TCG_REG_XZR;
1689            } else {
1690                tcg_out_movi(s, ext, d, b);
1691                b = d;
1692            }
1693        }
1694        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1695    }
1696}
1697
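/*
 * The fast-path host address is base plus index (extended per index_ext);
 * aa records the atomicity and alignment chosen by prepare_host_addr.
 */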
1698typedef struct {
1699    TCGReg base;
1700    TCGReg index;
1701    TCGType index_ext;
1702    TCGAtomAlign aa;
1703} HostAddress;
1704
1705bool tcg_target_has_memory_bswap(MemOp memop)
1706{
1707    return false;
1708}
1709
1710static const TCGLdstHelperParam ldst_helper_param = {
1711    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1712};
1713
1714static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1715{
1716    MemOp opc = get_memop(lb->oi);
1717
1718    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1719        return false;
1720    }
1721
1722    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1723    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1724    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1725    tcg_out_goto(s, lb->raddr);
1726    return true;
1727}
1728
1729static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1730{
1731    MemOp opc = get_memop(lb->oi);
1732
1733    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1734        return false;
1735    }
1736
1737    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1738    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1739    tcg_out_goto(s, lb->raddr);
1740    return true;
1741}
1742
1743/* We expect to use a 7-bit scaled negative offset from ENV.  */
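/* LDP's signed 7-bit immediate is scaled by 8, i.e. a reach of -512..504. */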
1744#define MIN_TLB_MASK_TABLE_OFS  -512
1745
1746/*
1747 * For system-mode, perform the TLB load and compare.
1748 * For user-mode, perform any required alignment tests.
1749 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1750 * is required and fill in @h with the host address for the fast path.
1751 */
1752static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1753                                           TCGReg addr_reg, MemOpIdx oi,
1754                                           bool is_ld)
1755{
1756    TCGType addr_type = s->addr_type;
1757    TCGLabelQemuLdst *ldst = NULL;
1758    MemOp opc = get_memop(oi);
1759    MemOp s_bits = opc & MO_SIZE;
1760    unsigned a_mask;
1761
1762    h->aa = atom_and_align_for_opc(s, opc,
1763                                   have_lse2 ? MO_ATOM_WITHIN16
1764                                             : MO_ATOM_IFALIGN,
1765                                   s_bits == MO_128);
1766    a_mask = (1 << h->aa.align) - 1;
1767
1768    if (tcg_use_softmmu) {
1769        unsigned s_mask = (1u << s_bits) - 1;
1770        unsigned mem_index = get_mmuidx(oi);
1771        TCGReg addr_adj;
1772        TCGType mask_type;
1773        uint64_t compare_mask;
1774
1775        ldst = new_ldst_label(s);
1776        ldst->is_ld = is_ld;
1777        ldst->oi = oi;
1778        ldst->addrlo_reg = addr_reg;
1779
1780        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1781                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1782
1783        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1784        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1785        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1786        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1787                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1788
1789        /* Extract the TLB index from the address into TMP0.  */
1790        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1791                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1792                     s->page_bits - CPU_TLB_ENTRY_BITS);
1793
1794        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1795        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1796
1797        /* Load the tlb comparator into TMP0, and the fast path addend. */
1798        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1799        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1800                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1801                         : offsetof(CPUTLBEntry, addr_write));
1802        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1803                   offsetof(CPUTLBEntry, addend));
1804
1805        /*
1806         * For aligned accesses, we check the first byte and include
1807         * the alignment bits within the address.  For unaligned access,
1808         * we check that we don't cross pages using the address of the
1809         * last byte of the access.
1810         */
1811        if (a_mask >= s_mask) {
1812            addr_adj = addr_reg;
1813        } else {
1814            addr_adj = TCG_REG_TMP2;
1815            tcg_out_insn(s, 3401, ADDI, addr_type,
1816                         addr_adj, addr_reg, s_mask - a_mask);
1817        }
1818        compare_mask = (uint64_t)s->page_mask | a_mask;
1819
1820        /* Store the page mask part of the address into TMP2.  */
1821        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1822                         addr_adj, compare_mask);
1823
1824        /* Perform the address comparison. */
1825        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1826
1827        /* If not equal, we jump to the slow path. */
1828        ldst->label_ptr[0] = s->code_ptr;
1829        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1830
1831        h->base = TCG_REG_TMP1;
1832        h->index = addr_reg;
1833        h->index_ext = addr_type;
1834    } else {
1835        if (a_mask) {
1836            ldst = new_ldst_label(s);
1837
1838            ldst->is_ld = is_ld;
1839            ldst->oi = oi;
1840            ldst->addrlo_reg = addr_reg;
1841
1842            /* tst addr, #mask */
1843            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1844
1845            /* b.ne slow_path */
1846            ldst->label_ptr[0] = s->code_ptr;
1847            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1848        }
1849
1850        if (guest_base || addr_type == TCG_TYPE_I32) {
1851            h->base = TCG_REG_GUEST_BASE;
1852            h->index = addr_reg;
1853            h->index_ext = addr_type;
1854        } else {
1855            h->base = addr_reg;
1856            h->index = TCG_REG_XZR;
1857            h->index_ext = TCG_TYPE_I64;
1858        }
1859    }
1860
1861    return ldst;
1862}
1863
1864static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1865                                   TCGReg data_r, HostAddress h)
1866{
1867    switch (memop & MO_SSIZE) {
1868    case MO_UB:
1869        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1870        break;
1871    case MO_SB:
1872        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1873                       data_r, h.base, h.index_ext, h.index);
1874        break;
1875    case MO_UW:
1876        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1877        break;
1878    case MO_SW:
1879        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1880                       data_r, h.base, h.index_ext, h.index);
1881        break;
1882    case MO_UL:
1883        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1884        break;
1885    case MO_SL:
1886        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1887        break;
1888    case MO_UQ:
1889        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1890        break;
1891    default:
1892        g_assert_not_reached();
1893    }
1894}
1895
1896static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1897                                   TCGReg data_r, HostAddress h)
1898{
1899    switch (memop & MO_SIZE) {
1900    case MO_8:
1901        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1902        break;
1903    case MO_16:
1904        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1905        break;
1906    case MO_32:
1907        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1908        break;
1909    case MO_64:
1910        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1911        break;
1912    default:
1913        g_assert_not_reached();
1914    }
1915}
1916
1917static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1918                            MemOpIdx oi, TCGType data_type)
1919{
1920    TCGLabelQemuLdst *ldst;
1921    HostAddress h;
1922
1923    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1924    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1925
1926    if (ldst) {
1927        ldst->type = data_type;
1928        ldst->datalo_reg = data_reg;
1929        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1930    }
1931}
1932
1933static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1934                            MemOpIdx oi, TCGType data_type)
1935{
1936    TCGLabelQemuLdst *ldst;
1937    HostAddress h;
1938
1939    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1940    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1941
1942    if (ldst) {
1943        ldst->type = data_type;
1944        ldst->datalo_reg = data_reg;
1945        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1946    }
1947}
1948
1949static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1950                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1951{
1952    TCGLabelQemuLdst *ldst;
1953    HostAddress h;
1954    TCGReg base;
1955    bool use_pair;
1956
1957    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1958
1959    /* Compose the final address, as LDP/STP have no indexing. */
1960    if (h.index == TCG_REG_XZR) {
1961        base = h.base;
1962    } else {
1963        base = TCG_REG_TMP2;
1964        if (h.index_ext == TCG_TYPE_I32) {
1965            /* add base, base, index, uxtw */
1966            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1967                         h.base, h.index, MO_32, 0);
1968        } else {
1969            /* add base, base, index */
1970            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1971        }
1972    }
1973
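    /* An LDP/STP pair suffices if 16-byte single-copy atomicity is not
       required, or if FEAT_LSE2 provides it for aligned accesses. */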
1974    use_pair = h.aa.atom < MO_128 || have_lse2;
1975
1976    if (!use_pair) {
1977        tcg_insn_unit *branch = NULL;
1978        TCGReg ll, lh, sl, sh;
1979
1980        /*
1981         * If we have already checked for 16-byte alignment, that's all
1982         * we need. Otherwise we have determined that misaligned atomicity
1983         * may be handled with two 8-byte loads.
1984         */
1985        if (h.aa.align < MO_128) {
1986            /*
1987             * TODO: align should be MO_64, so we need only test bit 3,
1988             * which means we could use TBNZ instead of ANDS+B_C.
1989             */
1990            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1991            branch = s->code_ptr;
1992            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1993            use_pair = true;
1994        }
1995
1996        if (is_ld) {
1997            /*
1998             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1999             *    ldxp lo, hi, [base]
2000             *    stxp t0, lo, hi, [base]
2001             *    cbnz t0, .-8
2002             * Require no overlap between data{lo,hi} and base.
2003             */
2004            if (datalo == base || datahi == base) {
2005                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
2006                base = TCG_REG_TMP2;
2007            }
2008            ll = sl = datalo;
2009            lh = sh = datahi;
2010        } else {
2011            /*
2012             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
2013             * 1: ldxp t0, t1, [base]
2014             *    stxp t0, lo, hi, [base]
2015             *    cbnz t0, 1b
2016             */
2017            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
2018            ll = TCG_REG_TMP0;
2019            lh = TCG_REG_TMP1;
2020            sl = datalo;
2021            sh = datahi;
2022        }
2023
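        /* Retry the exclusive pair until the store-exclusive succeeds,
           i.e. until TMP0 reads back as 0. */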
2024        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
2025        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
2026        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
2027
2028        if (use_pair) {
2029            /* "b .+8", branching across the one insn of use_pair. */
2030            tcg_out_insn(s, 3206, B, 2);
2031            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
2032        }
2033    }
2034
2035    if (use_pair) {
2036        if (is_ld) {
2037            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
2038        } else {
2039            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
2040        }
2041    }
2042
2043    if (ldst) {
2044        ldst->type = TCG_TYPE_I128;
2045        ldst->datalo_reg = datalo;
2046        ldst->datahi_reg = datahi;
2047        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2048    }
2049}
2050
2051static const tcg_insn_unit *tb_ret_addr;
2052
2053static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2054{
2055    const tcg_insn_unit *target;
2056    ptrdiff_t offset;
2057
2058    /* Reuse the zeroing that exists for goto_ptr.  */
2059    if (a0 == 0) {
2060        target = tcg_code_gen_epilogue;
2061    } else {
2062        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
2063        target = tb_ret_addr;
2064    }
2065
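    /* A direct B insn has a 26-bit signed displacement counted in
       instructions, i.e. a reach of +/-128MB. */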
2066    offset = tcg_pcrel_diff(s, target) >> 2;
2067    if (offset == sextract64(offset, 0, 26)) {
2068        tcg_out_insn(s, 3206, B, offset);
2069    } else {
2070        /*
2071         * Only x16/x17 generate BTI type Jump (2),
2072         * other registers generate BTI type Jump|Call (3).
2073         */
2074        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
2075        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
2076        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2077    }
2078}
2079
2080static void tcg_out_goto_tb(TCGContext *s, int which)
2081{
2082    /*
2083     * Either the direct branch or the indirect address load will be
2084     * patched by tb_target_set_jmp_target.  Assert early that the
2085     * indirect load offset is in range, regardless of direct branch distance.
2086     */
2087    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
2088    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
2089
2090    set_jmp_insn_offset(s, which);
2091    tcg_out32(s, I3206_B);
2092    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2093    set_jmp_reset_offset(s, which);
2094    tcg_out_bti(s, BTI_J);
2095}
2096
2097void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2098                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2099{
2100    uintptr_t d_addr = tb->jmp_target_addr[n];
2101    ptrdiff_t d_offset = d_addr - jmp_rx;
2102    tcg_insn_unit insn;
2103
2104    /* Either a direct branch, or an indirect branch via address load. */
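    /* d_offset is in bytes; B encodes d_offset >> 2 in 26 bits, hence 28 here. */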
2105    if (d_offset == sextract64(d_offset, 0, 28)) {
2106        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2107    } else {
2108        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2109        ptrdiff_t i_offset = i_addr - jmp_rx;
2110
2111        /* Note that we asserted this offset to be in range in tcg_out_goto_tb. */
2112        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2113    }
2114    qatomic_set((uint32_t *)jmp_rw, insn);
2115    flush_idcache_range(jmp_rx, jmp_rw, 4);
2116}
2117
2118static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType ext,
2119                       const TCGArg args[TCG_MAX_OP_ARGS],
2120                       const int const_args[TCG_MAX_OP_ARGS])
2121{
2122    /* Hoist the loads of the most common arguments.  */
2123    TCGArg a0 = args[0];
2124    TCGArg a1 = args[1];
2125    TCGArg a2 = args[2];
2126    int c2 = const_args[2];
2127
2128    /* Operands defined with the "rZ" constraint are either a register or
2129       the zero register, so REG0 need only test const_args[I], not args[I] == 0.  */
2130#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2131
2132    switch (opc) {
2133    case INDEX_op_goto_ptr:
2134        tcg_out_insn(s, 3207, BR, a0);
2135        break;
2136
2137    case INDEX_op_br:
2138        tcg_out_goto_label(s, arg_label(a0));
2139        break;
2140
2141    case INDEX_op_ld8u_i32:
2142    case INDEX_op_ld8u_i64:
2143        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2144        break;
2145    case INDEX_op_ld8s_i32:
2146        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2147        break;
2148    case INDEX_op_ld8s_i64:
2149        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2150        break;
2151    case INDEX_op_ld16u_i32:
2152    case INDEX_op_ld16u_i64:
2153        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2154        break;
2155    case INDEX_op_ld16s_i32:
2156        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2157        break;
2158    case INDEX_op_ld16s_i64:
2159        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2160        break;
2161    case INDEX_op_ld_i32:
2162    case INDEX_op_ld32u_i64:
2163        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2164        break;
2165    case INDEX_op_ld32s_i64:
2166        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2167        break;
2168    case INDEX_op_ld_i64:
2169        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2170        break;
2171
2172    case INDEX_op_st8_i32:
2173    case INDEX_op_st8_i64:
2174        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2175        break;
2176    case INDEX_op_st16_i32:
2177    case INDEX_op_st16_i64:
2178        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2179        break;
2180    case INDEX_op_st_i32:
2181    case INDEX_op_st32_i64:
2182        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2183        break;
2184    case INDEX_op_st_i64:
2185        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2186        break;
2187
2188    case INDEX_op_add_i32:
2189        a2 = (int32_t)a2;
2190        /* FALLTHRU */
2191    case INDEX_op_add_i64:
2192        if (c2) {
2193            tcg_out_addsubi(s, ext, a0, a1, a2);
2194        } else {
2195            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2196        }
2197        break;
2198
2199    case INDEX_op_sub_i32:
2200        a2 = (int32_t)a2;
2201        /* FALLTHRU */
2202    case INDEX_op_sub_i64:
2203        if (c2) {
2204            tcg_out_addsubi(s, ext, a0, a1, -a2);
2205        } else {
2206            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2207        }
2208        break;
2209
2210    case INDEX_op_neg_i64:
2211    case INDEX_op_neg_i32:
2212        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2213        break;
2214
2215    case INDEX_op_and_i32:
2216        a2 = (int32_t)a2;
2217        /* FALLTHRU */
2218    case INDEX_op_and_i64:
2219        if (c2) {
2220            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2221        } else {
2222            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2223        }
2224        break;
2225
2226    case INDEX_op_andc_i32:
2227        a2 = (int32_t)a2;
2228        /* FALLTHRU */
2229    case INDEX_op_andc_i64:
2230        if (c2) {
2231            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2232        } else {
2233            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2234        }
2235        break;
2236
2237    case INDEX_op_or_i32:
2238        a2 = (int32_t)a2;
2239        /* FALLTHRU */
2240    case INDEX_op_or_i64:
2241        if (c2) {
2242            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2243        } else {
2244            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2245        }
2246        break;
2247
2248    case INDEX_op_orc_i32:
2249        a2 = (int32_t)a2;
2250        /* FALLTHRU */
2251    case INDEX_op_orc_i64:
2252        if (c2) {
2253            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2254        } else {
2255            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2256        }
2257        break;
2258
2259    case INDEX_op_xor_i32:
2260        a2 = (int32_t)a2;
2261        /* FALLTHRU */
2262    case INDEX_op_xor_i64:
2263        if (c2) {
2264            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2265        } else {
2266            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2267        }
2268        break;
2269
2270    case INDEX_op_eqv_i32:
2271        a2 = (int32_t)a2;
2272        /* FALLTHRU */
2273    case INDEX_op_eqv_i64:
2274        if (c2) {
2275            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2276        } else {
2277            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2278        }
2279        break;
2280
2281    case INDEX_op_not_i64:
2282    case INDEX_op_not_i32:
2283        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2284        break;
2285
2286    case INDEX_op_mul_i64:
2287    case INDEX_op_mul_i32:
2288        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2289        break;
2290
2291    case INDEX_op_div_i64:
2292    case INDEX_op_div_i32:
2293        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2294        break;
2295    case INDEX_op_divu_i64:
2296    case INDEX_op_divu_i32:
2297        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2298        break;
2299
2300    case INDEX_op_rem_i64:
2301    case INDEX_op_rem_i32:
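        /* Remainder: a0 = a1 - (a1 / a2) * a2; SDIV computes the quotient
           and MSUB forms the remainder (remu below likewise, with UDIV). */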
2302        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2303        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2304        break;
2305    case INDEX_op_remu_i64:
2306    case INDEX_op_remu_i32:
2307        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2308        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2309        break;
2310
2311    case INDEX_op_shl_i64:
2312    case INDEX_op_shl_i32:
2313        if (c2) {
2314            tcg_out_shl(s, ext, a0, a1, a2);
2315        } else {
2316            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2317        }
2318        break;
2319
2320    case INDEX_op_shr_i64:
2321    case INDEX_op_shr_i32:
2322        if (c2) {
2323            tcg_out_shr(s, ext, a0, a1, a2);
2324        } else {
2325            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2326        }
2327        break;
2328
2329    case INDEX_op_sar_i64:
2330    case INDEX_op_sar_i32:
2331        if (c2) {
2332            tcg_out_sar(s, ext, a0, a1, a2);
2333        } else {
2334            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2335        }
2336        break;
2337
2338    case INDEX_op_rotr_i64:
2339    case INDEX_op_rotr_i32:
2340        if (c2) {
2341            tcg_out_rotr(s, ext, a0, a1, a2);
2342        } else {
2343            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2344        }
2345        break;
2346
2347    case INDEX_op_rotl_i64:
2348    case INDEX_op_rotl_i32:
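        /* AArch64 has only rotate-right; rotl(x, n) == rotr(x, -n & (width - 1)).
         * The variable case thus emits, e.g.:
         *    neg  w16, w2
         *    rorv w0, w1, w16
         */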
2349        if (c2) {
2350            tcg_out_rotl(s, ext, a0, a1, a2);
2351        } else {
2352            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2353            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2354        }
2355        break;
2356
2357    case INDEX_op_clz_i64:
2358    case INDEX_op_clz_i32:
2359        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2360        break;
2361    case INDEX_op_ctz_i64:
2362    case INDEX_op_ctz_i32:
2363        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2364        break;
2365
2366    case INDEX_op_brcond_i32:
2367        a1 = (int32_t)a1;
2368        /* FALLTHRU */
2369    case INDEX_op_brcond_i64:
2370        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2371        break;
2372
2373    case INDEX_op_setcond_i32:
2374        a2 = (int32_t)a2;
2375        /* FALLTHRU */
2376    case INDEX_op_setcond_i64:
2377        tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2378        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2379        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2380                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2381        break;
2382
2383    case INDEX_op_negsetcond_i32:
2384        a2 = (int32_t)a2;
2385        /* FALLTHRU */
2386    case INDEX_op_negsetcond_i64:
2387        tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2388        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2389        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2390                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2391        break;
2392
2393    case INDEX_op_movcond_i32:
2394        a2 = (int32_t)a2;
2395        /* FALLTHRU */
2396    case INDEX_op_movcond_i64:
2397        tcg_out_cmp(s, ext, args[5], a1, a2, c2);
2398        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2399        break;
2400
2401    case INDEX_op_qemu_ld_a32_i32:
2402    case INDEX_op_qemu_ld_a64_i32:
2403    case INDEX_op_qemu_ld_a32_i64:
2404    case INDEX_op_qemu_ld_a64_i64:
2405        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2406        break;
2407    case INDEX_op_qemu_st_a32_i32:
2408    case INDEX_op_qemu_st_a64_i32:
2409    case INDEX_op_qemu_st_a32_i64:
2410    case INDEX_op_qemu_st_a64_i64:
2411        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2412        break;
2413    case INDEX_op_qemu_ld_a32_i128:
2414    case INDEX_op_qemu_ld_a64_i128:
2415        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2416        break;
2417    case INDEX_op_qemu_st_a32_i128:
2418    case INDEX_op_qemu_st_a64_i128:
2419        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2420        break;
2421
2422    case INDEX_op_bswap64_i64:
2423        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2424        break;
2425    case INDEX_op_bswap32_i64:
2426        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2427        if (a2 & TCG_BSWAP_OS) {
2428            tcg_out_ext32s(s, a0, a0);
2429        }
2430        break;
2431    case INDEX_op_bswap32_i32:
2432        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2433        break;
2434    case INDEX_op_bswap16_i64:
2435    case INDEX_op_bswap16_i32:
2436        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2437        if (a2 & TCG_BSWAP_OS) {
2438            /* Output must be sign-extended. */
2439            tcg_out_ext16s(s, ext, a0, a0);
2440        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2441            /* Output must be zero-extended, but input isn't. */
2442            tcg_out_ext16u(s, a0, a0);
2443        }
2444        break;
2445
2446    case INDEX_op_deposit_i64:
2447    case INDEX_op_deposit_i32:
2448        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2449        break;
2450
2451    case INDEX_op_extract_i64:
2452    case INDEX_op_extract_i32:
2453        if (a2 == 0) {
2454            uint64_t mask = MAKE_64BIT_MASK(0, args[3]);
2455            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, mask);
2456        } else {
2457            tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2458        }
2459        break;
2460
2461    case INDEX_op_sextract_i64:
2462    case INDEX_op_sextract_i32:
2463        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2464        break;
2465
2466    case INDEX_op_extract2_i64:
2467    case INDEX_op_extract2_i32:
2468        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2469        break;
2470
2471    case INDEX_op_add2_i32:
2472        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2473                        (int32_t)args[4], args[5], const_args[4],
2474                        const_args[5], false);
2475        break;
2476    case INDEX_op_add2_i64:
2477        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2478                        args[5], const_args[4], const_args[5], false);
2479        break;
2480    case INDEX_op_sub2_i32:
2481        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2482                        (int32_t)args[4], args[5], const_args[4],
2483                        const_args[5], true);
2484        break;
2485    case INDEX_op_sub2_i64:
2486        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2487                        args[5], const_args[4], const_args[5], true);
2488        break;
2489
2490    case INDEX_op_muluh_i64:
2491        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2492        break;
2493    case INDEX_op_mulsh_i64:
2494        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2495        break;
2496
2497    case INDEX_op_mb:
2498        tcg_out_mb(s, a0);
2499        break;
2500
2501    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2502    case INDEX_op_mov_i64:
2503    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2504    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2505    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2506    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2507    case INDEX_op_ext8s_i64:
2508    case INDEX_op_ext8u_i32:
2509    case INDEX_op_ext8u_i64:
2510    case INDEX_op_ext16s_i64:
2511    case INDEX_op_ext16s_i32:
2512    case INDEX_op_ext16u_i64:
2513    case INDEX_op_ext16u_i32:
2514    case INDEX_op_ext32s_i64:
2515    case INDEX_op_ext32u_i64:
2516    case INDEX_op_ext_i32_i64:
2517    case INDEX_op_extu_i32_i64:
2518    case INDEX_op_extrl_i64_i32:
2519    default:
2520        g_assert_not_reached();
2521    }
2522
2523#undef REG0
2524}
2525
2526static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2527                           unsigned vecl, unsigned vece,
2528                           const TCGArg args[TCG_MAX_OP_ARGS],
2529                           const int const_args[TCG_MAX_OP_ARGS])
2530{
2531    static const AArch64Insn cmp_vec_insn[16] = {
2532        [TCG_COND_EQ] = I3616_CMEQ,
2533        [TCG_COND_GT] = I3616_CMGT,
2534        [TCG_COND_GE] = I3616_CMGE,
2535        [TCG_COND_GTU] = I3616_CMHI,
2536        [TCG_COND_GEU] = I3616_CMHS,
2537    };
2538    static const AArch64Insn cmp_scalar_insn[16] = {
2539        [TCG_COND_EQ] = I3611_CMEQ,
2540        [TCG_COND_GT] = I3611_CMGT,
2541        [TCG_COND_GE] = I3611_CMGE,
2542        [TCG_COND_GTU] = I3611_CMHI,
2543        [TCG_COND_GEU] = I3611_CMHS,
2544    };
2545    static const AArch64Insn cmp0_vec_insn[16] = {
2546        [TCG_COND_EQ] = I3617_CMEQ0,
2547        [TCG_COND_GT] = I3617_CMGT0,
2548        [TCG_COND_GE] = I3617_CMGE0,
2549        [TCG_COND_LT] = I3617_CMLT0,
2550        [TCG_COND_LE] = I3617_CMLE0,
2551    };
2552    static const AArch64Insn cmp0_scalar_insn[16] = {
2553        [TCG_COND_EQ] = I3612_CMEQ0,
2554        [TCG_COND_GT] = I3612_CMGT0,
2555        [TCG_COND_GE] = I3612_CMGE0,
2556        [TCG_COND_LT] = I3612_CMLT0,
2557        [TCG_COND_LE] = I3612_CMLE0,
2558    };
2559
2560    TCGType type = vecl + TCG_TYPE_V64;
2561    unsigned is_q = vecl;
2562    bool is_scalar = !is_q && vece == MO_64;
2563    TCGArg a0, a1, a2, a3;
2564    int cmode, imm8;
2565
2566    a0 = args[0];
2567    a1 = args[1];
2568    a2 = args[2];
2569
2570    switch (opc) {
2571    case INDEX_op_ld_vec:
2572        tcg_out_ld(s, type, a0, a1, a2);
2573        break;
2574    case INDEX_op_st_vec:
2575        tcg_out_st(s, type, a0, a1, a2);
2576        break;
2577    case INDEX_op_dupm_vec:
2578        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2579        break;
2580    case INDEX_op_add_vec:
2581        if (is_scalar) {
2582            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2583        } else {
2584            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2585        }
2586        break;
2587    case INDEX_op_sub_vec:
2588        if (is_scalar) {
2589            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2590        } else {
2591            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2592        }
2593        break;
2594    case INDEX_op_mul_vec:
2595        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2596        break;
2597    case INDEX_op_neg_vec:
2598        if (is_scalar) {
2599            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2600        } else {
2601            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2602        }
2603        break;
2604    case INDEX_op_abs_vec:
2605        if (is_scalar) {
2606            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2607        } else {
2608            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2609        }
2610        break;
2611    case INDEX_op_and_vec:
2612        if (const_args[2]) {
2613            is_shimm1632(~a2, &cmode, &imm8);
2614            if (a0 == a1) {
2615                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2616                return;
2617            }
2618            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2619            a2 = a0;
2620        }
2621        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2622        break;
2623    case INDEX_op_or_vec:
2624        if (const_args[2]) {
2625            is_shimm1632(a2, &cmode, &imm8);
2626            if (a0 == a1) {
2627                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2628                return;
2629            }
2630            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2631            a2 = a0;
2632        }
2633        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2634        break;
2635    case INDEX_op_andc_vec:
2636        if (const_args[2]) {
2637            is_shimm1632(a2, &cmode, &imm8);
2638            if (a0 == a1) {
2639                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2640                return;
2641            }
2642            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2643            a2 = a0;
2644        }
2645        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2646        break;
2647    case INDEX_op_orc_vec:
2648        if (const_args[2]) {
2649            is_shimm1632(~a2, &cmode, &imm8);
2650            if (a0 == a1) {
2651                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2652                return;
2653            }
2654            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2655            a2 = a0;
2656        }
2657        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2658        break;
2659    case INDEX_op_xor_vec:
2660        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2661        break;
2662    case INDEX_op_ssadd_vec:
2663        if (is_scalar) {
2664            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2665        } else {
2666            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2667        }
2668        break;
2669    case INDEX_op_sssub_vec:
2670        if (is_scalar) {
2671            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2672        } else {
2673            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2674        }
2675        break;
2676    case INDEX_op_usadd_vec:
2677        if (is_scalar) {
2678            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2679        } else {
2680            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2681        }
2682        break;
2683    case INDEX_op_ussub_vec:
2684        if (is_scalar) {
2685            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2686        } else {
2687            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2688        }
2689        break;
2690    case INDEX_op_smax_vec:
2691        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2692        break;
2693    case INDEX_op_smin_vec:
2694        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2695        break;
2696    case INDEX_op_umax_vec:
2697        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2698        break;
2699    case INDEX_op_umin_vec:
2700        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2701        break;
2702    case INDEX_op_not_vec:
2703        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2704        break;
2705    case INDEX_op_shli_vec:
2706        if (is_scalar) {
2707            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2708        } else {
2709            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2710        }
2711        break;
2712    case INDEX_op_shri_vec:
2713        if (is_scalar) {
2714            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2715        } else {
2716            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2717        }
2718        break;
2719    case INDEX_op_sari_vec:
2720        if (is_scalar) {
2721            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2722        } else {
2723            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2724        }
2725        break;
2726    case INDEX_op_aa64_sli_vec:
2727        if (is_scalar) {
2728            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2729        } else {
2730            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2731        }
2732        break;
2733    case INDEX_op_shlv_vec:
2734        if (is_scalar) {
2735            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2736        } else {
2737            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2738        }
2739        break;
2740    case INDEX_op_aa64_sshl_vec:
2741        if (is_scalar) {
2742            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2743        } else {
2744            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2745        }
2746        break;
2747    case INDEX_op_cmp_vec:
2748        {
2749            TCGCond cond = args[3];
2750            AArch64Insn insn;
2751
2752            switch (cond) {
2753            case TCG_COND_NE:
2754                if (const_args[2]) {
2755                    if (is_scalar) {
2756                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2757                    } else {
2758                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2759                    }
2760                } else {
2761                    if (is_scalar) {
2762                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2763                    } else {
2764                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2765                    }
2766                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2767                }
2768                break;
2769
2770            case TCG_COND_TSTNE:
2771            case TCG_COND_TSTEQ:
2772                if (const_args[2]) {
2773                    /* (x & 0) == 0 always holds: TSTEQ yields -1, TSTNE yields 0. */
2774                    tcg_out_dupi_vec(s, type, MO_8, a0,
2775                                     -(cond == TCG_COND_TSTEQ));
2776                    break;
2777                }
2778                if (is_scalar) {
2779                    tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a2);
2780                } else {
2781                    tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a2);
2782                }
2783                if (cond == TCG_COND_TSTEQ) {
2784                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2785                }
2786                break;
2787
2788            default:
2789                if (const_args[2]) {
2790                    if (is_scalar) {
2791                        insn = cmp0_scalar_insn[cond];
2792                        if (insn) {
2793                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2794                            break;
2795                        }
2796                    } else {
2797                        insn = cmp0_vec_insn[cond];
2798                        if (insn) {
2799                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2800                            break;
2801                        }
2802                    }
2803                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2804                    a2 = TCG_VEC_TMP0;
2805                }
2806                if (is_scalar) {
2807                    insn = cmp_scalar_insn[cond];
2808                    if (insn == 0) {
2809                        TCGArg t;
2810                        t = a1, a1 = a2, a2 = t;
2811                        cond = tcg_swap_cond(cond);
2812                        insn = cmp_scalar_insn[cond];
2813                        tcg_debug_assert(insn != 0);
2814                    }
2815                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2816                } else {
2817                    insn = cmp_vec_insn[cond];
2818                    if (insn == 0) {
2819                        TCGArg t;
2820                        t = a1, a1 = a2, a2 = t;
2821                        cond = tcg_swap_cond(cond);
2822                        insn = cmp_vec_insn[cond];
2823                        tcg_debug_assert(insn != 0);
2824                    }
2825                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2826                }
2827                break;
2828            }
2829        }
2830        break;
2831
2832    case INDEX_op_bitsel_vec:
2833        a3 = args[3];
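        /* BIT, BIF and BSL differ only in which operand is tied to the
           destination; pick whichever form matches the existing aliasing,
           falling back to a mov of the mask before BSL. */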
2834        if (a0 == a3) {
2835            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2836        } else if (a0 == a2) {
2837            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2838        } else {
2839            if (a0 != a1) {
2840                tcg_out_mov(s, type, a0, a1);
2841            }
2842            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2843        }
2844        break;
2845
2846    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2847    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2848    default:
2849        g_assert_not_reached();
2850    }
2851}
2852
2853int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2854{
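    /* Return 1 if the opcode is supported directly, -1 if it must be
       expanded via tcg_expand_vec_op, and 0 if unsupported. */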
2855    switch (opc) {
2856    case INDEX_op_add_vec:
2857    case INDEX_op_sub_vec:
2858    case INDEX_op_and_vec:
2859    case INDEX_op_or_vec:
2860    case INDEX_op_xor_vec:
2861    case INDEX_op_andc_vec:
2862    case INDEX_op_orc_vec:
2863    case INDEX_op_neg_vec:
2864    case INDEX_op_abs_vec:
2865    case INDEX_op_not_vec:
2866    case INDEX_op_cmp_vec:
2867    case INDEX_op_shli_vec:
2868    case INDEX_op_shri_vec:
2869    case INDEX_op_sari_vec:
2870    case INDEX_op_ssadd_vec:
2871    case INDEX_op_sssub_vec:
2872    case INDEX_op_usadd_vec:
2873    case INDEX_op_ussub_vec:
2874    case INDEX_op_shlv_vec:
2875    case INDEX_op_bitsel_vec:
2876        return 1;
2877    case INDEX_op_rotli_vec:
2878    case INDEX_op_shrv_vec:
2879    case INDEX_op_sarv_vec:
2880    case INDEX_op_rotlv_vec:
2881    case INDEX_op_rotrv_vec:
2882        return -1;
2883    case INDEX_op_mul_vec:
2884    case INDEX_op_smax_vec:
2885    case INDEX_op_smin_vec:
2886    case INDEX_op_umax_vec:
2887    case INDEX_op_umin_vec:
2888        return vece < MO_64;
2889
2890    default:
2891        return 0;
2892    }
2893}
2894
2895void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2896                       TCGArg a0, ...)
2897{
2898    va_list va;
2899    TCGv_vec v0, v1, v2, t1, t2, c1;
2900    TCGArg a2;
2901
2902    va_start(va, a0);
2903    v0 = temp_tcgv_vec(arg_temp(a0));
2904    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2905    a2 = va_arg(va, TCGArg);
2906    va_end(va);
2907
2908    switch (opc) {
2909    case INDEX_op_rotli_vec:
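        /* Expand as t1 = v1 >> (width - a2); SLI then inserts v1 << a2
           above t1's low a2 bits, yielding the rotate. */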
2910        t1 = tcg_temp_new_vec(type);
2911        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2912        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2913                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2914        tcg_temp_free_vec(t1);
2915        break;
2916
2917    case INDEX_op_shrv_vec:
2918    case INDEX_op_sarv_vec:
2919        /* Right shifts are negative left shifts for AArch64.  */
2920        v2 = temp_tcgv_vec(arg_temp(a2));
2921        t1 = tcg_temp_new_vec(type);
2922        tcg_gen_neg_vec(vece, t1, v2);
2923        opc = (opc == INDEX_op_shrv_vec
2924               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2925        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2926                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2927        tcg_temp_free_vec(t1);
2928        break;
2929
2930    case INDEX_op_rotlv_vec:
2931        v2 = temp_tcgv_vec(arg_temp(a2));
2932        t1 = tcg_temp_new_vec(type);
2933        c1 = tcg_constant_vec(type, vece, 8 << vece);
2934        tcg_gen_sub_vec(vece, t1, v2, c1);
2935        /* Right shifts are negative left shifts for AArch64.  */
2936        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2937                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2938        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2939                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2940        tcg_gen_or_vec(vece, v0, v0, t1);
2941        tcg_temp_free_vec(t1);
2942        break;
2943
2944    case INDEX_op_rotrv_vec:
2945        v2 = temp_tcgv_vec(arg_temp(a2));
2946        t1 = tcg_temp_new_vec(type);
2947        t2 = tcg_temp_new_vec(type);
2948        c1 = tcg_constant_vec(type, vece, 8 << vece);
2949        tcg_gen_neg_vec(vece, t1, v2);
2950        tcg_gen_sub_vec(vece, t2, c1, v2);
2951        /* Right shifts are negative left shifts for AArch64.  */
2952        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2953                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2954        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2955                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2956        tcg_gen_or_vec(vece, v0, t1, t2);
2957        tcg_temp_free_vec(t1);
2958        tcg_temp_free_vec(t2);
2959        break;
2960
2961    default:
2962        g_assert_not_reached();
2963    }
2964}
2965
2966static TCGConstraintSetIndex
2967tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
2968{
2969    switch (op) {
2970    case INDEX_op_goto_ptr:
2971        return C_O0_I1(r);
2972
2973    case INDEX_op_ld8u_i32:
2974    case INDEX_op_ld8s_i32:
2975    case INDEX_op_ld16u_i32:
2976    case INDEX_op_ld16s_i32:
2977    case INDEX_op_ld_i32:
2978    case INDEX_op_ld8u_i64:
2979    case INDEX_op_ld8s_i64:
2980    case INDEX_op_ld16u_i64:
2981    case INDEX_op_ld16s_i64:
2982    case INDEX_op_ld32u_i64:
2983    case INDEX_op_ld32s_i64:
2984    case INDEX_op_ld_i64:
2985    case INDEX_op_neg_i32:
2986    case INDEX_op_neg_i64:
2987    case INDEX_op_not_i32:
2988    case INDEX_op_not_i64:
2989    case INDEX_op_bswap16_i32:
2990    case INDEX_op_bswap32_i32:
2991    case INDEX_op_bswap16_i64:
2992    case INDEX_op_bswap32_i64:
2993    case INDEX_op_bswap64_i64:
2994    case INDEX_op_ext8s_i32:
2995    case INDEX_op_ext16s_i32:
2996    case INDEX_op_ext8u_i32:
2997    case INDEX_op_ext16u_i32:
2998    case INDEX_op_ext8s_i64:
2999    case INDEX_op_ext16s_i64:
3000    case INDEX_op_ext32s_i64:
3001    case INDEX_op_ext8u_i64:
3002    case INDEX_op_ext16u_i64:
3003    case INDEX_op_ext32u_i64:
3004    case INDEX_op_ext_i32_i64:
3005    case INDEX_op_extu_i32_i64:
3006    case INDEX_op_extract_i32:
3007    case INDEX_op_extract_i64:
3008    case INDEX_op_sextract_i32:
3009    case INDEX_op_sextract_i64:
3010        return C_O1_I1(r, r);
3011
3012    case INDEX_op_st8_i32:
3013    case INDEX_op_st16_i32:
3014    case INDEX_op_st_i32:
3015    case INDEX_op_st8_i64:
3016    case INDEX_op_st16_i64:
3017    case INDEX_op_st32_i64:
3018    case INDEX_op_st_i64:
3019        return C_O0_I2(rZ, r);
3020
3021    case INDEX_op_add_i32:
3022    case INDEX_op_add_i64:
3023    case INDEX_op_sub_i32:
3024    case INDEX_op_sub_i64:
3025        return C_O1_I2(r, r, rA);
3026
3027    case INDEX_op_setcond_i32:
3028    case INDEX_op_setcond_i64:
3029    case INDEX_op_negsetcond_i32:
3030    case INDEX_op_negsetcond_i64:
3031        return C_O1_I2(r, r, rC);
3032
3033    case INDEX_op_mul_i32:
3034    case INDEX_op_mul_i64:
3035    case INDEX_op_div_i32:
3036    case INDEX_op_div_i64:
3037    case INDEX_op_divu_i32:
3038    case INDEX_op_divu_i64:
3039    case INDEX_op_rem_i32:
3040    case INDEX_op_rem_i64:
3041    case INDEX_op_remu_i32:
3042    case INDEX_op_remu_i64:
3043    case INDEX_op_muluh_i64:
3044    case INDEX_op_mulsh_i64:
3045        return C_O1_I2(r, r, r);
3046
3047    case INDEX_op_and_i32:
3048    case INDEX_op_and_i64:
3049    case INDEX_op_or_i32:
3050    case INDEX_op_or_i64:
3051    case INDEX_op_xor_i32:
3052    case INDEX_op_xor_i64:
3053    case INDEX_op_andc_i32:
3054    case INDEX_op_andc_i64:
3055    case INDEX_op_orc_i32:
3056    case INDEX_op_orc_i64:
3057    case INDEX_op_eqv_i32:
3058    case INDEX_op_eqv_i64:
3059        return C_O1_I2(r, r, rL);
3060
3061    case INDEX_op_shl_i32:
3062    case INDEX_op_shr_i32:
3063    case INDEX_op_sar_i32:
3064    case INDEX_op_rotl_i32:
3065    case INDEX_op_rotr_i32:
3066    case INDEX_op_shl_i64:
3067    case INDEX_op_shr_i64:
3068    case INDEX_op_sar_i64:
3069    case INDEX_op_rotl_i64:
3070    case INDEX_op_rotr_i64:
3071        return C_O1_I2(r, r, ri);
3072
3073    case INDEX_op_clz_i32:
3074    case INDEX_op_ctz_i32:
3075    case INDEX_op_clz_i64:
3076    case INDEX_op_ctz_i64:
3077        return C_O1_I2(r, r, rAL);
3078
3079    case INDEX_op_brcond_i32:
3080    case INDEX_op_brcond_i64:
3081        return C_O0_I2(r, rC);
3082
3083    case INDEX_op_movcond_i32:
3084    case INDEX_op_movcond_i64:
3085        return C_O1_I4(r, r, rC, rZ, rZ);
3086
3087    case INDEX_op_qemu_ld_a32_i32:
3088    case INDEX_op_qemu_ld_a64_i32:
3089    case INDEX_op_qemu_ld_a32_i64:
3090    case INDEX_op_qemu_ld_a64_i64:
3091        return C_O1_I1(r, r);
3092    case INDEX_op_qemu_ld_a32_i128:
3093    case INDEX_op_qemu_ld_a64_i128:
3094        return C_O2_I1(r, r, r);
3095    case INDEX_op_qemu_st_a32_i32:
3096    case INDEX_op_qemu_st_a64_i32:
3097    case INDEX_op_qemu_st_a32_i64:
3098    case INDEX_op_qemu_st_a64_i64:
3099        return C_O0_I2(rZ, r);
3100    case INDEX_op_qemu_st_a32_i128:
3101    case INDEX_op_qemu_st_a64_i128:
3102        return C_O0_I3(rZ, rZ, r);
3103
3104    case INDEX_op_deposit_i32:
3105    case INDEX_op_deposit_i64:
3106        return C_O1_I2(r, 0, rZ);
3107
3108    case INDEX_op_extract2_i32:
3109    case INDEX_op_extract2_i64:
3110        return C_O1_I2(r, rZ, rZ);
3111
3112    case INDEX_op_add2_i32:
3113    case INDEX_op_add2_i64:
3114    case INDEX_op_sub2_i32:
3115    case INDEX_op_sub2_i64:
3116        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
3117
3118    case INDEX_op_add_vec:
3119    case INDEX_op_sub_vec:
3120    case INDEX_op_mul_vec:
3121    case INDEX_op_xor_vec:
3122    case INDEX_op_ssadd_vec:
3123    case INDEX_op_sssub_vec:
3124    case INDEX_op_usadd_vec:
3125    case INDEX_op_ussub_vec:
3126    case INDEX_op_smax_vec:
3127    case INDEX_op_smin_vec:
3128    case INDEX_op_umax_vec:
3129    case INDEX_op_umin_vec:
3130    case INDEX_op_shlv_vec:
3131    case INDEX_op_shrv_vec:
3132    case INDEX_op_sarv_vec:
3133    case INDEX_op_aa64_sshl_vec:
3134        return C_O1_I2(w, w, w);
3135    case INDEX_op_not_vec:
3136    case INDEX_op_neg_vec:
3137    case INDEX_op_abs_vec:
3138    case INDEX_op_shli_vec:
3139    case INDEX_op_shri_vec:
3140    case INDEX_op_sari_vec:
3141        return C_O1_I1(w, w);
3142    case INDEX_op_ld_vec:
3143    case INDEX_op_dupm_vec:
3144        return C_O1_I1(w, r);
3145    case INDEX_op_st_vec:
3146        return C_O0_I2(w, r);
3147    case INDEX_op_dup_vec:
3148        return C_O1_I1(w, wr);
3149    case INDEX_op_or_vec:
3150    case INDEX_op_andc_vec:
3151        return C_O1_I2(w, w, wO);
3152    case INDEX_op_and_vec:
3153    case INDEX_op_orc_vec:
3154        return C_O1_I2(w, w, wN);
3155    case INDEX_op_cmp_vec:
3156        return C_O1_I2(w, w, wZ);
3157    case INDEX_op_bitsel_vec:
3158        return C_O1_I3(w, w, w, w);
3159    case INDEX_op_aa64_sli_vec:
3160        return C_O1_I2(w, 0, w);
3161
3162    default:
3163        return C_NotImplemented;
3164    }
3165}
3166
3167static void tcg_target_init(TCGContext *s)
3168{
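    /* In the register set, bits 0..31 are the general registers x0..sp
       and bits 32..63 are the vector registers v0..v31. */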
3169    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3170    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3171    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3172    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3173
3174    tcg_target_call_clobber_regs = -1ull;
3175    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3176    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3177    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3178    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3179    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3180    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3181    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3182    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3183    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3184    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3185    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3186    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3187    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3188    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3189    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3190    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3191    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3192    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3193    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3194
3195    s->reserved_regs = 0;
3196    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3197    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3198    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3199    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3200    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3201    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3202    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3203}
3204
3205/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3206#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3207
3208#define FRAME_SIZE \
3209    ((PUSH_SIZE \
3210      + TCG_STATIC_CALL_ARGS_SIZE \
3211      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3212      + TCG_TARGET_STACK_ALIGN - 1) \
3213     & ~(TCG_TARGET_STACK_ALIGN - 1))
3214
3215/* We're expecting a 2-byte uleb128-encoded value.  */
3216QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3217
3218/* We're expecting to use a single ADDI insn.  */
3219QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3220
3221static void tcg_target_qemu_prologue(TCGContext *s)
3222{
3223    TCGReg r;
3224
3225    tcg_out_bti(s, BTI_C);
3226
3227    /* Push (FP, LR) and allocate space for all saved registers.  */
3228    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3229                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
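    /* Equivalent to "stp x29, x30, [sp, #-PUSH_SIZE]!". */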
3230
3231    /* Set up frame pointer for canonical unwinding.  */
3232    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3233
3234    /* Store callee-preserved regs x19..x28.  */
3235    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3236        int ofs = (r - TCG_REG_X19 + 2) * 8;
3237        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3238    }
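    /* Five pairs, at offsets 16, 32, 48, 64 and 80 from the new SP. */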
3239
3240    /* Make stack space for TCG locals.  */
3241    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3242                 FRAME_SIZE - PUSH_SIZE);
3243
3244    /* Tell TCG where to find the TCG locals: base register, offset, size.  */
3245    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3246                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3247
3248    if (!tcg_use_softmmu) {
3249        /*
3250         * Note that XZR cannot be encoded in the address base register slot,
3251         * as that encoding actually means SP.  Depending on the guest, we may
3252         * need to zero-extend the guest address via the address index register
3253         * slot, so we must load even a zero guest base into a register.
3254         */
3255        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3256        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3257    }
3258
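    /* On entry, x0 holds the env pointer and x1 the translated code to
       execute: install env as AREG0 (x19) and tail-call into the TB. */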
3259    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3260    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3261
3262    /*
3263     * Return path for goto_ptr.  Set return value to 0, as exit_tb does,
3264     * and fall through to the rest of the epilogue.
3265     */
3266    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3267    tcg_out_bti(s, BTI_J);
3268    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3269
3270    /* TB epilogue */
3271    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3272    tcg_out_bti(s, BTI_J);
3273
3274    /* Remove TCG locals stack space.  */
3275    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3276                 FRAME_SIZE - PUSH_SIZE);
3277
3278    /* Restore registers x19..x28.  */
3279    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3280        int ofs = (r - TCG_REG_X19 + 2) * 8;
3281        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3282    }
3283
3284    /* Pop (FP, LR), restore SP to previous frame.  */
3285    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3286                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3287    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3288}
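/*
 * For reference, the prologue emitted above is roughly the following,
 * assuming PUSH_SIZE == 96 and omitting the optional guest_base load:
 *
 *     bti  c
 *     stp  x29, x30, [sp, #-96]!
 *     mov  x29, sp
 *     stp  x19, x20, [sp, #16]
 *     stp  x21, x22, [sp, #32]
 *     stp  x23, x24, [sp, #48]
 *     stp  x25, x26, [sp, #64]
 *     stp  x27, x28, [sp, #80]
 *     sub  sp, sp, #(FRAME_SIZE - 96)
 *     mov  x19, x0
 *     br   x1
 */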
3289
3290static void tcg_out_tb_start(TCGContext *s)
3291{
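    /* Every TB may be entered via an indirect branch, so it must begin
       with a BTI "jump" landing pad. */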
3292    tcg_out_bti(s, BTI_J);
3293}
3294
3295static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3296{
3297    int i;
3298    for (i = 0; i < count; ++i) {
3299        p[i] = NOP;
3300    }
3301}
3302
3303typedef struct {
3304    DebugFrameHeader h;
3305    uint8_t fde_def_cfa[4];
3306    uint8_t fde_reg_ofs[24];
3307} DebugFrame;
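/* fde_reg_ofs holds 12 two-byte DW_CFA_offset records: x19-x28, lr, fp. */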
3308
3309#define ELF_HOST_MACHINE EM_AARCH64
3310
3311static const DebugFrame debug_frame = {
3312    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3313    .h.cie.id = -1,
3314    .h.cie.version = 1,
3315    .h.cie.code_align = 1,
3316    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3317    .h.cie.return_column = TCG_REG_LR,
3318
3319    /* Total FDE size does not include the "len" member.  */
3320    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3321
3322    .fde_def_cfa = {
3323        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3324        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3325        (FRAME_SIZE >> 7)
3326    },
3327    .fde_reg_ofs = {
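        /* DW_CFA_offset encodes the register in the low 6 bits of the
           opcode (0x80 | regno); the operand is the CFA-relative offset
           factored by data_align, i.e. divided by -8. */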
3328        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3329        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3330        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3331        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3332        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3333        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3334        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3335        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3336        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3337        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3338        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3339        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3340    }
3341};
3342
3343void tcg_register_jit(const void *buf, size_t buf_size)
3344{
3345    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3346}
3347