xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 92a11b935df24829cac30510196c6d07b36891ea)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#define TCG_REG_GUEST_BASE TCG_REG_X28
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
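    /* offset is counted in 32-bit tcg_insn_unit steps, so the 26-bit
       form below reaches +/- 128MB from the branch.  */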
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
109{
110    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
111    ptrdiff_t offset = target - src_rx;
112
113    if (offset == sextract64(offset, 0, 14)) {
114        *src_rw = deposit32(*src_rw, 5, 14, offset);
115        return true;
116    }
117    return false;
118}
119
120static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
121                        intptr_t value, intptr_t addend)
122{
123    tcg_debug_assert(addend == 0);
124    switch (type) {
125    case R_AARCH64_JUMP26:
126    case R_AARCH64_CALL26:
127        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
128    case R_AARCH64_CONDBR19:
129        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
130    case R_AARCH64_TSTBR14:
131        return reloc_pc14(code_ptr, (const tcg_insn_unit *)value);
132    default:
133        g_assert_not_reached();
134    }
135}
136
137#define TCG_CT_CONST_AIMM 0x100
138#define TCG_CT_CONST_LIMM 0x200
139#define TCG_CT_CONST_ZERO 0x400
140#define TCG_CT_CONST_MONE 0x800
141#define TCG_CT_CONST_ORRI 0x1000
142#define TCG_CT_CONST_ANDI 0x2000
143#define TCG_CT_CONST_CMP  0x4000
144
145#define ALL_GENERAL_REGS  0xffffffffu
146#define ALL_VECTOR_REGS   0xffffffff00000000ull
147
148/* Match a constant valid for addition (12-bit, optionally shifted).  */
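/* For example, 0xfff and 0x45000 (0x45 << 12) are valid; 0x1001 is not.  */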
149static inline bool is_aimm(uint64_t val)
150{
151    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
152}
153
154/* Match a constant valid for logical operations.  */
155static inline bool is_limm(uint64_t val)
156{
157    /* Taking a simplified view of the logical immediates for now, ignoring
158       the replication that can happen across the field.  Match bit patterns
159       of the forms
160           0....01....1
161           0..01..10..0
162       and their inverses.  */
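    /* For example, 0x0ff0 qualifies: adding its lowest set bit (0x10)
       yields 0x1000, a power of two; 0x0f0f does not.  */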
163
164    /* Make things easier below, by testing the form with msb clear. */
165    if ((int64_t)val < 0) {
166        val = ~val;
167    }
168    if (val == 0) {
169        return false;
170    }
171    val += val & -val;
172    return (val & (val - 1)) == 0;
173}
174
175/* Return true if v16 is a valid 16-bit shifted immediate.  */
176static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
177{
178    if (v16 == (v16 & 0xff)) {
179        *cmode = 0x8;
180        *imm8 = v16 & 0xff;
181        return true;
182    } else if (v16 == (v16 & 0xff00)) {
183        *cmode = 0xa;
184        *imm8 = v16 >> 8;
185        return true;
186    }
187    return false;
188}
189
190/* Return true if v32 is a valid 32-bit shifted immediate.  */
191static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
192{
193    if (v32 == (v32 & 0xff)) {
194        *cmode = 0x0;
195        *imm8 = v32 & 0xff;
196        return true;
197    } else if (v32 == (v32 & 0xff00)) {
198        *cmode = 0x2;
199        *imm8 = (v32 >> 8) & 0xff;
200        return true;
201    } else if (v32 == (v32 & 0xff0000)) {
202        *cmode = 0x4;
203        *imm8 = (v32 >> 16) & 0xff;
204        return true;
205    } else if (v32 == (v32 & 0xff000000)) {
206        *cmode = 0x6;
207        *imm8 = v32 >> 24;
208        return true;
209    }
210    return false;
211}
212
213/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
214static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
215{
216    if ((v32 & 0xffff00ff) == 0xff) {
217        *cmode = 0xc;
218        *imm8 = (v32 >> 8) & 0xff;
219        return true;
220    } else if ((v32 & 0xff00ffff) == 0xffff) {
221        *cmode = 0xd;
222        *imm8 = (v32 >> 16) & 0xff;
223        return true;
224    }
225    return false;
226}
227
228/* Return true if v32 is a valid float32 immediate.  */
229static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
230{
231    if (extract32(v32, 0, 19) == 0
232        && (extract32(v32, 25, 6) == 0x20
233            || extract32(v32, 25, 6) == 0x1f)) {
234        *cmode = 0xf;
235        *imm8 = (extract32(v32, 31, 1) << 7)
236              | (extract32(v32, 25, 1) << 6)
237              | extract32(v32, 19, 6);
238        return true;
239    }
240    return false;
241}
242
243/* Return true if v64 is a valid float64 immediate.  */
244static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
245{
246    if (extract64(v64, 0, 48) == 0
247        && (extract64(v64, 54, 9) == 0x100
248            || extract64(v64, 54, 9) == 0x0ff)) {
249        *cmode = 0xf;
250        *imm8 = (extract64(v64, 63, 1) << 7)
251              | (extract64(v64, 54, 1) << 6)
252              | extract64(v64, 48, 6);
253        return true;
254    }
255    return false;
256}
257
258/*
259 * Return non-zero if v32 can be formed by MOVI+ORR.
260 * Place the parameters for MOVI in (cmode, imm8).
261 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
262 */
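/*
 * For example, v32 == 0x00cc00aa: masking out the byte at bits [23:16]
 * leaves 0xaa (MOVI cmode 0x0); the returned 4 is the ORR cmode, with
 * imm8 == extract32(v32, 16, 8) == 0xcc.
 */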
263static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
264{
265    int i;
266
267    for (i = 6; i > 0; i -= 2) {
268        /* Mask out one byte we can add with ORR.  */
269        uint32_t tmp = v32 & ~(0xffu << (i * 4));
270        if (is_shimm32(tmp, cmode, imm8) ||
271            is_soimm32(tmp, cmode, imm8)) {
272            break;
273        }
274    }
275    return i;
276}
277
278/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
279static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
280{
281    if (v32 == deposit32(v32, 16, 16, v32)) {
282        return is_shimm16(v32, cmode, imm8);
283    } else {
284        return is_shimm32(v32, cmode, imm8);
285    }
286}
287
288static bool tcg_target_const_match(int64_t val, int ct,
289                                   TCGType type, TCGCond cond, int vece)
290{
291    if (ct & TCG_CT_CONST) {
292        return 1;
293    }
294    if (type == TCG_TYPE_I32) {
295        val = (int32_t)val;
296    }
297
298    if (ct & TCG_CT_CONST_CMP) {
299        if (is_tst_cond(cond)) {
300            ct |= TCG_CT_CONST_LIMM;
301        } else {
302            ct |= TCG_CT_CONST_AIMM;
303        }
304    }
305
306    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
307        return 1;
308    }
309    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
310        return 1;
311    }
312    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
313        return 1;
314    }
315    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
316        return 1;
317    }
318
319    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
320    case 0:
321        break;
322    case TCG_CT_CONST_ANDI:
323        val = ~val;
324        /* fallthru */
325    case TCG_CT_CONST_ORRI:
326        if (val == deposit64(val, 32, 32, val)) {
327            int cmode, imm8;
328            return is_shimm1632(val, &cmode, &imm8);
329        }
330        break;
331    default:
332        /* Both bits should not be set for the same insn.  */
333        g_assert_not_reached();
334    }
335
336    return 0;
337}
338
339enum aarch64_cond_code {
340    COND_EQ = 0x0,
341    COND_NE = 0x1,
342    COND_CS = 0x2,     /* Unsigned greater or equal */
343    COND_HS = COND_CS, /* ALIAS greater or equal */
344    COND_CC = 0x3,     /* Unsigned less than */
345    COND_LO = COND_CC, /* ALIAS Lower */
346    COND_MI = 0x4,     /* Negative */
347    COND_PL = 0x5,     /* Zero or greater */
348    COND_VS = 0x6,     /* Overflow */
349    COND_VC = 0x7,     /* No overflow */
350    COND_HI = 0x8,     /* Unsigned greater than */
351    COND_LS = 0x9,     /* Unsigned less or equal */
352    COND_GE = 0xa,
353    COND_LT = 0xb,
354    COND_GT = 0xc,
355    COND_LE = 0xd,
356    COND_AL = 0xe,
357    COND_NV = 0xf, /* behaves like COND_AL here */
358};
359
360static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
361    [TCG_COND_EQ] = COND_EQ,
362    [TCG_COND_NE] = COND_NE,
363    [TCG_COND_LT] = COND_LT,
364    [TCG_COND_GE] = COND_GE,
365    [TCG_COND_LE] = COND_LE,
366    [TCG_COND_GT] = COND_GT,
367    /* unsigned */
368    [TCG_COND_LTU] = COND_LO,
369    [TCG_COND_GTU] = COND_HI,
370    [TCG_COND_GEU] = COND_HS,
371    [TCG_COND_LEU] = COND_LS,
372    /* bit test */
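    /* (lowered via ANDS by tcg_out_cmp, after which only the Z flag matters) */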
373    [TCG_COND_TSTEQ] = COND_EQ,
374    [TCG_COND_TSTNE] = COND_NE,
375};
376
377typedef enum {
378    LDST_ST = 0,    /* store */
379    LDST_LD = 1,    /* load */
380    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
381    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
382} AArch64LdstType;
383
384/* We encode the format of the insn into the beginning of the name, so that
385   we can have the preprocessor help "typecheck" the insn vs the output
386   function.  Arm didn't provide us with nice names for the formats, so we
387   use the section number of the architecture reference manual in which the
388   instruction group is described.  */
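/* For example, I3401_ADDI below belongs to the "Add/subtract (immediate)"
   group of section 3.4.1 and is emitted via tcg_out_insn(s, 3401, ADDI, ...). */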
389typedef enum {
390    /* Compare and branch (immediate).  */
391    I3201_CBZ       = 0x34000000,
392    I3201_CBNZ      = 0x35000000,
393
394    /* Conditional branch (immediate).  */
395    I3202_B_C       = 0x54000000,
396
397    /* Test and branch (immediate).  */
398    I3205_TBZ       = 0x36000000,
399    I3205_TBNZ      = 0x37000000,
400
401    /* Unconditional branch (immediate).  */
402    I3206_B         = 0x14000000,
403    I3206_BL        = 0x94000000,
404
405    /* Unconditional branch (register).  */
406    I3207_BR        = 0xd61f0000,
407    I3207_BLR       = 0xd63f0000,
408    I3207_RET       = 0xd65f0000,
409
410    /* AdvSIMD load/store single structure.  */
411    I3303_LD1R      = 0x0d40c000,
412
413    /* Load register (literal), for loading a constant from a pc-relative address.  */
414    I3305_LDR       = 0x58000000,
415    I3305_LDR_v64   = 0x5c000000,
416    I3305_LDR_v128  = 0x9c000000,
417
418    /* Load/store exclusive. */
419    I3306_LDXP      = 0xc8600000,
420    I3306_STXP      = 0xc8200000,
421
422    /* Load/store register.  Described here as 3.3.12, but the helper
423       that emits them can transform to 3.3.10 or 3.3.13.  */
424    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
425    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
426    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
427    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
428
429    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
430    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
431    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
432    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
433
434    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
435    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
436
437    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
438    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
439    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
440
441    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
442    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
443
444    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
445    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
446
447    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
448    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
449
450    I3312_TO_I3310  = 0x00200800,
451    I3312_TO_I3313  = 0x01000000,
452
453    /* Load/store register pair instructions.  */
454    I3314_LDP       = 0x28400000,
455    I3314_STP       = 0x28000000,
456
457    /* Add/subtract immediate instructions.  */
458    I3401_ADDI      = 0x11000000,
459    I3401_ADDSI     = 0x31000000,
460    I3401_SUBI      = 0x51000000,
461    I3401_SUBSI     = 0x71000000,
462
463    /* Bitfield instructions.  */
464    I3402_BFM       = 0x33000000,
465    I3402_SBFM      = 0x13000000,
466    I3402_UBFM      = 0x53000000,
467
468    /* Extract instruction.  */
469    I3403_EXTR      = 0x13800000,
470
471    /* Logical immediate instructions.  */
472    I3404_ANDI      = 0x12000000,
473    I3404_ORRI      = 0x32000000,
474    I3404_EORI      = 0x52000000,
475    I3404_ANDSI     = 0x72000000,
476
477    /* Move wide immediate instructions.  */
478    I3405_MOVN      = 0x12800000,
479    I3405_MOVZ      = 0x52800000,
480    I3405_MOVK      = 0x72800000,
481
482    /* PC relative addressing instructions.  */
483    I3406_ADR       = 0x10000000,
484    I3406_ADRP      = 0x90000000,
485
486    /* Add/subtract extended register instructions. */
487    I3501_ADD       = 0x0b200000,
488
489    /* Add/subtract shifted register instructions (without a shift).  */
490    I3502_ADD       = 0x0b000000,
491    I3502_ADDS      = 0x2b000000,
492    I3502_SUB       = 0x4b000000,
493    I3502_SUBS      = 0x6b000000,
494
495    /* Add/subtract shifted register instructions (with a shift).  */
496    I3502S_ADD_LSL  = I3502_ADD,
497
498    /* Add/subtract with carry instructions.  */
499    I3503_ADC       = 0x1a000000,
500    I3503_SBC       = 0x5a000000,
501
502    /* Conditional select instructions.  */
503    I3506_CSEL      = 0x1a800000,
504    I3506_CSINC     = 0x1a800400,
505    I3506_CSINV     = 0x5a800000,
506    I3506_CSNEG     = 0x5a800400,
507
508    /* Data-processing (1 source) instructions.  */
509    I3507_CLZ       = 0x5ac01000,
510    I3507_RBIT      = 0x5ac00000,
511    I3507_REV       = 0x5ac00000, /* + size << 10 */
512
513    /* Data-processing (2 source) instructions.  */
514    I3508_LSLV      = 0x1ac02000,
515    I3508_LSRV      = 0x1ac02400,
516    I3508_ASRV      = 0x1ac02800,
517    I3508_RORV      = 0x1ac02c00,
518    I3508_SMULH     = 0x9b407c00,
519    I3508_UMULH     = 0x9bc07c00,
520    I3508_UDIV      = 0x1ac00800,
521    I3508_SDIV      = 0x1ac00c00,
522
523    /* Data-processing (3 source) instructions.  */
524    I3509_MADD      = 0x1b000000,
525    I3509_MSUB      = 0x1b008000,
526
527    /* Logical shifted register instructions (without a shift).  */
528    I3510_AND       = 0x0a000000,
529    I3510_BIC       = 0x0a200000,
530    I3510_ORR       = 0x2a000000,
531    I3510_ORN       = 0x2a200000,
532    I3510_EOR       = 0x4a000000,
533    I3510_EON       = 0x4a200000,
534    I3510_ANDS      = 0x6a000000,
535
536    /* Logical shifted register instructions (with a shift).  */
537    I3502S_AND_LSR  = I3510_AND | (1 << 22),
538
539    /* AdvSIMD copy */
540    I3605_DUP      = 0x0e000400,
541    I3605_INS      = 0x4e001c00,
542    I3605_UMOV     = 0x0e003c00,
543
544    /* AdvSIMD modified immediate */
545    I3606_MOVI      = 0x0f000400,
546    I3606_MVNI      = 0x2f000400,
547    I3606_BIC       = 0x2f001400,
548    I3606_ORR       = 0x0f001400,
549
550    /* AdvSIMD scalar shift by immediate */
551    I3609_SSHR      = 0x5f000400,
552    I3609_SSRA      = 0x5f001400,
553    I3609_SHL       = 0x5f005400,
554    I3609_USHR      = 0x7f000400,
555    I3609_USRA      = 0x7f001400,
556    I3609_SLI       = 0x7f005400,
557
558    /* AdvSIMD scalar three same */
559    I3611_SQADD     = 0x5e200c00,
560    I3611_SQSUB     = 0x5e202c00,
561    I3611_CMGT      = 0x5e203400,
562    I3611_CMGE      = 0x5e203c00,
563    I3611_SSHL      = 0x5e204400,
564    I3611_ADD       = 0x5e208400,
565    I3611_CMTST     = 0x5e208c00,
566    I3611_UQADD     = 0x7e200c00,
567    I3611_UQSUB     = 0x7e202c00,
568    I3611_CMHI      = 0x7e203400,
569    I3611_CMHS      = 0x7e203c00,
570    I3611_USHL      = 0x7e204400,
571    I3611_SUB       = 0x7e208400,
572    I3611_CMEQ      = 0x7e208c00,
573
574    /* AdvSIMD scalar two-reg misc */
575    I3612_CMGT0     = 0x5e208800,
576    I3612_CMEQ0     = 0x5e209800,
577    I3612_CMLT0     = 0x5e20a800,
578    I3612_ABS       = 0x5e20b800,
579    I3612_CMGE0     = 0x7e208800,
580    I3612_CMLE0     = 0x7e209800,
581    I3612_NEG       = 0x7e20b800,
582
583    /* AdvSIMD shift by immediate */
584    I3614_SSHR      = 0x0f000400,
585    I3614_SSRA      = 0x0f001400,
586    I3614_SHL       = 0x0f005400,
587    I3614_SLI       = 0x2f005400,
588    I3614_USHR      = 0x2f000400,
589    I3614_USRA      = 0x2f001400,
590
591    /* AdvSIMD three same.  */
592    I3616_ADD       = 0x0e208400,
593    I3616_AND       = 0x0e201c00,
594    I3616_BIC       = 0x0e601c00,
595    I3616_BIF       = 0x2ee01c00,
596    I3616_BIT       = 0x2ea01c00,
597    I3616_BSL       = 0x2e601c00,
598    I3616_EOR       = 0x2e201c00,
599    I3616_MUL       = 0x0e209c00,
600    I3616_ORR       = 0x0ea01c00,
601    I3616_ORN       = 0x0ee01c00,
602    I3616_SUB       = 0x2e208400,
603    I3616_CMGT      = 0x0e203400,
604    I3616_CMGE      = 0x0e203c00,
605    I3616_CMTST     = 0x0e208c00,
606    I3616_CMHI      = 0x2e203400,
607    I3616_CMHS      = 0x2e203c00,
608    I3616_CMEQ      = 0x2e208c00,
609    I3616_SMAX      = 0x0e206400,
610    I3616_SMIN      = 0x0e206c00,
611    I3616_SSHL      = 0x0e204400,
612    I3616_SQADD     = 0x0e200c00,
613    I3616_SQSUB     = 0x0e202c00,
614    I3616_UMAX      = 0x2e206400,
615    I3616_UMIN      = 0x2e206c00,
616    I3616_UQADD     = 0x2e200c00,
617    I3616_UQSUB     = 0x2e202c00,
618    I3616_USHL      = 0x2e204400,
619
620    /* AdvSIMD two-reg misc.  */
621    I3617_CMGT0     = 0x0e208800,
622    I3617_CMEQ0     = 0x0e209800,
623    I3617_CMLT0     = 0x0e20a800,
624    I3617_CMGE0     = 0x2e208800,
625    I3617_CMLE0     = 0x2e209800,
626    I3617_NOT       = 0x2e205800,
627    I3617_ABS       = 0x0e20b800,
628    I3617_NEG       = 0x2e20b800,
629
630    /* System instructions.  */
631    NOP             = 0xd503201f,
632    DMB_ISH         = 0xd50338bf,
633    DMB_LD          = 0x00000100,
634    DMB_ST          = 0x00000200,
635
636    BTI_C           = 0xd503245f,
637    BTI_J           = 0xd503249f,
638    BTI_JC          = 0xd50324df,
639} AArch64Insn;
640
641static inline uint32_t tcg_in32(TCGContext *s)
642{
643    uint32_t v = *(uint32_t *)s->code_ptr;
644    return v;
645}
646
647/* Emit an opcode with "type-checking" of the format.  */
648#define tcg_out_insn(S, FMT, OP, ...) \
649    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
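/* For example, tcg_out_insn(s, 3305, LDR, 0, rd) expands to
   tcg_out_insn_3305(s, I3305_LDR, 0, rd); a mismatched format/opcode
   pair fails to compile because the I<FMT>_<OP> constant does not exist. */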
650
651static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
652                              TCGReg rt, TCGReg rn, unsigned size)
653{
654    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
655}
656
657static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
658                              int imm19, TCGReg rt)
659{
660    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
661}
662
663static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
664                              TCGReg rt, TCGReg rt2, TCGReg rn)
665{
666    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
667}
668
669static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
670                              TCGReg rt, int imm19)
671{
672    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
673}
674
675static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
676                              TCGCond c, int imm19)
677{
678    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
679}
680
681static void tcg_out_insn_3205(TCGContext *s, AArch64Insn insn,
682                              TCGReg rt, int imm6, int imm14)
683{
684    insn |= (imm6 & 0x20) << (31 - 5);
685    insn |= (imm6 & 0x1f) << 19;
686    tcg_out32(s, insn | (imm14 & 0x3fff) << 5 | rt);
687}
688
689static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
690{
691    tcg_out32(s, insn | (imm26 & 0x03ffffff));
692}
693
694static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
695{
696    tcg_out32(s, insn | rn << 5);
697}
698
699static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
700                              TCGReg r1, TCGReg r2, TCGReg rn,
701                              tcg_target_long ofs, bool pre, bool w)
702{
703    insn |= 1u << 31; /* ext */
704    insn |= pre << 24;
705    insn |= w << 23;
706
707    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
708    insn |= (ofs & (0x7f << 3)) << (15 - 3);
709
710    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
711}
712
713static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
714                              TCGReg rd, TCGReg rn, uint64_t aimm)
715{
716    if (aimm > 0xfff) {
717        tcg_debug_assert((aimm & 0xfff) == 0);
718        aimm >>= 12;
719        tcg_debug_assert(aimm <= 0xfff);
720        aimm |= 1 << 12;  /* apply LSL 12 */
721    }
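    /* For example, aimm == 0x45000 arrives here as 0x45 with bit 12 set,
       i.e. "..., #0x45, LSL #12".  */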
722    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
723}
724
725/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
726   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
727   that feed the DecodeBitMasks pseudo function.  */
728static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
729                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
730{
731    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
732              | rn << 5 | rd);
733}
734
735#define tcg_out_insn_3404  tcg_out_insn_3402
736
737static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
738                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
739{
740    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
741              | rn << 5 | rd);
742}
743
744/* This function is used for the Move (wide immediate) instruction group.
745   Note that SHIFT is a full shift count, not the 2 bit HW field. */
746static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
747                              TCGReg rd, uint16_t half, unsigned shift)
748{
749    tcg_debug_assert((shift & ~0x30) == 0);
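    /* shift is 0, 16, 32 or 48; "shift << 17" places shift/16 into the
       hw field at bits [22:21] (e.g. 48 encodes as hw == 3).  */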
750    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
751}
752
753static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
754                              TCGReg rd, int64_t disp)
755{
756    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
757}
758
759static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
760                                     TCGType sf, TCGReg rd, TCGReg rn,
761                                     TCGReg rm, int opt, int imm3)
762{
763    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
764              imm3 << 10 | rn << 5 | rd);
765}
766
767/* This function is for 3.5.2 (Add/subtract shifted register), for the
768   rare occasion when we actually want to supply a shift amount.  */
769static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
770                                      TCGType ext, TCGReg rd, TCGReg rn,
771                                      TCGReg rm, int imm6)
772{
773    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
774}
775
776/* This function is for 3.5.2 (Add/subtract shifted register),
777   and 3.5.10 (Logical shifted register), for the vast majority of cases
778   when we don't want to apply a shift.  Thus it can also be used for
779   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
780static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
781                              TCGReg rd, TCGReg rn, TCGReg rm)
782{
783    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
784}
785
786#define tcg_out_insn_3503  tcg_out_insn_3502
787#define tcg_out_insn_3508  tcg_out_insn_3502
788#define tcg_out_insn_3510  tcg_out_insn_3502
789
790static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
791                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
792{
793    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
794              | tcg_cond_to_aarch64[c] << 12);
795}
796
797static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
798                              TCGReg rd, TCGReg rn)
799{
800    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
801}
802
803static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
804                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
805{
806    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
807}
808
809static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
810                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
811{
812    /* Note that bit 11 set means general register input.  Therefore
813       we can handle both register sets with one function.  */
814    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
815              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
816}
817
818static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
819                              TCGReg rd, bool op, int cmode, uint8_t imm8)
820{
821    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
822              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
823}
824
825static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
826                              TCGReg rd, TCGReg rn, unsigned immhb)
827{
828    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
829}
830
831static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
832                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
833{
834    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
835              | (rn & 0x1f) << 5 | (rd & 0x1f));
836}
837
838static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
839                              unsigned size, TCGReg rd, TCGReg rn)
840{
841    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
842}
843
844static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
845                              TCGReg rd, TCGReg rn, unsigned immhb)
846{
847    tcg_out32(s, insn | q << 30 | immhb << 16
848              | (rn & 0x1f) << 5 | (rd & 0x1f));
849}
850
851static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
852                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
853{
854    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
855              | (rn & 0x1f) << 5 | (rd & 0x1f));
856}
857
858static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
859                              unsigned size, TCGReg rd, TCGReg rn)
860{
861    tcg_out32(s, insn | q << 30 | (size << 22)
862              | (rn & 0x1f) << 5 | (rd & 0x1f));
863}
864
865static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
866                              TCGReg rd, TCGReg base, TCGType ext,
867                              TCGReg regoff)
868{
869    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
870    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
871              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
872}
873
874static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
875                              TCGReg rd, TCGReg rn, intptr_t offset)
876{
877    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
878}
879
880static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
881                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
882{
883    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
884    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
885              | rn << 5 | (rd & 0x1f));
886}
887
888static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
889{
890    /*
891     * While BTI insns are nops on hosts without FEAT_BTI,
892     * there is no point in emitting them in that case either.
893     */
894    if (cpuinfo & CPUINFO_BTI) {
895        tcg_out32(s, insn);
896    }
897}
898
899/* Register to register move using ORR (shifted register with no shift). */
900static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
901{
902    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
903}
904
905/* Register to register move using ADDI (move to/from SP).  */
906static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
907{
908    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
909}
910
911/* This function is used for the Logical (immediate) instruction group.
912   The value of LIMM must satisfy IS_LIMM.  See the comment above about
913   only supporting simplified logical immediates.  */
914static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
915                             TCGReg rd, TCGReg rn, uint64_t limm)
916{
917    unsigned h, l, r, c;
918
919    tcg_debug_assert(is_limm(limm));
920
921    h = clz64(limm);
922    l = ctz64(limm);
923    if (l == 0) {
924        r = 0;                  /* form 0....01....1 */
925        c = ctz64(~limm) - 1;
926        if (h == 0) {
927            r = clz64(~limm);   /* form 1..10..01..1 */
928            c += r;
929        }
930    } else {
931        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
932        c = r - h - 1;
933    }
934    if (ext == TCG_TYPE_I32) {
935        r &= 31;
936        c &= 31;
937    }
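    /* For example, limm == 0x0ff0: h == 52, l == 4, giving r == 60 and
       c == 7, i.e. eight 1-bits rotated right by 60 == 0x0ff0.  */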
938
939    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
940}
941
942static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
943                             TCGReg rd, int64_t v64)
944{
945    bool q = type == TCG_TYPE_V128;
946    int cmode, imm8, i;
947
948    /* Test all bytes equal first.  */
949    if (vece == MO_8) {
950        imm8 = (uint8_t)v64;
951        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
952        return;
953    }
954
955    /*
956     * Test all bytes 0x00 or 0xff second.  This can match cases that
957     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
958     */
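    /* For example, v64 == 0x00ff00ff00ff00ff yields imm8 == 0x55.  */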
959    for (i = imm8 = 0; i < 8; i++) {
960        uint8_t byte = v64 >> (i * 8);
961        if (byte == 0xff) {
962            imm8 |= 1 << i;
963        } else if (byte != 0) {
964            goto fail_bytes;
965        }
966    }
967    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
968    return;
969 fail_bytes:
970
971    /*
972     * Tests for various replications.  For each element width, if we
973     * cannot find an expansion there's no point checking a larger
974     * width because we already know by replication it cannot match.
975     */
976    if (vece == MO_16) {
977        uint16_t v16 = v64;
978
979        if (is_shimm16(v16, &cmode, &imm8)) {
980            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
981            return;
982        }
983        if (is_shimm16(~v16, &cmode, &imm8)) {
984            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
985            return;
986        }
987
988        /*
989         * Otherwise, all remaining constants can be loaded in two insns:
990         * rd = v16 & 0xff, rd |= v16 & 0xff00.
991         */
992        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
993        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
994        return;
995    } else if (vece == MO_32) {
996        uint32_t v32 = v64;
997        uint32_t n32 = ~v32;
998
999        if (is_shimm32(v32, &cmode, &imm8) ||
1000            is_soimm32(v32, &cmode, &imm8) ||
1001            is_fimm32(v32, &cmode, &imm8)) {
1002            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
1003            return;
1004        }
1005        if (is_shimm32(n32, &cmode, &imm8) ||
1006            is_soimm32(n32, &cmode, &imm8)) {
1007            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
1008            return;
1009        }
1010
1011        /*
1012         * Restrict the set of constants to those we can load with
1013         * two instructions.  Others we load from the pool.
1014         */
1015        i = is_shimm32_pair(v32, &cmode, &imm8);
1016        if (i) {
1017            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
1018            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
1019            return;
1020        }
1021        i = is_shimm32_pair(n32, &cmode, &imm8);
1022        if (i) {
1023            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
1024            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
1025            return;
1026        }
1027    } else if (is_fimm64(v64, &cmode, &imm8)) {
1028        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
1029        return;
1030    }
1031
1032    /*
1033     * As a last resort, load from the constant pool.  Sadly there
1034     * is no LD1R (literal), so store the full 16-byte vector.
1035     */
1036    if (type == TCG_TYPE_V128) {
1037        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
1038        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1039    } else {
1040        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1041        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1042    }
1043}
1044
1045static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1046                            TCGReg rd, TCGReg rs)
1047{
1048    int is_q = type - TCG_TYPE_V64;
1049    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1050    return true;
1051}
1052
1053static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1054                             TCGReg r, TCGReg base, intptr_t offset)
1055{
1056    TCGReg temp = TCG_REG_TMP0;
1057
1058    if (offset < -0xffffff || offset > 0xffffff) {
1059        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1060        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1061        base = temp;
1062    } else {
1063        AArch64Insn add_insn = I3401_ADDI;
1064
1065        if (offset < 0) {
1066            add_insn = I3401_SUBI;
1067            offset = -offset;
1068        }
1069        if (offset & 0xfff000) {
1070            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1071            base = temp;
1072        }
1073        if (offset & 0xfff) {
1074            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1075            base = temp;
1076        }
1077    }
1078    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1079    return true;
1080}
1081
1082static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1083                         tcg_target_long value)
1084{
1085    tcg_target_long svalue = value;
1086    tcg_target_long ivalue = ~value;
1087    tcg_target_long t0, t1, t2;
1088    int s0, s1;
1089    AArch64Insn opc;
1090
1091    switch (type) {
1092    case TCG_TYPE_I32:
1093    case TCG_TYPE_I64:
1094        tcg_debug_assert(rd < 32);
1095        break;
1096    default:
1097        g_assert_not_reached();
1098    }
1099
1100    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1101       values within [2**31, 2**32-1], we can create smaller sequences by
1102       interpreting this as a negative 32-bit number, while ensuring that
1103       the high 32 bits are cleared by setting SF=0.  */
1104    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1105        svalue = (int32_t)value;
1106        value = (uint32_t)value;
1107        ivalue = (uint32_t)ivalue;
1108        type = TCG_TYPE_I32;
1109    }
1110
1111    /* Speed things up by handling the common case of small positive
1112       and negative values specially.  */
1113    if ((value & ~0xffffull) == 0) {
1114        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1115        return;
1116    } else if ((ivalue & ~0xffffull) == 0) {
1117        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1118        return;
1119    }
1120
1121    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1122       use the sign-extended value.  That lets us match rotated values such
1123       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1124    if (is_limm(svalue)) {
1125        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1126        return;
1127    }
1128
1129    /* Look for host pointer values within 4G of the PC.  This happens
1130       often when loading pointers to QEMU's own data structures.  */
1131    if (type == TCG_TYPE_I64) {
1132        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1133        tcg_target_long disp = value - src_rx;
1134        if (disp == sextract64(disp, 0, 21)) {
1135            tcg_out_insn(s, 3406, ADR, rd, disp);
1136            return;
1137        }
1138        disp = (value >> 12) - (src_rx >> 12);
1139        if (disp == sextract64(disp, 0, 21)) {
1140            tcg_out_insn(s, 3406, ADRP, rd, disp);
1141            if (value & 0xfff) {
1142                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1143            }
1144            return;
1145        }
1146    }
1147
1148    /* Would it take fewer insns to begin with MOVN?  */
1149    if (ctpop64(value) >= 32) {
1150        t0 = ivalue;
1151        opc = I3405_MOVN;
1152    } else {
1153        t0 = value;
1154        opc = I3405_MOVZ;
1155    }
1156    s0 = ctz64(t0) & (63 & -16);
1157    t1 = t0 & ~(0xffffull << s0);
1158    s1 = ctz64(t1) & (63 & -16);
1159    t2 = t1 & ~(0xffffull << s1);
1160    if (t2 == 0) {
1161        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1162        if (t1 != 0) {
1163            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1164        }
1165        return;
1166    }
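    /* For example, 0x0000123400005678 fits the two-insn case above:
       MOVZ rd, #0x5678; MOVK rd, #0x1234, LSL #32.  */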
1167
1168    /* For more than 2 insns, dump it into the constant pool.  */
1169    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1170    tcg_out_insn(s, 3305, LDR, 0, rd);
1171}
1172
1173static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1174{
1175    return false;
1176}
1177
1178static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1179                             tcg_target_long imm)
1180{
1181    /* This function is only used for passing structs by reference. */
1182    g_assert_not_reached();
1183}
1184
1185/* Define something more legible for general use.  */
1186#define tcg_out_ldst_r  tcg_out_insn_3310
1187
1188static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1189                         TCGReg rn, intptr_t offset, int lgsize)
1190{
1191    /* If the offset is naturally aligned and in range, then we can
1192       use the scaled uimm12 encoding.  */
1193    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1194        uintptr_t scaled_uimm = offset >> lgsize;
1195        if (scaled_uimm <= 0xfff) {
1196            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1197            return;
1198        }
1199    }
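    /* For example, an 8-byte slot at offset 0x1f8 (lgsize == 3) uses the
       scaled form with scaled_uimm == 0x3f.  */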
1200
1201    /* Small signed offsets can use the unscaled encoding.  */
1202    if (offset >= -256 && offset < 256) {
1203        tcg_out_insn_3312(s, insn, rd, rn, offset);
1204        return;
1205    }
1206
1207    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1208    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1209    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1210}
1211
1212static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1213{
1214    if (ret == arg) {
1215        return true;
1216    }
1217    switch (type) {
1218    case TCG_TYPE_I32:
1219    case TCG_TYPE_I64:
1220        if (ret < 32 && arg < 32) {
1221            tcg_out_movr(s, type, ret, arg);
1222            break;
1223        } else if (ret < 32) {
1224            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1225            break;
1226        } else if (arg < 32) {
1227            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1228            break;
1229        }
1230        /* FALLTHRU */
1231
1232    case TCG_TYPE_V64:
1233        tcg_debug_assert(ret >= 32 && arg >= 32);
1234        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1235        break;
1236    case TCG_TYPE_V128:
1237        tcg_debug_assert(ret >= 32 && arg >= 32);
1238        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1239        break;
1240
1241    default:
1242        g_assert_not_reached();
1243    }
1244    return true;
1245}
1246
1247static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1248                       TCGReg base, intptr_t ofs)
1249{
1250    AArch64Insn insn;
1251    int lgsz;
1252
1253    switch (type) {
1254    case TCG_TYPE_I32:
1255        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1256        lgsz = 2;
1257        break;
1258    case TCG_TYPE_I64:
1259        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1260        lgsz = 3;
1261        break;
1262    case TCG_TYPE_V64:
1263        insn = I3312_LDRVD;
1264        lgsz = 3;
1265        break;
1266    case TCG_TYPE_V128:
1267        insn = I3312_LDRVQ;
1268        lgsz = 4;
1269        break;
1270    default:
1271        g_assert_not_reached();
1272    }
1273    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1274}
1275
1276static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1277                       TCGReg base, intptr_t ofs)
1278{
1279    AArch64Insn insn;
1280    int lgsz;
1281
1282    switch (type) {
1283    case TCG_TYPE_I32:
1284        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1285        lgsz = 2;
1286        break;
1287    case TCG_TYPE_I64:
1288        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1289        lgsz = 3;
1290        break;
1291    case TCG_TYPE_V64:
1292        insn = I3312_STRVD;
1293        lgsz = 3;
1294        break;
1295    case TCG_TYPE_V128:
1296        insn = I3312_STRVQ;
1297        lgsz = 4;
1298        break;
1299    default:
1300        g_assert_not_reached();
1301    }
1302    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1303}
1304
1305static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1306                               TCGReg base, intptr_t ofs)
1307{
1308    if (type <= TCG_TYPE_I64 && val == 0) {
1309        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1310        return true;
1311    }
1312    return false;
1313}
1314
1315static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1316                               TCGReg rn, unsigned int a, unsigned int b)
1317{
1318    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1319}
1320
1321static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1322                                TCGReg rn, unsigned int a, unsigned int b)
1323{
1324    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1325}
1326
1327static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1328                                TCGReg rn, unsigned int a, unsigned int b)
1329{
1330    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1331}
1332
1333static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1334                                TCGReg rn, TCGReg rm, unsigned int a)
1335{
1336    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1337}
1338
1339static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1340                               TCGReg rd, TCGReg rn, unsigned int m)
1341{
1342    int bits = ext ? 64 : 32;
1343    int max = bits - 1;
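    /* For example, a 32-bit shift left by 8 becomes UBFM Wd, Wn, #24, #23,
       the LSL #8 alias.  */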
1344    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1345}
1346
1347static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1348                               TCGReg rd, TCGReg rn, unsigned int m)
1349{
1350    int max = ext ? 63 : 31;
1351    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1352}
1353
1354static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1355                               TCGReg rd, TCGReg rn, unsigned int m)
1356{
1357    int max = ext ? 63 : 31;
1358    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1359}
1360
1361static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1362                                TCGReg rd, TCGReg rn, unsigned int m)
1363{
1364    int max = ext ? 63 : 31;
1365    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1366}
1367
1368static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1369                                TCGReg rd, TCGReg rn, unsigned int m)
1370{
1371    int max = ext ? 63 : 31;
1372    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1373}
1374
1375static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1376                               TCGReg rn, unsigned lsb, unsigned width)
1377{
1378    unsigned size = ext ? 64 : 32;
1379    unsigned a = (size - lsb) & (size - 1);
1380    unsigned b = width - 1;
1381    tcg_out_bfm(s, ext, rd, rn, a, b);
1382}
1383
1384static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGCond cond, TCGReg a,
1385                        tcg_target_long b, bool const_b)
1386{
1387    if (is_tst_cond(cond)) {
1388        if (!const_b) {
1389            tcg_out_insn(s, 3510, ANDS, ext, TCG_REG_XZR, a, b);
1390        } else {
1391            tcg_debug_assert(is_limm(b));
1392            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, a, b);
1393        }
1394    } else {
1395        if (!const_b) {
1396            tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1397        } else if (b >= 0) {
1398            tcg_debug_assert(is_aimm(b));
1399            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1400        } else {
1401            tcg_debug_assert(is_aimm(-b));
1402            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1403        }
1404    }
1405}
1406
1407static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1408{
1409    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1410    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1411    tcg_out_insn(s, 3206, B, offset);
1412}
1413
1414static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1415{
1416    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1417    if (offset == sextract64(offset, 0, 26)) {
1418        tcg_out_insn(s, 3206, BL, offset);
1419    } else {
1420        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1421        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1422    }
1423}
1424
1425static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1426                         const TCGHelperInfo *info)
1427{
1428    tcg_out_call_int(s, target);
1429}
1430
1431static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1432{
1433    if (!l->has_value) {
1434        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1435        tcg_out_insn(s, 3206, B, 0);
1436    } else {
1437        tcg_out_goto(s, l->u.value_ptr);
1438    }
1439}
1440
1441static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1442                           TCGArg b, bool b_const, TCGLabel *l)
1443{
1444    int tbit = -1;
1445    bool need_cmp = true;
1446
1447    switch (c) {
1448    case TCG_COND_EQ:
1449    case TCG_COND_NE:
1450        /* cmp xN,0; b.ne L -> cbnz xN,L */
1451        if (b_const && b == 0) {
1452            need_cmp = false;
1453        }
1454        break;
1455    case TCG_COND_LT:
1456    case TCG_COND_GE:
1457        /* cmp xN,0; b.mi L -> tbnz xN,63,L */
1458        if (b_const && b == 0) {
1459            c = (c == TCG_COND_LT ? TCG_COND_TSTNE : TCG_COND_TSTEQ);
1460            tbit = ext ? 63 : 31;
1461            need_cmp = false;
1462        }
1463        break;
1464    case TCG_COND_TSTEQ:
1465    case TCG_COND_TSTNE:
1466        /* tst xN,1<<B; b.ne L -> tbnz xN,B,L */
1467        if (b_const && is_power_of_2(b)) {
1468            tbit = ctz64(b);
1469            need_cmp = false;
1470        }
1471        break;
1472    default:
1473        break;
1474    }
1475
1476    if (need_cmp) {
1477        tcg_out_cmp(s, ext, c, a, b, b_const);
1478        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1479        tcg_out_insn(s, 3202, B_C, c, 0);
1480        return;
1481    }
1482
1483    if (tbit >= 0) {
1484        tcg_out_reloc(s, s->code_ptr, R_AARCH64_TSTBR14, l, 0);
1485        switch (c) {
1486        case TCG_COND_TSTEQ:
1487            tcg_out_insn(s, 3205, TBZ, a, tbit, 0);
1488            break;
1489        case TCG_COND_TSTNE:
1490            tcg_out_insn(s, 3205, TBNZ, a, tbit, 0);
1491            break;
1492        default:
1493            g_assert_not_reached();
1494        }
1495    } else {
1496        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1497        switch (c) {
1498        case TCG_COND_EQ:
1499            tcg_out_insn(s, 3201, CBZ, ext, a, 0);
1500            break;
1501        case TCG_COND_NE:
1502            tcg_out_insn(s, 3201, CBNZ, ext, a, 0);
1503            break;
1504        default:
1505            g_assert_not_reached();
1506        }
1507    }
1508}
1509
1510static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1511                               TCGReg rd, TCGReg rn)
1512{
1513    /* REV, REV16, REV32 */
1514    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1515}
1516
1517static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1518                               TCGReg rd, TCGReg rn)
1519{
1520    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1521    int bits = (8 << s_bits) - 1;
1522    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1523}
1524
1525static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1526{
1527    tcg_out_sxt(s, type, MO_8, rd, rn);
1528}
1529
1530static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1531{
1532    tcg_out_sxt(s, type, MO_16, rd, rn);
1533}
1534
1535static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1536{
1537    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1538}
1539
1540static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1541{
1542    tcg_out_ext32s(s, rd, rn);
1543}
1544
1545static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1546                               TCGReg rd, TCGReg rn)
1547{
1548    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1549    int bits = (8 << s_bits) - 1;
1550    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1551}
1552
1553static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1554{
1555    tcg_out_uxt(s, MO_8, rd, rn);
1556}
1557
1558static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1559{
1560    tcg_out_uxt(s, MO_16, rd, rn);
1561}
1562
1563static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1564{
1565    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1566}
1567
1568static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1569{
1570    tcg_out_ext32u(s, rd, rn);
1571}
1572
1573static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1574{
1575    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1576}
1577
1578static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1579                            TCGReg rn, int64_t aimm)
1580{
1581    if (aimm >= 0) {
1582        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1583    } else {
1584        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1585    }
1586}
1587
1588static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1589                            TCGReg rh, TCGReg al, TCGReg ah,
1590                            tcg_target_long bl, tcg_target_long bh,
1591                            bool const_bl, bool const_bh, bool sub)
1592{
1593    TCGReg orig_rl = rl;
1594    AArch64Insn insn;
1595
1596    if (rl == ah || (!const_bh && rl == bh)) {
1597        rl = TCG_REG_TMP0;
1598    }
1599
1600    if (const_bl) {
1601        if (bl < 0) {
1602            bl = -bl;
1603            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1604        } else {
1605            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1606        }
1607
1608        if (unlikely(al == TCG_REG_XZR)) {
1609            /* ??? We want to allow al to be zero for the benefit of
1610               negation via subtraction.  However, that leaves open the
1611               possibility of adding 0+const in the low part, and the
1612               immediate add instructions encode XSP not XZR.  Don't try
1613               anything more elaborate here than loading another zero.  */
1614            al = TCG_REG_TMP0;
1615            tcg_out_movi(s, ext, al, 0);
1616        }
1617        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1618    } else {
1619        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1620    }
1621
1622    insn = I3503_ADC;
1623    if (const_bh) {
1624        /* Note that the only two constants we support are 0 and -1, and
1625           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
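        /* For example, adding bh == -1: ah + (-1) + c == ah + ~0 + c,
           which is exactly SBC rh, ah, xzr.  */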
1626        if ((bh != 0) ^ sub) {
1627            insn = I3503_SBC;
1628        }
1629        bh = TCG_REG_XZR;
1630    } else if (sub) {
1631        insn = I3503_SBC;
1632    }
1633    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1634
1635    tcg_out_mov(s, ext, orig_rl, rl);
1636}
1637
1638static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1639{
1640    static const uint32_t sync[] = {
1641        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1642        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1643        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1644        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1645        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1646    };
1647    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1648}
1649
1650static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1651                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1652{
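    /* CTZ is RBIT followed by CLZ.  If the "input is zero" value b equals
       the operation width, CLZ already yields it; otherwise compare the
       input against zero and select between the CLZ result and b.  */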
1653    TCGReg a1 = a0;
1654    if (is_ctz) {
1655        a1 = TCG_REG_TMP0;
1656        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1657    }
1658    if (const_b && b == (ext ? 64 : 32)) {
1659        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1660    } else {
1661        AArch64Insn sel = I3506_CSEL;
1662
1663        tcg_out_cmp(s, ext, TCG_COND_NE, a0, 0, 1);
1664        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1665
1666        if (const_b) {
1667            if (b == -1) {
1668                b = TCG_REG_XZR;
1669                sel = I3506_CSINV;
1670            } else if (b == 0) {
1671                b = TCG_REG_XZR;
1672            } else {
1673                tcg_out_movi(s, ext, d, b);
1674                b = d;
1675            }
1676        }
1677        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1678    }
1679}
1680
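/*
 * Host address for the fast path, filled in by prepare_host_addr:
 * base plus index (extended per index_ext), together with the
 * atomicity/alignment requirements of the access.
 */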
1681typedef struct {
1682    TCGReg base;
1683    TCGReg index;
1684    TCGType index_ext;
1685    TCGAtomAlign aa;
1686} HostAddress;
1687
1688bool tcg_target_has_memory_bswap(MemOp memop)
1689{
1690    return false;
1691}
1692
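/* One scratch register (TMP0) for the generic helper argument marshalling. */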
1693static const TCGLdstHelperParam ldst_helper_param = {
1694    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1695};
1696
1697static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1698{
1699    MemOp opc = get_memop(lb->oi);
1700
1701    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1702        return false;
1703    }
1704
1705    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1706    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1707    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1708    tcg_out_goto(s, lb->raddr);
1709    return true;
1710}
1711
1712static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1713{
1714    MemOp opc = get_memop(lb->oi);
1715
1716    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1717        return false;
1718    }
1719
1720    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1721    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1722    tcg_out_goto(s, lb->raddr);
1723    return true;
1724}
1725
1726/* We expect to use a 7-bit scaled negative offset from ENV.  */
1727#define MIN_TLB_MASK_TABLE_OFS  -512
1728
1729/*
1730 * For system-mode, perform the TLB load and compare.
1731 * For user-mode, perform any required alignment tests.
1732 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1733 * is required and fill in @h with the host address for the fast path.
1734 */
1735static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1736                                           TCGReg addr_reg, MemOpIdx oi,
1737                                           bool is_ld)
1738{
1739    TCGType addr_type = s->addr_type;
1740    TCGLabelQemuLdst *ldst = NULL;
1741    MemOp opc = get_memop(oi);
1742    MemOp s_bits = opc & MO_SIZE;
1743    unsigned a_mask;
1744
1745    h->aa = atom_and_align_for_opc(s, opc,
1746                                   have_lse2 ? MO_ATOM_WITHIN16
1747                                             : MO_ATOM_IFALIGN,
1748                                   s_bits == MO_128);
1749    a_mask = (1 << h->aa.align) - 1;
1750
1751    if (tcg_use_softmmu) {
1752        unsigned s_mask = (1u << s_bits) - 1;
1753        unsigned mem_index = get_mmuidx(oi);
1754        TCGReg addr_adj;
1755        TCGType mask_type;
1756        uint64_t compare_mask;
1757
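        /*
         * Roughly, the fast path emitted below is:
         *   ldp  tmp0, tmp1, [env + tlb_mask_table_ofs]
         *   and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
         *   add  tmp1, tmp1, tmp0
         *   ldr  tmp0, [tmp1, #addr_read/addr_write]
         *   ldr  tmp1, [tmp1, #addend]
         *   and  tmp2, addr(+ s_mask - a_mask), #(page_mask | a_mask)
         *   cmp  tmp0, tmp2
         *   b.ne slow_path
         */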
1758        ldst = new_ldst_label(s);
1759        ldst->is_ld = is_ld;
1760        ldst->oi = oi;
1761        ldst->addrlo_reg = addr_reg;
1762
1763        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1764                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1765
1766        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1767        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1768        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1769        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1770                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1771
1772        /* Extract the TLB index from the address into TMP0.  */
1773        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1774                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1775                     s->page_bits - CPU_TLB_ENTRY_BITS);
1776
1777        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1778        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1779
1780        /* Load the tlb comparator into TMP0, and the fast path addend. */
1781        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1782        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1783                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1784                         : offsetof(CPUTLBEntry, addr_write));
1785        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1786                   offsetof(CPUTLBEntry, addend));
1787
1788        /*
1789         * For aligned accesses, we check the first byte and include
1790         * the alignment bits within the address.  For unaligned access,
1791         * we check that we don't cross pages using the address of the
1792         * last byte of the access.
1793         */
1794        if (a_mask >= s_mask) {
1795            addr_adj = addr_reg;
1796        } else {
1797            addr_adj = TCG_REG_TMP2;
1798            tcg_out_insn(s, 3401, ADDI, addr_type,
1799                         addr_adj, addr_reg, s_mask - a_mask);
1800        }
1801        compare_mask = (uint64_t)s->page_mask | a_mask;
1802
1803        /* Store the page mask part of the address into TMP2.  */
1804        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1805                         addr_adj, compare_mask);
1806
1807        /* Perform the address comparison. */
1808        tcg_out_cmp(s, addr_type, TCG_COND_NE, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1809
1810        /* If not equal, we jump to the slow path. */
1811        ldst->label_ptr[0] = s->code_ptr;
1812        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1813
1814        h->base = TCG_REG_TMP1;
1815        h->index = addr_reg;
1816        h->index_ext = addr_type;
1817    } else {
1818        if (a_mask) {
1819            ldst = new_ldst_label(s);
1820
1821            ldst->is_ld = is_ld;
1822            ldst->oi = oi;
1823            ldst->addrlo_reg = addr_reg;
1824
1825            /* tst addr, #mask */
1826            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1827
1828            /* b.ne slow_path */
1829            ldst->label_ptr[0] = s->code_ptr;
1830            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1831        }
1832
1833        if (guest_base || addr_type == TCG_TYPE_I32) {
1834            h->base = TCG_REG_GUEST_BASE;
1835            h->index = addr_reg;
1836            h->index_ext = addr_type;
1837        } else {
1838            h->base = addr_reg;
1839            h->index = TCG_REG_XZR;
1840            h->index_ext = TCG_TYPE_I64;
1841        }
1842    }
1843
1844    return ldst;
1845}
1846
1847static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1848                                   TCGReg data_r, HostAddress h)
1849{
1850    switch (memop & MO_SSIZE) {
1851    case MO_UB:
1852        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1853        break;
1854    case MO_SB:
1855        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1856                       data_r, h.base, h.index_ext, h.index);
1857        break;
1858    case MO_UW:
1859        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1860        break;
1861    case MO_SW:
1862        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1863                       data_r, h.base, h.index_ext, h.index);
1864        break;
1865    case MO_UL:
1866        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1867        break;
1868    case MO_SL:
1869        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1870        break;
1871    case MO_UQ:
1872        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1873        break;
1874    default:
1875        g_assert_not_reached();
1876    }
1877}
1878
1879static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1880                                   TCGReg data_r, HostAddress h)
1881{
1882    switch (memop & MO_SIZE) {
1883    case MO_8:
1884        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1885        break;
1886    case MO_16:
1887        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1888        break;
1889    case MO_32:
1890        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1891        break;
1892    case MO_64:
1893        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1894        break;
1895    default:
1896        g_assert_not_reached();
1897    }
1898}
1899
1900static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1901                            MemOpIdx oi, TCGType data_type)
1902{
1903    TCGLabelQemuLdst *ldst;
1904    HostAddress h;
1905
1906    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1907    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1908
1909    if (ldst) {
1910        ldst->type = data_type;
1911        ldst->datalo_reg = data_reg;
1912        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1913    }
1914}
1915
1916static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1917                            MemOpIdx oi, TCGType data_type)
1918{
1919    TCGLabelQemuLdst *ldst;
1920    HostAddress h;
1921
1922    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1923    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1924
1925    if (ldst) {
1926        ldst->type = data_type;
1927        ldst->datalo_reg = data_reg;
1928        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1929    }
1930}
1931
1932static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1933                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1934{
1935    TCGLabelQemuLdst *ldst;
1936    HostAddress h;
1937    TCGReg base;
1938    bool use_pair;
1939
1940    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1941
1942    /* Compose the final address, as LDP/STP have no indexing. */
1943    if (h.index == TCG_REG_XZR) {
1944        base = h.base;
1945    } else {
1946        base = TCG_REG_TMP2;
1947        if (h.index_ext == TCG_TYPE_I32) {
1948            /* add base, base, index, uxtw */
1949            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1950                         h.base, h.index, MO_32, 0);
1951        } else {
1952            /* add base, base, index */
1953            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1954        }
1955    }
1956
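    /* Without LSE2, LDP/STP is single-copy atomic only per 8-byte half,
       which suffices unless a full 16-byte atom is required.  */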
1957    use_pair = h.aa.atom < MO_128 || have_lse2;
1958
1959    if (!use_pair) {
1960        tcg_insn_unit *branch = NULL;
1961        TCGReg ll, lh, sl, sh;
1962
1963        /*
1964         * If we have already checked for 16-byte alignment, that's all
1965         * we need. Otherwise we have determined that misaligned atomicity
1966         * may be handled with two 8-byte loads.
1967         */
1968        if (h.aa.align < MO_128) {
1969            /*
1970             * TODO: align should be MO_64, so we only need test bit 3,
1971             * which means we could use TBNZ instead of ANDS+B_C.
1972             */
1973            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1974            branch = s->code_ptr;
1975            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1976            use_pair = true;
1977        }
1978
1979        if (is_ld) {
1980            /*
1981             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1982             *    ldxp lo, hi, [base]
1983             *    stxp t0, lo, hi, [base]
1984             *    cbnz t0, .-8
1985             * Require no overlap between data{lo,hi} and base.
1986             */
1987            if (datalo == base || datahi == base) {
1988                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1989                base = TCG_REG_TMP2;
1990            }
1991            ll = sl = datalo;
1992            lh = sh = datahi;
1993        } else {
1994            /*
1995             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1996             * 1: ldxp t0, t1, [base]
1997             *    stxp t0, lo, hi, [base]
1998             *    cbnz t0, 1b
1999             */
2000            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
2001            ll = TCG_REG_TMP0;
2002            lh = TCG_REG_TMP1;
2003            sl = datalo;
2004            sh = datahi;
2005        }
2006
2007        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
2008        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
2009        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
2010
2011        if (use_pair) {
2012            /* "b .+8", branching across the one insn of use_pair. */
2013            tcg_out_insn(s, 3206, B, 2);
2014            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
2015        }
2016    }
2017
2018    if (use_pair) {
2019        if (is_ld) {
2020            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
2021        } else {
2022            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
2023        }
2024    }
2025
2026    if (ldst) {
2027        ldst->type = TCG_TYPE_I128;
2028        ldst->datalo_reg = datalo;
2029        ldst->datahi_reg = datahi;
2030        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2031    }
2032}
2033
2034static const tcg_insn_unit *tb_ret_addr;
2035
2036static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2037{
2038    const tcg_insn_unit *target;
2039    ptrdiff_t offset;
2040
2041    /* Reuse the zeroing that exists for goto_ptr.  */
2042    if (a0 == 0) {
2043        target = tcg_code_gen_epilogue;
2044    } else {
2045        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
2046        target = tb_ret_addr;
2047    }
2048
2049    offset = tcg_pcrel_diff(s, target) >> 2;
2050    if (offset == sextract64(offset, 0, 26)) {
2051        tcg_out_insn(s, 3206, B, offset);
2052    } else {
2053        /*
2054         * Only x16/x17 generate BTI type Jump (2),
2055         * other registers generate BTI type Jump|Call (3).
2056         */
2057        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
2058        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
2059        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2060    }
2061}
2062
2063static void tcg_out_goto_tb(TCGContext *s, int which)
2064{
2065    /*
2066     * The direct branch, or the indirect address load, will be patched
2067     * by tb_target_set_jmp_target.  Assert early that the indirect load
2068     * offset is in range, regardless of the direct branch distance.
2069     */
2070    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
2071    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
2072
2073    set_jmp_insn_offset(s, which);
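    /* The patchable insn: either a direct branch to the target or an LDR
       of the indirect target into TMP0; the BR below is used only in the
       indirect case.  */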
2074    tcg_out32(s, I3206_B);
2075    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
2076    set_jmp_reset_offset(s, which);
2077    tcg_out_bti(s, BTI_J);
2078}
2079
2080void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2081                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2082{
2083    uintptr_t d_addr = tb->jmp_target_addr[n];
2084    ptrdiff_t d_offset = d_addr - jmp_rx;
2085    tcg_insn_unit insn;
2086
2087    /* Either directly branch, or indirect branch load. */
2088    if (d_offset == sextract64(d_offset, 0, 28)) {
2089        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2090    } else {
2091        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2092        ptrdiff_t i_offset = i_addr - jmp_rx;
2093
2094        /* Note that we asserted this in range in tcg_out_goto_tb. */
2095        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2096    }
2097    qatomic_set((uint32_t *)jmp_rw, insn);
2098    flush_idcache_range(jmp_rx, jmp_rw, 4);
2099}
2100
2101static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2102                       const TCGArg args[TCG_MAX_OP_ARGS],
2103                       const int const_args[TCG_MAX_OP_ARGS])
2104{
2105    /* 99% of the time, we can signal the use of extension registers
2106       by looking to see if the opcode handles 64-bit data.  */
2107    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2108
2109    /* Hoist the loads of the most common arguments.  */
2110    TCGArg a0 = args[0];
2111    TCGArg a1 = args[1];
2112    TCGArg a2 = args[2];
2113    int c2 = const_args[2];
2114
2115    /* Operands with the "rZ" constraint are a register or the zero register;
2116       a constant operand is necessarily 0, so REG0 tests only const_args[I].  */
2117#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2118
2119    switch (opc) {
2120    case INDEX_op_goto_ptr:
2121        tcg_out_insn(s, 3207, BR, a0);
2122        break;
2123
2124    case INDEX_op_br:
2125        tcg_out_goto_label(s, arg_label(a0));
2126        break;
2127
2128    case INDEX_op_ld8u_i32:
2129    case INDEX_op_ld8u_i64:
2130        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2131        break;
2132    case INDEX_op_ld8s_i32:
2133        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2134        break;
2135    case INDEX_op_ld8s_i64:
2136        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2137        break;
2138    case INDEX_op_ld16u_i32:
2139    case INDEX_op_ld16u_i64:
2140        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2141        break;
2142    case INDEX_op_ld16s_i32:
2143        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2144        break;
2145    case INDEX_op_ld16s_i64:
2146        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2147        break;
2148    case INDEX_op_ld_i32:
2149    case INDEX_op_ld32u_i64:
2150        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2151        break;
2152    case INDEX_op_ld32s_i64:
2153        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2154        break;
2155    case INDEX_op_ld_i64:
2156        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2157        break;
2158
2159    case INDEX_op_st8_i32:
2160    case INDEX_op_st8_i64:
2161        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2162        break;
2163    case INDEX_op_st16_i32:
2164    case INDEX_op_st16_i64:
2165        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2166        break;
2167    case INDEX_op_st_i32:
2168    case INDEX_op_st32_i64:
2169        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2170        break;
2171    case INDEX_op_st_i64:
2172        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2173        break;
2174
2175    case INDEX_op_add_i32:
2176        a2 = (int32_t)a2;
2177        /* FALLTHRU */
2178    case INDEX_op_add_i64:
2179        if (c2) {
2180            tcg_out_addsubi(s, ext, a0, a1, a2);
2181        } else {
2182            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2183        }
2184        break;
2185
2186    case INDEX_op_sub_i32:
2187        a2 = (int32_t)a2;
2188        /* FALLTHRU */
2189    case INDEX_op_sub_i64:
2190        if (c2) {
2191            tcg_out_addsubi(s, ext, a0, a1, -a2);
2192        } else {
2193            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2194        }
2195        break;
2196
2197    case INDEX_op_neg_i64:
2198    case INDEX_op_neg_i32:
2199        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2200        break;
2201
2202    case INDEX_op_and_i32:
2203        a2 = (int32_t)a2;
2204        /* FALLTHRU */
2205    case INDEX_op_and_i64:
2206        if (c2) {
2207            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2208        } else {
2209            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2210        }
2211        break;
2212
2213    case INDEX_op_andc_i32:
2214        a2 = (int32_t)a2;
2215        /* FALLTHRU */
2216    case INDEX_op_andc_i64:
2217        if (c2) {
2218            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2219        } else {
2220            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2221        }
2222        break;
2223
2224    case INDEX_op_or_i32:
2225        a2 = (int32_t)a2;
2226        /* FALLTHRU */
2227    case INDEX_op_or_i64:
2228        if (c2) {
2229            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2230        } else {
2231            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2232        }
2233        break;
2234
2235    case INDEX_op_orc_i32:
2236        a2 = (int32_t)a2;
2237        /* FALLTHRU */
2238    case INDEX_op_orc_i64:
2239        if (c2) {
2240            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2241        } else {
2242            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2243        }
2244        break;
2245
2246    case INDEX_op_xor_i32:
2247        a2 = (int32_t)a2;
2248        /* FALLTHRU */
2249    case INDEX_op_xor_i64:
2250        if (c2) {
2251            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2252        } else {
2253            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2254        }
2255        break;
2256
2257    case INDEX_op_eqv_i32:
2258        a2 = (int32_t)a2;
2259        /* FALLTHRU */
2260    case INDEX_op_eqv_i64:
2261        if (c2) {
2262            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2263        } else {
2264            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2265        }
2266        break;
2267
2268    case INDEX_op_not_i64:
2269    case INDEX_op_not_i32:
2270        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2271        break;
2272
2273    case INDEX_op_mul_i64:
2274    case INDEX_op_mul_i32:
2275        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2276        break;
2277
2278    case INDEX_op_div_i64:
2279    case INDEX_op_div_i32:
2280        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2281        break;
2282    case INDEX_op_divu_i64:
2283    case INDEX_op_divu_i32:
2284        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2285        break;
2286
2287    case INDEX_op_rem_i64:
2288    case INDEX_op_rem_i32:
2289        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2290        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2291        break;
2292    case INDEX_op_remu_i64:
2293    case INDEX_op_remu_i32:
2294        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2295        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2296        break;
2297
2298    case INDEX_op_shl_i64:
2299    case INDEX_op_shl_i32:
2300        if (c2) {
2301            tcg_out_shl(s, ext, a0, a1, a2);
2302        } else {
2303            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2304        }
2305        break;
2306
2307    case INDEX_op_shr_i64:
2308    case INDEX_op_shr_i32:
2309        if (c2) {
2310            tcg_out_shr(s, ext, a0, a1, a2);
2311        } else {
2312            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2313        }
2314        break;
2315
2316    case INDEX_op_sar_i64:
2317    case INDEX_op_sar_i32:
2318        if (c2) {
2319            tcg_out_sar(s, ext, a0, a1, a2);
2320        } else {
2321            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2322        }
2323        break;
2324
2325    case INDEX_op_rotr_i64:
2326    case INDEX_op_rotr_i32:
2327        if (c2) {
2328            tcg_out_rotr(s, ext, a0, a1, a2);
2329        } else {
2330            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2331        }
2332        break;
2333
2334    case INDEX_op_rotl_i64:
2335    case INDEX_op_rotl_i32:
2336        if (c2) {
2337            tcg_out_rotl(s, ext, a0, a1, a2);
2338        } else {
2339            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2340            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2341        }
2342        break;
2343
2344    case INDEX_op_clz_i64:
2345    case INDEX_op_clz_i32:
2346        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2347        break;
2348    case INDEX_op_ctz_i64:
2349    case INDEX_op_ctz_i32:
2350        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2351        break;
2352
2353    case INDEX_op_brcond_i32:
2354        a1 = (int32_t)a1;
2355        /* FALLTHRU */
2356    case INDEX_op_brcond_i64:
2357        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2358        break;
2359
2360    case INDEX_op_setcond_i32:
2361        a2 = (int32_t)a2;
2362        /* FALLTHRU */
2363    case INDEX_op_setcond_i64:
2364        tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2365        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2366        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2367                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2368        break;
2369
2370    case INDEX_op_negsetcond_i32:
2371        a2 = (int32_t)a2;
2372        /* FALLTHRU */
2373    case INDEX_op_negsetcond_i64:
2374        tcg_out_cmp(s, ext, args[3], a1, a2, c2);
2375        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2376        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2377                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2378        break;
2379
2380    case INDEX_op_movcond_i32:
2381        a2 = (int32_t)a2;
2382        /* FALLTHRU */
2383    case INDEX_op_movcond_i64:
2384        tcg_out_cmp(s, ext, args[5], a1, a2, c2);
2385        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2386        break;
2387
2388    case INDEX_op_qemu_ld_a32_i32:
2389    case INDEX_op_qemu_ld_a64_i32:
2390    case INDEX_op_qemu_ld_a32_i64:
2391    case INDEX_op_qemu_ld_a64_i64:
2392        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2393        break;
2394    case INDEX_op_qemu_st_a32_i32:
2395    case INDEX_op_qemu_st_a64_i32:
2396    case INDEX_op_qemu_st_a32_i64:
2397    case INDEX_op_qemu_st_a64_i64:
2398        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2399        break;
2400    case INDEX_op_qemu_ld_a32_i128:
2401    case INDEX_op_qemu_ld_a64_i128:
2402        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2403        break;
2404    case INDEX_op_qemu_st_a32_i128:
2405    case INDEX_op_qemu_st_a64_i128:
2406        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2407        break;
2408
2409    case INDEX_op_bswap64_i64:
2410        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2411        break;
2412    case INDEX_op_bswap32_i64:
2413        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2414        if (a2 & TCG_BSWAP_OS) {
2415            tcg_out_ext32s(s, a0, a0);
2416        }
2417        break;
2418    case INDEX_op_bswap32_i32:
2419        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2420        break;
2421    case INDEX_op_bswap16_i64:
2422    case INDEX_op_bswap16_i32:
2423        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2424        if (a2 & TCG_BSWAP_OS) {
2425            /* Output must be sign-extended. */
2426            tcg_out_ext16s(s, ext, a0, a0);
2427        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2428            /* Output must be zero-extended, but input isn't. */
2429            tcg_out_ext16u(s, a0, a0);
2430        }
2431        break;
2432
2433    case INDEX_op_deposit_i64:
2434    case INDEX_op_deposit_i32:
2435        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2436        break;
2437
2438    case INDEX_op_extract_i64:
2439    case INDEX_op_extract_i32:
2440        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2441        break;
2442
2443    case INDEX_op_sextract_i64:
2444    case INDEX_op_sextract_i32:
2445        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2446        break;
2447
2448    case INDEX_op_extract2_i64:
2449    case INDEX_op_extract2_i32:
2450        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2451        break;
2452
2453    case INDEX_op_add2_i32:
2454        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2455                        (int32_t)args[4], args[5], const_args[4],
2456                        const_args[5], false);
2457        break;
2458    case INDEX_op_add2_i64:
2459        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2460                        args[5], const_args[4], const_args[5], false);
2461        break;
2462    case INDEX_op_sub2_i32:
2463        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2464                        (int32_t)args[4], args[5], const_args[4],
2465                        const_args[5], true);
2466        break;
2467    case INDEX_op_sub2_i64:
2468        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2469                        args[5], const_args[4], const_args[5], true);
2470        break;
2471
2472    case INDEX_op_muluh_i64:
2473        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2474        break;
2475    case INDEX_op_mulsh_i64:
2476        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2477        break;
2478
2479    case INDEX_op_mb:
2480        tcg_out_mb(s, a0);
2481        break;
2482
2483    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2484    case INDEX_op_mov_i64:
2485    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2486    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2487    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2488    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2489    case INDEX_op_ext8s_i64:
2490    case INDEX_op_ext8u_i32:
2491    case INDEX_op_ext8u_i64:
2492    case INDEX_op_ext16s_i64:
2493    case INDEX_op_ext16s_i32:
2494    case INDEX_op_ext16u_i64:
2495    case INDEX_op_ext16u_i32:
2496    case INDEX_op_ext32s_i64:
2497    case INDEX_op_ext32u_i64:
2498    case INDEX_op_ext_i32_i64:
2499    case INDEX_op_extu_i32_i64:
2500    case INDEX_op_extrl_i64_i32:
2501    default:
2502        g_assert_not_reached();
2503    }
2504
2505#undef REG0
2506}
2507
2508static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2509                           unsigned vecl, unsigned vece,
2510                           const TCGArg args[TCG_MAX_OP_ARGS],
2511                           const int const_args[TCG_MAX_OP_ARGS])
2512{
2513    static const AArch64Insn cmp_vec_insn[16] = {
2514        [TCG_COND_EQ] = I3616_CMEQ,
2515        [TCG_COND_GT] = I3616_CMGT,
2516        [TCG_COND_GE] = I3616_CMGE,
2517        [TCG_COND_GTU] = I3616_CMHI,
2518        [TCG_COND_GEU] = I3616_CMHS,
2519    };
2520    static const AArch64Insn cmp_scalar_insn[16] = {
2521        [TCG_COND_EQ] = I3611_CMEQ,
2522        [TCG_COND_GT] = I3611_CMGT,
2523        [TCG_COND_GE] = I3611_CMGE,
2524        [TCG_COND_GTU] = I3611_CMHI,
2525        [TCG_COND_GEU] = I3611_CMHS,
2526    };
2527    static const AArch64Insn cmp0_vec_insn[16] = {
2528        [TCG_COND_EQ] = I3617_CMEQ0,
2529        [TCG_COND_GT] = I3617_CMGT0,
2530        [TCG_COND_GE] = I3617_CMGE0,
2531        [TCG_COND_LT] = I3617_CMLT0,
2532        [TCG_COND_LE] = I3617_CMLE0,
2533    };
2534    static const AArch64Insn cmp0_scalar_insn[16] = {
2535        [TCG_COND_EQ] = I3612_CMEQ0,
2536        [TCG_COND_GT] = I3612_CMGT0,
2537        [TCG_COND_GE] = I3612_CMGE0,
2538        [TCG_COND_LT] = I3612_CMLT0,
2539        [TCG_COND_LE] = I3612_CMLE0,
2540    };
2541
2542    TCGType type = vecl + TCG_TYPE_V64;
2543    unsigned is_q = vecl;
2544    bool is_scalar = !is_q && vece == MO_64;
2545    TCGArg a0, a1, a2, a3;
2546    int cmode, imm8;
2547
2548    a0 = args[0];
2549    a1 = args[1];
2550    a2 = args[2];
2551
2552    switch (opc) {
2553    case INDEX_op_ld_vec:
2554        tcg_out_ld(s, type, a0, a1, a2);
2555        break;
2556    case INDEX_op_st_vec:
2557        tcg_out_st(s, type, a0, a1, a2);
2558        break;
2559    case INDEX_op_dupm_vec:
2560        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2561        break;
2562    case INDEX_op_add_vec:
2563        if (is_scalar) {
2564            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2565        } else {
2566            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2567        }
2568        break;
2569    case INDEX_op_sub_vec:
2570        if (is_scalar) {
2571            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2572        } else {
2573            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2574        }
2575        break;
2576    case INDEX_op_mul_vec:
2577        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2578        break;
2579    case INDEX_op_neg_vec:
2580        if (is_scalar) {
2581            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2582        } else {
2583            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2584        }
2585        break;
2586    case INDEX_op_abs_vec:
2587        if (is_scalar) {
2588            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2589        } else {
2590            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2591        }
2592        break;
2593    case INDEX_op_and_vec:
2594        if (const_args[2]) {
2595            is_shimm1632(~a2, &cmode, &imm8);
2596            if (a0 == a1) {
2597                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2598                return;
2599            }
2600            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2601            a2 = a0;
2602        }
2603        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2604        break;
2605    case INDEX_op_or_vec:
2606        if (const_args[2]) {
2607            is_shimm1632(a2, &cmode, &imm8);
2608            if (a0 == a1) {
2609                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2610                return;
2611            }
2612            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2613            a2 = a0;
2614        }
2615        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2616        break;
2617    case INDEX_op_andc_vec:
2618        if (const_args[2]) {
2619            is_shimm1632(a2, &cmode, &imm8);
2620            if (a0 == a1) {
2621                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2622                return;
2623            }
2624            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2625            a2 = a0;
2626        }
2627        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2628        break;
2629    case INDEX_op_orc_vec:
2630        if (const_args[2]) {
2631            is_shimm1632(~a2, &cmode, &imm8);
2632            if (a0 == a1) {
2633                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2634                return;
2635            }
2636            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2637            a2 = a0;
2638        }
2639        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2640        break;
2641    case INDEX_op_xor_vec:
2642        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2643        break;
2644    case INDEX_op_ssadd_vec:
2645        if (is_scalar) {
2646            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2647        } else {
2648            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2649        }
2650        break;
2651    case INDEX_op_sssub_vec:
2652        if (is_scalar) {
2653            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2654        } else {
2655            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2656        }
2657        break;
2658    case INDEX_op_usadd_vec:
2659        if (is_scalar) {
2660            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2661        } else {
2662            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2663        }
2664        break;
2665    case INDEX_op_ussub_vec:
2666        if (is_scalar) {
2667            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2668        } else {
2669            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2670        }
2671        break;
2672    case INDEX_op_smax_vec:
2673        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2674        break;
2675    case INDEX_op_smin_vec:
2676        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2677        break;
2678    case INDEX_op_umax_vec:
2679        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2680        break;
2681    case INDEX_op_umin_vec:
2682        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2683        break;
2684    case INDEX_op_not_vec:
2685        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2686        break;
2687    case INDEX_op_shli_vec:
2688        if (is_scalar) {
2689            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2690        } else {
2691            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2692        }
2693        break;
2694    case INDEX_op_shri_vec:
2695        if (is_scalar) {
2696            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2697        } else {
2698            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2699        }
2700        break;
2701    case INDEX_op_sari_vec:
2702        if (is_scalar) {
2703            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2704        } else {
2705            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2706        }
2707        break;
2708    case INDEX_op_aa64_sli_vec:
2709        if (is_scalar) {
2710            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2711        } else {
2712            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2713        }
2714        break;
2715    case INDEX_op_shlv_vec:
2716        if (is_scalar) {
2717            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2718        } else {
2719            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2720        }
2721        break;
2722    case INDEX_op_aa64_sshl_vec:
2723        if (is_scalar) {
2724            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2725        } else {
2726            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2727        }
2728        break;
2729    case INDEX_op_cmp_vec:
2730        {
2731            TCGCond cond = args[3];
2732            AArch64Insn insn;
2733
2734            if (cond == TCG_COND_NE) {
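                /* x != 0 is CMTST x, x (test any bit set); the general NE
                   case is CMEQ followed by NOT.  */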
2735                if (const_args[2]) {
2736                    if (is_scalar) {
2737                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2738                    } else {
2739                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2740                    }
2741                } else {
2742                    if (is_scalar) {
2743                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2744                    } else {
2745                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2746                    }
2747                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2748                }
2749            } else {
2750                if (const_args[2]) {
2751                    if (is_scalar) {
2752                        insn = cmp0_scalar_insn[cond];
2753                        if (insn) {
2754                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2755                            break;
2756                        }
2757                    } else {
2758                        insn = cmp0_vec_insn[cond];
2759                        if (insn) {
2760                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2761                            break;
2762                        }
2763                    }
2764                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2765                    a2 = TCG_VEC_TMP0;
2766                }
2767                if (is_scalar) {
2768                    insn = cmp_scalar_insn[cond];
2769                    if (insn == 0) {
2770                        TCGArg t;
2771                        t = a1, a1 = a2, a2 = t;
2772                        cond = tcg_swap_cond(cond);
2773                        insn = cmp_scalar_insn[cond];
2774                        tcg_debug_assert(insn != 0);
2775                    }
2776                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2777                } else {
2778                    insn = cmp_vec_insn[cond];
2779                    if (insn == 0) {
2780                        TCGArg t;
2781                        t = a1, a1 = a2, a2 = t;
2782                        cond = tcg_swap_cond(cond);
2783                        insn = cmp_vec_insn[cond];
2784                        tcg_debug_assert(insn != 0);
2785                    }
2786                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2787                }
2788            }
2789        }
2790        break;
2791
2792    case INDEX_op_bitsel_vec:
2793        a3 = args[3];
2794        if (a0 == a3) {
2795            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2796        } else if (a0 == a2) {
2797            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2798        } else {
2799            if (a0 != a1) {
2800                tcg_out_mov(s, type, a0, a1);
2801            }
2802            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2803        }
2804        break;
2805
2806    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2807    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2808    default:
2809        g_assert_not_reached();
2810    }
2811}
2812
2813int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2814{
2815    switch (opc) {
2816    case INDEX_op_add_vec:
2817    case INDEX_op_sub_vec:
2818    case INDEX_op_and_vec:
2819    case INDEX_op_or_vec:
2820    case INDEX_op_xor_vec:
2821    case INDEX_op_andc_vec:
2822    case INDEX_op_orc_vec:
2823    case INDEX_op_neg_vec:
2824    case INDEX_op_abs_vec:
2825    case INDEX_op_not_vec:
2826    case INDEX_op_cmp_vec:
2827    case INDEX_op_shli_vec:
2828    case INDEX_op_shri_vec:
2829    case INDEX_op_sari_vec:
2830    case INDEX_op_ssadd_vec:
2831    case INDEX_op_sssub_vec:
2832    case INDEX_op_usadd_vec:
2833    case INDEX_op_ussub_vec:
2834    case INDEX_op_shlv_vec:
2835    case INDEX_op_bitsel_vec:
2836        return 1;
2837    case INDEX_op_rotli_vec:
2838    case INDEX_op_shrv_vec:
2839    case INDEX_op_sarv_vec:
2840    case INDEX_op_rotlv_vec:
2841    case INDEX_op_rotrv_vec:
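        /* Supported via expansion in tcg_expand_vec_op below.  */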
2842        return -1;
2843    case INDEX_op_mul_vec:
2844    case INDEX_op_smax_vec:
2845    case INDEX_op_smin_vec:
2846    case INDEX_op_umax_vec:
2847    case INDEX_op_umin_vec:
2848        return vece < MO_64;
2849
2850    default:
2851        return 0;
2852    }
2853}
2854
2855void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2856                       TCGArg a0, ...)
2857{
2858    va_list va;
2859    TCGv_vec v0, v1, v2, t1, t2, c1;
2860    TCGArg a2;
2861
2862    va_start(va, a0);
2863    v0 = temp_tcgv_vec(arg_temp(a0));
2864    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2865    a2 = va_arg(va, TCGArg);
2866    va_end(va);
2867
2868    switch (opc) {
2869    case INDEX_op_rotli_vec:
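        /* rotl(x, n) == (x >> (width - n)) | (x << n): shift right into a
           temp, then SLI inserts the bits shifted left by n.  */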
2870        t1 = tcg_temp_new_vec(type);
2871        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2872        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2873                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2874        tcg_temp_free_vec(t1);
2875        break;
2876
2877    case INDEX_op_shrv_vec:
2878    case INDEX_op_sarv_vec:
2879        /* Right shifts are negative left shifts for AArch64.  */
2880        v2 = temp_tcgv_vec(arg_temp(a2));
2881        t1 = tcg_temp_new_vec(type);
2882        tcg_gen_neg_vec(vece, t1, v2);
2883        opc = (opc == INDEX_op_shrv_vec
2884               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2885        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2886                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2887        tcg_temp_free_vec(t1);
2888        break;
2889
2890    case INDEX_op_rotlv_vec:
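        /* rotlv: (x << y) | (x >> (width - y)); the right shift is done as
           a left shift by the negative amount (y - width).  */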
2891        v2 = temp_tcgv_vec(arg_temp(a2));
2892        t1 = tcg_temp_new_vec(type);
2893        c1 = tcg_constant_vec(type, vece, 8 << vece);
2894        tcg_gen_sub_vec(vece, t1, v2, c1);
2895        /* Right shifts are negative left shifts for AArch64.  */
2896        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2897                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2898        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2899                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2900        tcg_gen_or_vec(vece, v0, v0, t1);
2901        tcg_temp_free_vec(t1);
2902        break;
2903
2904    case INDEX_op_rotrv_vec:
2905        v2 = temp_tcgv_vec(arg_temp(a2));
2906        t1 = tcg_temp_new_vec(type);
2907        t2 = tcg_temp_new_vec(type);
2908        c1 = tcg_constant_vec(type, vece, 8 << vece);
2909        tcg_gen_neg_vec(vece, t1, v2);
2910        tcg_gen_sub_vec(vece, t2, c1, v2);
2911        /* Right shifts are negative left shifts for AArch64.  */
2912        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2913                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2914        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2915                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2916        tcg_gen_or_vec(vece, v0, t1, t2);
2917        tcg_temp_free_vec(t1);
2918        tcg_temp_free_vec(t2);
2919        break;
2920
2921    default:
2922        g_assert_not_reached();
2923    }
2924}
2925
2926static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2927{
2928    switch (op) {
2929    case INDEX_op_goto_ptr:
2930        return C_O0_I1(r);
2931
2932    case INDEX_op_ld8u_i32:
2933    case INDEX_op_ld8s_i32:
2934    case INDEX_op_ld16u_i32:
2935    case INDEX_op_ld16s_i32:
2936    case INDEX_op_ld_i32:
2937    case INDEX_op_ld8u_i64:
2938    case INDEX_op_ld8s_i64:
2939    case INDEX_op_ld16u_i64:
2940    case INDEX_op_ld16s_i64:
2941    case INDEX_op_ld32u_i64:
2942    case INDEX_op_ld32s_i64:
2943    case INDEX_op_ld_i64:
2944    case INDEX_op_neg_i32:
2945    case INDEX_op_neg_i64:
2946    case INDEX_op_not_i32:
2947    case INDEX_op_not_i64:
2948    case INDEX_op_bswap16_i32:
2949    case INDEX_op_bswap32_i32:
2950    case INDEX_op_bswap16_i64:
2951    case INDEX_op_bswap32_i64:
2952    case INDEX_op_bswap64_i64:
2953    case INDEX_op_ext8s_i32:
2954    case INDEX_op_ext16s_i32:
2955    case INDEX_op_ext8u_i32:
2956    case INDEX_op_ext16u_i32:
2957    case INDEX_op_ext8s_i64:
2958    case INDEX_op_ext16s_i64:
2959    case INDEX_op_ext32s_i64:
2960    case INDEX_op_ext8u_i64:
2961    case INDEX_op_ext16u_i64:
2962    case INDEX_op_ext32u_i64:
2963    case INDEX_op_ext_i32_i64:
2964    case INDEX_op_extu_i32_i64:
2965    case INDEX_op_extract_i32:
2966    case INDEX_op_extract_i64:
2967    case INDEX_op_sextract_i32:
2968    case INDEX_op_sextract_i64:
2969        return C_O1_I1(r, r);
2970
2971    case INDEX_op_st8_i32:
2972    case INDEX_op_st16_i32:
2973    case INDEX_op_st_i32:
2974    case INDEX_op_st8_i64:
2975    case INDEX_op_st16_i64:
2976    case INDEX_op_st32_i64:
2977    case INDEX_op_st_i64:
2978        return C_O0_I2(rZ, r);
2979
2980    case INDEX_op_add_i32:
2981    case INDEX_op_add_i64:
2982    case INDEX_op_sub_i32:
2983    case INDEX_op_sub_i64:
2984        return C_O1_I2(r, r, rA);
2985
2986    case INDEX_op_setcond_i32:
2987    case INDEX_op_setcond_i64:
2988    case INDEX_op_negsetcond_i32:
2989    case INDEX_op_negsetcond_i64:
2990        return C_O1_I2(r, r, rC);
2991
2992    case INDEX_op_mul_i32:
2993    case INDEX_op_mul_i64:
2994    case INDEX_op_div_i32:
2995    case INDEX_op_div_i64:
2996    case INDEX_op_divu_i32:
2997    case INDEX_op_divu_i64:
2998    case INDEX_op_rem_i32:
2999    case INDEX_op_rem_i64:
3000    case INDEX_op_remu_i32:
3001    case INDEX_op_remu_i64:
3002    case INDEX_op_muluh_i64:
3003    case INDEX_op_mulsh_i64:
3004        return C_O1_I2(r, r, r);
3005
3006    case INDEX_op_and_i32:
3007    case INDEX_op_and_i64:
3008    case INDEX_op_or_i32:
3009    case INDEX_op_or_i64:
3010    case INDEX_op_xor_i32:
3011    case INDEX_op_xor_i64:
3012    case INDEX_op_andc_i32:
3013    case INDEX_op_andc_i64:
3014    case INDEX_op_orc_i32:
3015    case INDEX_op_orc_i64:
3016    case INDEX_op_eqv_i32:
3017    case INDEX_op_eqv_i64:
3018        return C_O1_I2(r, r, rL);
3019
3020    case INDEX_op_shl_i32:
3021    case INDEX_op_shr_i32:
3022    case INDEX_op_sar_i32:
3023    case INDEX_op_rotl_i32:
3024    case INDEX_op_rotr_i32:
3025    case INDEX_op_shl_i64:
3026    case INDEX_op_shr_i64:
3027    case INDEX_op_sar_i64:
3028    case INDEX_op_rotl_i64:
3029    case INDEX_op_rotr_i64:
3030        return C_O1_I2(r, r, ri);
3031
3032    case INDEX_op_clz_i32:
3033    case INDEX_op_ctz_i32:
3034    case INDEX_op_clz_i64:
3035    case INDEX_op_ctz_i64:
3036        return C_O1_I2(r, r, rAL);
3037
3038    case INDEX_op_brcond_i32:
3039    case INDEX_op_brcond_i64:
3040        return C_O0_I2(r, rC);
3041
3042    case INDEX_op_movcond_i32:
3043    case INDEX_op_movcond_i64:
3044        return C_O1_I4(r, r, rC, rZ, rZ);
3045
3046    case INDEX_op_qemu_ld_a32_i32:
3047    case INDEX_op_qemu_ld_a64_i32:
3048    case INDEX_op_qemu_ld_a32_i64:
3049    case INDEX_op_qemu_ld_a64_i64:
3050        return C_O1_I1(r, r);
3051    case INDEX_op_qemu_ld_a32_i128:
3052    case INDEX_op_qemu_ld_a64_i128:
3053        return C_O2_I1(r, r, r);
3054    case INDEX_op_qemu_st_a32_i32:
3055    case INDEX_op_qemu_st_a64_i32:
3056    case INDEX_op_qemu_st_a32_i64:
3057    case INDEX_op_qemu_st_a64_i64:
3058        return C_O0_I2(rZ, r);
3059    case INDEX_op_qemu_st_a32_i128:
3060    case INDEX_op_qemu_st_a64_i128:
3061        return C_O0_I3(rZ, rZ, r);
3062
3063    case INDEX_op_deposit_i32:
3064    case INDEX_op_deposit_i64:
3065        return C_O1_I2(r, 0, rZ);
3066
3067    case INDEX_op_extract2_i32:
3068    case INDEX_op_extract2_i64:
3069        return C_O1_I2(r, rZ, rZ);
3070
3071    case INDEX_op_add2_i32:
3072    case INDEX_op_add2_i64:
3073    case INDEX_op_sub2_i32:
3074    case INDEX_op_sub2_i64:
3075        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
3076
3077    case INDEX_op_add_vec:
3078    case INDEX_op_sub_vec:
3079    case INDEX_op_mul_vec:
3080    case INDEX_op_xor_vec:
3081    case INDEX_op_ssadd_vec:
3082    case INDEX_op_sssub_vec:
3083    case INDEX_op_usadd_vec:
3084    case INDEX_op_ussub_vec:
3085    case INDEX_op_smax_vec:
3086    case INDEX_op_smin_vec:
3087    case INDEX_op_umax_vec:
3088    case INDEX_op_umin_vec:
3089    case INDEX_op_shlv_vec:
3090    case INDEX_op_shrv_vec:
3091    case INDEX_op_sarv_vec:
3092    case INDEX_op_aa64_sshl_vec:
3093        return C_O1_I2(w, w, w);
3094    case INDEX_op_not_vec:
3095    case INDEX_op_neg_vec:
3096    case INDEX_op_abs_vec:
3097    case INDEX_op_shli_vec:
3098    case INDEX_op_shri_vec:
3099    case INDEX_op_sari_vec:
3100        return C_O1_I1(w, w);
3101    case INDEX_op_ld_vec:
3102    case INDEX_op_dupm_vec:
3103        return C_O1_I1(w, r);
3104    case INDEX_op_st_vec:
3105        return C_O0_I2(w, r);
3106    case INDEX_op_dup_vec:
3107        return C_O1_I1(w, wr);
3108    case INDEX_op_or_vec:
3109    case INDEX_op_andc_vec:
3110        return C_O1_I2(w, w, wO);
3111    case INDEX_op_and_vec:
3112    case INDEX_op_orc_vec:
3113        return C_O1_I2(w, w, wN);
3114    case INDEX_op_cmp_vec:
3115        return C_O1_I2(w, w, wZ);
3116    case INDEX_op_bitsel_vec:
3117        return C_O1_I3(w, w, w, w);
3118    case INDEX_op_aa64_sli_vec:
3119        return C_O1_I2(w, 0, w);
3120
3121    default:
3122        g_assert_not_reached();
3123    }
3124}
3125
3126static void tcg_target_init(TCGContext *s)
3127{
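    /* The low 32 bits of a regset are the general registers,
       the high 32 bits the vector registers.  */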
3128    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3129    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3130    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3131    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3132
3133    tcg_target_call_clobber_regs = -1ull;
3134    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3135    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3136    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3137    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3138    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3139    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3140    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3141    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3142    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3143    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3144    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3145    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3146    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3147    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3148    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3149    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3150    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3151    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3152    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3153
3154    s->reserved_regs = 0;
3155    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3156    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3157    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3158    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3159    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3160    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3161    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3162}
3163
3164/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3165#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3166
3167#define FRAME_SIZE \
3168    ((PUSH_SIZE \
3169      + TCG_STATIC_CALL_ARGS_SIZE \
3170      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3171      + TCG_TARGET_STACK_ALIGN - 1) \
3172     & ~(TCG_TARGET_STACK_ALIGN - 1))
3173
3174/* We're expecting a 2 byte uleb128 encoded value.  */
3175QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3176
3177/* We're expecting to use a single ADDI insn.  */
3178QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3179
3180static void tcg_target_qemu_prologue(TCGContext *s)
3181{
3182    TCGReg r;
3183
3184    tcg_out_bti(s, BTI_C);
3185
3186    /* Push (FP, LR) and allocate space for all saved registers.  */
3187    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3188                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3189
3190    /* Set up frame pointer for canonical unwinding.  */
3191    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3192
3193    /* Store callee-preserved regs x19..x28.  */
3194    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3195        int ofs = (r - TCG_REG_X19 + 2) * 8;
3196        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3197    }
3198
3199    /* Make stack space for TCG locals.  */
3200    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3201                 FRAME_SIZE - PUSH_SIZE);
3202
3203    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3204    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3205                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3206
3207    if (!tcg_use_softmmu) {
3208        /*
3209         * Note that XZR cannot be encoded in the address base register slot,
3210         * as that actually encodes SP.  Depending on the guest, we may need
3211         * to zero-extend the guest address via the address index register slot,
3212         * therefore we need to load even a zero guest base into a register.
3213         */
3214        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3215        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3216    }
3217
3218    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3219    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3220
3221    /*
3222     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3223     * and fall through to the rest of the epilogue.
3224     */
3225    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3226    tcg_out_bti(s, BTI_J);
3227    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3228
3229    /* TB epilogue */
3230    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3231    tcg_out_bti(s, BTI_J);
3232
3233    /* Remove TCG locals stack space.  */
3234    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3235                 FRAME_SIZE - PUSH_SIZE);
3236
3237    /* Restore registers x19..x28.  */
3238    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3239        int ofs = (r - TCG_REG_X19 + 2) * 8;
3240        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3241    }
3242
3243    /* Pop (FP, LR), restore SP to previous frame.  */
3244    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3245                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3246    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3247}
3248
3249static void tcg_out_tb_start(TCGContext *s)
3250{
3251    tcg_out_bti(s, BTI_J);
3252}
3253
3254static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3255{
3256    int i;
3257    for (i = 0; i < count; ++i) {
3258        p[i] = NOP;
3259    }
3260}
3261
3262typedef struct {
3263    DebugFrameHeader h;
3264    uint8_t fde_def_cfa[4];
3265    uint8_t fde_reg_ofs[24];
3266} DebugFrame;
3267
3268#define ELF_HOST_MACHINE EM_AARCH64
3269
3270static const DebugFrame debug_frame = {
3271    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3272    .h.cie.id = -1,
3273    .h.cie.version = 1,
3274    .h.cie.code_align = 1,
3275    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3276    .h.cie.return_column = TCG_REG_LR,
3277
3278    /* Total FDE size does not include the "len" member.  */
3279    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3280
3281    .fde_def_cfa = {
3282        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3283        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3284        (FRAME_SIZE >> 7)
3285    },
3286    .fde_reg_ofs = {
3287        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3288        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3289        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3290        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3291        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3292        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3293        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3294        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3295        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3296        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3297        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3298        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3299    }
3300};
3301
3302void tcg_register_jit(const void *buf, size_t buf_size)
3303{
3304    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3305}
3306