xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 21e9a8aefb0313174c1861df84e5e49bd84026c8)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to reuse TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
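/* For example, the emitters below OR "ext << 31" into the opcode, so passing
   TCG_TYPE_I64 (1) sets SF for a 64-bit operation and TCG_TYPE_I32 (0)
   leaves it clear for a 32-bit one.  */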
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#define TCG_REG_GUEST_BASE TCG_REG_X28
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
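/* A worked example of the patching above: a B or BL whose target is 8 bytes
   ahead has offset +2 (tcg_insn_unit is 4 bytes), and deposit32() writes
   that into the imm26 field at bits [25:0].  reloc_pc19() below does the
   same for the 19-bit conditional-branch field at bits [23:5].  */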
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
123#define TCG_CT_CONST_AIMM 0x100
124#define TCG_CT_CONST_LIMM 0x200
125#define TCG_CT_CONST_ZERO 0x400
126#define TCG_CT_CONST_MONE 0x800
127#define TCG_CT_CONST_ORRI 0x1000
128#define TCG_CT_CONST_ANDI 0x2000
129
130#define ALL_GENERAL_REGS  0xffffffffu
131#define ALL_VECTOR_REGS   0xffffffff00000000ull
132
133/* Match a constant valid for addition (12-bit, optionally shifted).  */
134static inline bool is_aimm(uint64_t val)
135{
136    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
137}
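/* For example, 0xfff and 0x123000 are valid (the latter via LSL #12), while
   0x1001 is not, because its set bits straddle the two 12-bit fields.  */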
138
139/* Match a constant valid for logical operations.  */
140static inline bool is_limm(uint64_t val)
141{
142    /* Taking a simplified view of the logical immediates for now, ignoring
143       the replication that can happen across the field.  Match bit patterns
144       of the forms
145           0....01....1
146           0..01..10..0
147       and their inverses.  */
148
149    /* Make things easier below, by testing the form with msb clear. */
150    if ((int64_t)val < 0) {
151        val = ~val;
152    }
153    if (val == 0) {
154        return false;
155    }
156    val += val & -val;
157    return (val & (val - 1)) == 0;
158}
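/* A worked example of the trick above: val = 0x0ff0 plus its lowest set bit
   (0x10) is 0x1000, a power of two, so it is accepted; val = 0x0ff1 becomes
   0x0ff2, which still has more than one bit set, so it is rejected.  */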
159
160/* Return true if v16 is a valid 16-bit shifted immediate.  */
161static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
162{
163    if (v16 == (v16 & 0xff)) {
164        *cmode = 0x8;
165        *imm8 = v16 & 0xff;
166        return true;
167    } else if (v16 == (v16 & 0xff00)) {
168        *cmode = 0xa;
169        *imm8 = v16 >> 8;
170        return true;
171    }
172    return false;
173}
174
175/* Return true if v32 is a valid 32-bit shifted immediate.  */
176static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
177{
178    if (v32 == (v32 & 0xff)) {
179        *cmode = 0x0;
180        *imm8 = v32 & 0xff;
181        return true;
182    } else if (v32 == (v32 & 0xff00)) {
183        *cmode = 0x2;
184        *imm8 = (v32 >> 8) & 0xff;
185        return true;
186    } else if (v32 == (v32 & 0xff0000)) {
187        *cmode = 0x4;
188        *imm8 = (v32 >> 16) & 0xff;
189        return true;
190    } else if (v32 == (v32 & 0xff000000)) {
191        *cmode = 0x6;
192        *imm8 = v32 >> 24;
193        return true;
194    }
195    return false;
196}
197
198/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
199static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
200{
201    if ((v32 & 0xffff00ff) == 0xff) {
202        *cmode = 0xc;
203        *imm8 = (v32 >> 8) & 0xff;
204        return true;
205    } else if ((v32 & 0xff00ffff) == 0xffff) {
206        *cmode = 0xd;
207        *imm8 = (v32 >> 16) & 0xff;
208        return true;
209    }
210    return false;
211}
212
213/* Return true if v32 is a valid float32 immediate.  */
214static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
215{
216    if (extract32(v32, 0, 19) == 0
217        && (extract32(v32, 25, 6) == 0x20
218            || extract32(v32, 25, 6) == 0x1f)) {
219        *cmode = 0xf;
220        *imm8 = (extract32(v32, 31, 1) << 7)
221              | (extract32(v32, 25, 1) << 6)
222              | extract32(v32, 19, 6);
223        return true;
224    }
225    return false;
226}
227
228/* Return true if v64 is a valid float64 immediate.  */
229static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
230{
231    if (extract64(v64, 0, 48) == 0
232        && (extract64(v64, 54, 9) == 0x100
233            || extract64(v64, 54, 9) == 0x0ff)) {
234        *cmode = 0xf;
235        *imm8 = (extract64(v64, 63, 1) << 7)
236              | (extract64(v64, 54, 1) << 6)
237              | extract64(v64, 48, 6);
238        return true;
239    }
240    return false;
241}
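/* Both helpers above accept exactly the values expressible as an AdvSIMD
   FMOV (immediate): low-order fraction bits all zero and an exponent of the
   form NOT(b):b:...:b, leaving 8 significant bits to pack into imm8.
   For example, 1.0f (0x3f800000) yields cmode 0xf, imm8 0x70.  */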
242
243/*
244 * Return non-zero if v32 can be formed by MOVI+ORR.
245 * Place the parameters for MOVI in (cmode, imm8).
246 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
247 */
248static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
249{
250    int i;
251
252    for (i = 6; i > 0; i -= 2) {
253        /* Mask out one byte we can add with ORR.  */
254        uint32_t tmp = v32 & ~(0xffu << (i * 4));
255        if (is_shimm32(tmp, cmode, imm8) ||
256            is_soimm32(tmp, cmode, imm8)) {
257            break;
258        }
259    }
260    return i;
261}
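/* For example, v32 = 0x00ff00ff: clearing the byte at bits [23:16] leaves
   0x000000ff, a valid MOVI (cmode 0, imm8 0xff), so we return 4 -- the ORR
   cmode that re-inserts an imm8 shifted left by 16.  A return of 0 means no
   MOVI+ORR expansion exists.  */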
262
263/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
264static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
265{
266    if (v32 == deposit32(v32, 16, 16, v32)) {
267        return is_shimm16(v32, cmode, imm8);
268    } else {
269        return is_shimm32(v32, cmode, imm8);
270    }
271}
272
273static bool tcg_target_const_match(int64_t val, int ct,
274                                   TCGType type, TCGCond cond, int vece)
275{
276    if (ct & TCG_CT_CONST) {
277        return 1;
278    }
279    if (type == TCG_TYPE_I32) {
280        val = (int32_t)val;
281    }
282    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
283        return 1;
284    }
285    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
286        return 1;
287    }
288    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
289        return 1;
290    }
291    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
292        return 1;
293    }
294
295    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
296    case 0:
297        break;
298    case TCG_CT_CONST_ANDI:
299        val = ~val;
300        /* fallthru */
301    case TCG_CT_CONST_ORRI:
302        if (val == deposit64(val, 32, 32, val)) {
303            int cmode, imm8;
304            return is_shimm1632(val, &cmode, &imm8);
305        }
306        break;
307    default:
308        /* Both bits should not be set for the same insn.  */
309        g_assert_not_reached();
310    }
311
312    return 0;
313}
314
315enum aarch64_cond_code {
316    COND_EQ = 0x0,
317    COND_NE = 0x1,
318    COND_CS = 0x2,     /* Unsigned greater or equal */
319    COND_HS = COND_CS, /* ALIAS greater or equal */
320    COND_CC = 0x3,     /* Unsigned less than */
321    COND_LO = COND_CC, /* ALIAS Lower */
322    COND_MI = 0x4,     /* Negative */
323    COND_PL = 0x5,     /* Zero or greater */
324    COND_VS = 0x6,     /* Overflow */
325    COND_VC = 0x7,     /* No overflow */
326    COND_HI = 0x8,     /* Unsigned greater than */
327    COND_LS = 0x9,     /* Unsigned less or equal */
328    COND_GE = 0xa,
329    COND_LT = 0xb,
330    COND_GT = 0xc,
331    COND_LE = 0xd,
332    COND_AL = 0xe,
333    COND_NV = 0xf, /* behaves like COND_AL here */
334};
335
336static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
337    [TCG_COND_EQ] = COND_EQ,
338    [TCG_COND_NE] = COND_NE,
339    [TCG_COND_LT] = COND_LT,
340    [TCG_COND_GE] = COND_GE,
341    [TCG_COND_LE] = COND_LE,
342    [TCG_COND_GT] = COND_GT,
343    /* unsigned */
344    [TCG_COND_LTU] = COND_LO,
345    [TCG_COND_GTU] = COND_HI,
346    [TCG_COND_GEU] = COND_HS,
347    [TCG_COND_LEU] = COND_LS,
348};
349
350typedef enum {
351    LDST_ST = 0,    /* store */
352    LDST_LD = 1,    /* load */
353    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
354    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
355} AArch64LdstType;
356
357/* We encode the format of the insn into the beginning of the name, so that
358   we can have the preprocessor help "typecheck" the insn vs the output
359   function.  Arm didn't provide us with nice names for the formats, so we
360   use the section number of the architecture reference manual in which the
361   instruction group is described.  */
362typedef enum {
363    /* Compare and branch (immediate).  */
364    I3201_CBZ       = 0x34000000,
365    I3201_CBNZ      = 0x35000000,
366
367    /* Conditional branch (immediate).  */
368    I3202_B_C       = 0x54000000,
369
370    /* Unconditional branch (immediate).  */
371    I3206_B         = 0x14000000,
372    I3206_BL        = 0x94000000,
373
374    /* Unconditional branch (register).  */
375    I3207_BR        = 0xd61f0000,
376    I3207_BLR       = 0xd63f0000,
377    I3207_RET       = 0xd65f0000,
378
379    /* AdvSIMD load/store single structure.  */
380    I3303_LD1R      = 0x0d40c000,
381
382    /* Load (literal), for loading constant pool entries at a pc-relative offset */
383    I3305_LDR       = 0x58000000,
384    I3305_LDR_v64   = 0x5c000000,
385    I3305_LDR_v128  = 0x9c000000,
386
387    /* Load/store exclusive. */
388    I3306_LDXP      = 0xc8600000,
389    I3306_STXP      = 0xc8200000,
390
391    /* Load/store register.  Described here as 3.3.12, but the helper
392       that emits them can transform to 3.3.10 or 3.3.13.  */
393    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
394    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
395    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
396    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
397
398    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
399    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
400    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
401    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
402
403    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
404    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
405
406    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
407    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
408    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
409
410    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
411    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
412
413    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
414    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
415
416    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
417    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
418
419    I3312_TO_I3310  = 0x00200800,
420    I3312_TO_I3313  = 0x01000000,
421
422    /* Load/store register pair instructions.  */
423    I3314_LDP       = 0x28400000,
424    I3314_STP       = 0x28000000,
425
426    /* Add/subtract immediate instructions.  */
427    I3401_ADDI      = 0x11000000,
428    I3401_ADDSI     = 0x31000000,
429    I3401_SUBI      = 0x51000000,
430    I3401_SUBSI     = 0x71000000,
431
432    /* Bitfield instructions.  */
433    I3402_BFM       = 0x33000000,
434    I3402_SBFM      = 0x13000000,
435    I3402_UBFM      = 0x53000000,
436
437    /* Extract instruction.  */
438    I3403_EXTR      = 0x13800000,
439
440    /* Logical immediate instructions.  */
441    I3404_ANDI      = 0x12000000,
442    I3404_ORRI      = 0x32000000,
443    I3404_EORI      = 0x52000000,
444    I3404_ANDSI     = 0x72000000,
445
446    /* Move wide immediate instructions.  */
447    I3405_MOVN      = 0x12800000,
448    I3405_MOVZ      = 0x52800000,
449    I3405_MOVK      = 0x72800000,
450
451    /* PC relative addressing instructions.  */
452    I3406_ADR       = 0x10000000,
453    I3406_ADRP      = 0x90000000,
454
455    /* Add/subtract extended register instructions. */
456    I3501_ADD       = 0x0b200000,
457
458    /* Add/subtract shifted register instructions (without a shift).  */
459    I3502_ADD       = 0x0b000000,
460    I3502_ADDS      = 0x2b000000,
461    I3502_SUB       = 0x4b000000,
462    I3502_SUBS      = 0x6b000000,
463
464    /* Add/subtract shifted register instructions (with a shift).  */
465    I3502S_ADD_LSL  = I3502_ADD,
466
467    /* Add/subtract with carry instructions.  */
468    I3503_ADC       = 0x1a000000,
469    I3503_SBC       = 0x5a000000,
470
471    /* Conditional select instructions.  */
472    I3506_CSEL      = 0x1a800000,
473    I3506_CSINC     = 0x1a800400,
474    I3506_CSINV     = 0x5a800000,
475    I3506_CSNEG     = 0x5a800400,
476
477    /* Data-processing (1 source) instructions.  */
478    I3507_CLZ       = 0x5ac01000,
479    I3507_RBIT      = 0x5ac00000,
480    I3507_REV       = 0x5ac00000, /* + size << 10 */
481
482    /* Data-processing (2 source) instructions.  */
483    I3508_LSLV      = 0x1ac02000,
484    I3508_LSRV      = 0x1ac02400,
485    I3508_ASRV      = 0x1ac02800,
486    I3508_RORV      = 0x1ac02c00,
487    I3508_SMULH     = 0x9b407c00,
488    I3508_UMULH     = 0x9bc07c00,
489    I3508_UDIV      = 0x1ac00800,
490    I3508_SDIV      = 0x1ac00c00,
491
492    /* Data-processing (3 source) instructions.  */
493    I3509_MADD      = 0x1b000000,
494    I3509_MSUB      = 0x1b008000,
495
496    /* Logical shifted register instructions (without a shift).  */
497    I3510_AND       = 0x0a000000,
498    I3510_BIC       = 0x0a200000,
499    I3510_ORR       = 0x2a000000,
500    I3510_ORN       = 0x2a200000,
501    I3510_EOR       = 0x4a000000,
502    I3510_EON       = 0x4a200000,
503    I3510_ANDS      = 0x6a000000,
504
505    /* Logical shifted register instructions (with a shift).  */
506    I3502S_AND_LSR  = I3510_AND | (1 << 22),
507
508    /* AdvSIMD copy */
509    I3605_DUP      = 0x0e000400,
510    I3605_INS      = 0x4e001c00,
511    I3605_UMOV     = 0x0e003c00,
512
513    /* AdvSIMD modified immediate */
514    I3606_MOVI      = 0x0f000400,
515    I3606_MVNI      = 0x2f000400,
516    I3606_BIC       = 0x2f001400,
517    I3606_ORR       = 0x0f001400,
518
519    /* AdvSIMD scalar shift by immediate */
520    I3609_SSHR      = 0x5f000400,
521    I3609_SSRA      = 0x5f001400,
522    I3609_SHL       = 0x5f005400,
523    I3609_USHR      = 0x7f000400,
524    I3609_USRA      = 0x7f001400,
525    I3609_SLI       = 0x7f005400,
526
527    /* AdvSIMD scalar three same */
528    I3611_SQADD     = 0x5e200c00,
529    I3611_SQSUB     = 0x5e202c00,
530    I3611_CMGT      = 0x5e203400,
531    I3611_CMGE      = 0x5e203c00,
532    I3611_SSHL      = 0x5e204400,
533    I3611_ADD       = 0x5e208400,
534    I3611_CMTST     = 0x5e208c00,
535    I3611_UQADD     = 0x7e200c00,
536    I3611_UQSUB     = 0x7e202c00,
537    I3611_CMHI      = 0x7e203400,
538    I3611_CMHS      = 0x7e203c00,
539    I3611_USHL      = 0x7e204400,
540    I3611_SUB       = 0x7e208400,
541    I3611_CMEQ      = 0x7e208c00,
542
543    /* AdvSIMD scalar two-reg misc */
544    I3612_CMGT0     = 0x5e208800,
545    I3612_CMEQ0     = 0x5e209800,
546    I3612_CMLT0     = 0x5e20a800,
547    I3612_ABS       = 0x5e20b800,
548    I3612_CMGE0     = 0x7e208800,
549    I3612_CMLE0     = 0x7e209800,
550    I3612_NEG       = 0x7e20b800,
551
552    /* AdvSIMD shift by immediate */
553    I3614_SSHR      = 0x0f000400,
554    I3614_SSRA      = 0x0f001400,
555    I3614_SHL       = 0x0f005400,
556    I3614_SLI       = 0x2f005400,
557    I3614_USHR      = 0x2f000400,
558    I3614_USRA      = 0x2f001400,
559
560    /* AdvSIMD three same.  */
561    I3616_ADD       = 0x0e208400,
562    I3616_AND       = 0x0e201c00,
563    I3616_BIC       = 0x0e601c00,
564    I3616_BIF       = 0x2ee01c00,
565    I3616_BIT       = 0x2ea01c00,
566    I3616_BSL       = 0x2e601c00,
567    I3616_EOR       = 0x2e201c00,
568    I3616_MUL       = 0x0e209c00,
569    I3616_ORR       = 0x0ea01c00,
570    I3616_ORN       = 0x0ee01c00,
571    I3616_SUB       = 0x2e208400,
572    I3616_CMGT      = 0x0e203400,
573    I3616_CMGE      = 0x0e203c00,
574    I3616_CMTST     = 0x0e208c00,
575    I3616_CMHI      = 0x2e203400,
576    I3616_CMHS      = 0x2e203c00,
577    I3616_CMEQ      = 0x2e208c00,
578    I3616_SMAX      = 0x0e206400,
579    I3616_SMIN      = 0x0e206c00,
580    I3616_SSHL      = 0x0e204400,
581    I3616_SQADD     = 0x0e200c00,
582    I3616_SQSUB     = 0x0e202c00,
583    I3616_UMAX      = 0x2e206400,
584    I3616_UMIN      = 0x2e206c00,
585    I3616_UQADD     = 0x2e200c00,
586    I3616_UQSUB     = 0x2e202c00,
587    I3616_USHL      = 0x2e204400,
588
589    /* AdvSIMD two-reg misc.  */
590    I3617_CMGT0     = 0x0e208800,
591    I3617_CMEQ0     = 0x0e209800,
592    I3617_CMLT0     = 0x0e20a800,
593    I3617_CMGE0     = 0x2e208800,
594    I3617_CMLE0     = 0x2e209800,
595    I3617_NOT       = 0x2e205800,
596    I3617_ABS       = 0x0e20b800,
597    I3617_NEG       = 0x2e20b800,
598
599    /* System instructions.  */
600    NOP             = 0xd503201f,
601    DMB_ISH         = 0xd50338bf,
602    DMB_LD          = 0x00000100,
603    DMB_ST          = 0x00000200,
604
605    BTI_C           = 0xd503245f,
606    BTI_J           = 0xd503249f,
607    BTI_JC          = 0xd50324df,
608} AArch64Insn;
609
610static inline uint32_t tcg_in32(TCGContext *s)
611{
612    uint32_t v = *(uint32_t *)s->code_ptr;
613    return v;
614}
615
616/* Emit an opcode with "type-checking" of the format.  */
617#define tcg_out_insn(S, FMT, OP, ...) \
618    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
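/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); naming an opcode
   that does not exist in that format fails to compile.  */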
619
620static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
621                              TCGReg rt, TCGReg rn, unsigned size)
622{
623    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
624}
625
626static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
627                              int imm19, TCGReg rt)
628{
629    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
630}
631
632static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
633                              TCGReg rt, TCGReg rt2, TCGReg rn)
634{
635    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
636}
637
638static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
639                              TCGReg rt, int imm19)
640{
641    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
642}
643
644static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
645                              TCGCond c, int imm19)
646{
647    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
648}
649
650static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
651{
652    tcg_out32(s, insn | (imm26 & 0x03ffffff));
653}
654
655static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
656{
657    tcg_out32(s, insn | rn << 5);
658}
659
660static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
661                              TCGReg r1, TCGReg r2, TCGReg rn,
662                              tcg_target_long ofs, bool pre, bool w)
663{
664    insn |= 1u << 31; /* ext */
665    insn |= pre << 24;
666    insn |= w << 23;
667
668    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
669    insn |= (ofs & (0x7f << 3)) << (15 - 3);
670
671    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
672}
673
674static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
675                              TCGReg rd, TCGReg rn, uint64_t aimm)
676{
677    if (aimm > 0xfff) {
678        tcg_debug_assert((aimm & 0xfff) == 0);
679        aimm >>= 12;
680        tcg_debug_assert(aimm <= 0xfff);
681        aimm |= 1 << 12;  /* apply LSL 12 */
682    }
683    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
684}
685
686/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
687   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
688   that feed the DecodeBitMasks pseudo function.  */
689static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
690                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
691{
692    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
693              | rn << 5 | rd);
694}
695
696#define tcg_out_insn_3404  tcg_out_insn_3402
697
698static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
699                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
700{
701    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
702              | rn << 5 | rd);
703}
704
705/* This function is used for the Move (wide immediate) instruction group.
706   Note that SHIFT is a full shift count, not the 2 bit HW field. */
707static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
708                              TCGReg rd, uint16_t half, unsigned shift)
709{
710    tcg_debug_assert((shift & ~0x30) == 0);
711    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
712}
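/* E.g. SHIFT = 32 encodes as hw = 2: the assert limits SHIFT to 0/16/32/48,
   and "shift << (21 - 4)" places shift/16 into the hw field at bits [22:21].  */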
713
714static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
715                              TCGReg rd, int64_t disp)
716{
717    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
718}
719
720static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
721                                     TCGType sf, TCGReg rd, TCGReg rn,
722                                     TCGReg rm, int opt, int imm3)
723{
724    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
725              imm3 << 10 | rn << 5 | rd);
726}
727
728/* This function is for 3.5.2 (Add/subtract shifted register) and 3.5.10
729   (Logical shifted register), for the rare case of supplying a shift amount.  */
730static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
731                                      TCGType ext, TCGReg rd, TCGReg rn,
732                                      TCGReg rm, int imm6)
733{
734    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
735}
736
737/* This function is for 3.5.2 (Add/subtract shifted register),
738   and 3.5.10 (Logical shifted register), for the vast majorty of cases
739   when we don't want to apply a shift.  Thus it can also be used for
740   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
741static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
742                              TCGReg rd, TCGReg rn, TCGReg rm)
743{
744    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
745}
746
747#define tcg_out_insn_3503  tcg_out_insn_3502
748#define tcg_out_insn_3508  tcg_out_insn_3502
749#define tcg_out_insn_3510  tcg_out_insn_3502
750
751static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
752                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
753{
754    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
755              | tcg_cond_to_aarch64[c] << 12);
756}
757
758static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
759                              TCGReg rd, TCGReg rn)
760{
761    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
762}
763
764static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
765                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
766{
767    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
768}
769
770static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
771                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
772{
773    /* Note that bit 11 set means general register input.  Therefore
774       we can handle both register sets with one function.  */
775    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
776              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
777}
778
779static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
780                              TCGReg rd, bool op, int cmode, uint8_t imm8)
781{
782    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
783              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
784}
785
786static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
787                              TCGReg rd, TCGReg rn, unsigned immhb)
788{
789    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
790}
791
792static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
793                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
794{
795    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
796              | (rn & 0x1f) << 5 | (rd & 0x1f));
797}
798
799static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
800                              unsigned size, TCGReg rd, TCGReg rn)
801{
802    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
803}
804
805static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
806                              TCGReg rd, TCGReg rn, unsigned immhb)
807{
808    tcg_out32(s, insn | q << 30 | immhb << 16
809              | (rn & 0x1f) << 5 | (rd & 0x1f));
810}
811
812static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
813                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
814{
815    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
816              | (rn & 0x1f) << 5 | (rd & 0x1f));
817}
818
819static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
820                              unsigned size, TCGReg rd, TCGReg rn)
821{
822    tcg_out32(s, insn | q << 30 | (size << 22)
823              | (rn & 0x1f) << 5 | (rd & 0x1f));
824}
825
826static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
827                              TCGReg rd, TCGReg base, TCGType ext,
828                              TCGReg regoff)
829{
830    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
831    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
832              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
833}
834
835static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
836                              TCGReg rd, TCGReg rn, intptr_t offset)
837{
838    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
839}
840
841static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
842                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
843{
844    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
845    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
846              | rn << 5 | (rd & 0x1f));
847}
848
849static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
850{
851    /*
852     * While BTI insns are nops on hosts without FEAT_BTI,
853     * there is no point in emitting them in that case either.
854     */
855    if (cpuinfo & CPUINFO_BTI) {
856        tcg_out32(s, insn);
857    }
858}
859
860/* Register to register move using ORR (shifted register with no shift). */
861static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
862{
863    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
864}
865
866/* Register to register move using ADDI (move to/from SP).  */
867static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
868{
869    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
870}
871
872/* This function is used for the Logical (immediate) instruction group.
873   The value of LIMM must satisfy IS_LIMM.  See the comment above about
874   only supporting simplified logical immediates.  */
875static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
876                             TCGReg rd, TCGReg rn, uint64_t limm)
877{
878    unsigned h, l, r, c;
879
880    tcg_debug_assert(is_limm(limm));
881
882    h = clz64(limm);
883    l = ctz64(limm);
884    if (l == 0) {
885        r = 0;                  /* form 0....01....1 */
886        c = ctz64(~limm) - 1;
887        if (h == 0) {
888            r = clz64(~limm);   /* form 1..10..01..1 */
889            c += r;
890        }
891    } else {
892        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
893        c = r - h - 1;
894    }
895    if (ext == TCG_TYPE_I32) {
896        r &= 31;
897        c &= 31;
898    }
899
900    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
901}
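/* A worked example: limm = 0x0ff0 gives l = 4 and h = 52, hence r = 60 and
   c = 7, i.e. a run of 8 ones rotated right by 60 (equivalently left by 4),
   which DecodeBitMasks expands back to 0x0ff0.  */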
902
903static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
904                             TCGReg rd, int64_t v64)
905{
906    bool q = type == TCG_TYPE_V128;
907    int cmode, imm8, i;
908
909    /* Test all bytes equal first.  */
910    if (vece == MO_8) {
911        imm8 = (uint8_t)v64;
912        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
913        return;
914    }
915
916    /*
917     * Test all bytes 0x00 or 0xff second.  This can match cases that
918     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
919     */
920    for (i = imm8 = 0; i < 8; i++) {
921        uint8_t byte = v64 >> (i * 8);
922        if (byte == 0xff) {
923            imm8 |= 1 << i;
924        } else if (byte != 0) {
925            goto fail_bytes;
926        }
927    }
928    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
929    return;
930 fail_bytes:
931
932    /*
933     * Tests for various replications.  For each element width, if we
934     * cannot find an expansion there's no point checking a larger
935     * width because we already know by replication it cannot match.
936     */
937    if (vece == MO_16) {
938        uint16_t v16 = v64;
939
940        if (is_shimm16(v16, &cmode, &imm8)) {
941            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
942            return;
943        }
944        if (is_shimm16(~v16, &cmode, &imm8)) {
945            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
946            return;
947        }
948
949        /*
950         * Otherwise, all remaining constants can be loaded in two insns:
951         * rd = v16 & 0xff, rd |= v16 & 0xff00.
952         */
953        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
954        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
955        return;
956    } else if (vece == MO_32) {
957        uint32_t v32 = v64;
958        uint32_t n32 = ~v32;
959
960        if (is_shimm32(v32, &cmode, &imm8) ||
961            is_soimm32(v32, &cmode, &imm8) ||
962            is_fimm32(v32, &cmode, &imm8)) {
963            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
964            return;
965        }
966        if (is_shimm32(n32, &cmode, &imm8) ||
967            is_soimm32(n32, &cmode, &imm8)) {
968            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
969            return;
970        }
971
972        /*
973         * Restrict the set of constants to those we can load with
974         * two instructions.  Others we load from the pool.
975         */
976        i = is_shimm32_pair(v32, &cmode, &imm8);
977        if (i) {
978            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
979            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
980            return;
981        }
982        i = is_shimm32_pair(n32, &cmode, &imm8);
983        if (i) {
984            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
985            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
986            return;
987        }
988    } else if (is_fimm64(v64, &cmode, &imm8)) {
989        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
990        return;
991    }
992
993    /*
994     * As a last resort, load from the constant pool.  Sadly there
995     * is no LD1R (literal), so store the full 16-byte vector.
996     */
997    if (type == TCG_TYPE_V128) {
998        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
999        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1000    } else {
1001        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1002        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1003    }
1004}
1005
1006static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1007                            TCGReg rd, TCGReg rs)
1008{
1009    int is_q = type - TCG_TYPE_V64;
1010    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1011    return true;
1012}
1013
1014static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1015                             TCGReg r, TCGReg base, intptr_t offset)
1016{
1017    TCGReg temp = TCG_REG_TMP0;
1018
1019    if (offset < -0xffffff || offset > 0xffffff) {
1020        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1021        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1022        base = temp;
1023    } else {
1024        AArch64Insn add_insn = I3401_ADDI;
1025
1026        if (offset < 0) {
1027            add_insn = I3401_SUBI;
1028            offset = -offset;
1029        }
1030        if (offset & 0xfff000) {
1031            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1032            base = temp;
1033        }
1034        if (offset & 0xfff) {
1035            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1036            base = temp;
1037        }
1038    }
1039    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1040    return true;
1041}
1042
1043static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1044                         tcg_target_long value)
1045{
1046    tcg_target_long svalue = value;
1047    tcg_target_long ivalue = ~value;
1048    tcg_target_long t0, t1, t2;
1049    int s0, s1;
1050    AArch64Insn opc;
1051
1052    switch (type) {
1053    case TCG_TYPE_I32:
1054    case TCG_TYPE_I64:
1055        tcg_debug_assert(rd < 32);
1056        break;
1057    default:
1058        g_assert_not_reached();
1059    }
1060
1061    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1062       values within [2**31, 2**32-1], we can create smaller sequences by
1063       interpreting this as a negative 32-bit number, while ensuring that
1064       the high 32 bits are cleared by setting SF=0.  */
1065    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1066        svalue = (int32_t)value;
1067        value = (uint32_t)value;
1068        ivalue = (uint32_t)ivalue;
1069        type = TCG_TYPE_I32;
1070    }
1071
1072    /* Speed things up by handling the common case of small positive
1073       and negative values specially.  */
1074    if ((value & ~0xffffull) == 0) {
1075        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1076        return;
1077    } else if ((ivalue & ~0xffffull) == 0) {
1078        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1079        return;
1080    }
1081
1082    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1083       use the sign-extended value.  That lets us match rotated values such
1084       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1085    if (is_limm(svalue)) {
1086        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1087        return;
1088    }
1089
1090    /* Look for host pointer values within 4G of the PC.  This happens
1091       often when loading pointers to QEMU's own data structures.  */
1092    if (type == TCG_TYPE_I64) {
1093        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1094        tcg_target_long disp = value - src_rx;
1095        if (disp == sextract64(disp, 0, 21)) {
1096            tcg_out_insn(s, 3406, ADR, rd, disp);
1097            return;
1098        }
1099        disp = (value >> 12) - (src_rx >> 12);
1100        if (disp == sextract64(disp, 0, 21)) {
1101            tcg_out_insn(s, 3406, ADRP, rd, disp);
1102            if (value & 0xfff) {
1103                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1104            }
1105            return;
1106        }
1107    }
1108
1109    /* Would it take fewer insns to begin with MOVN?  */
1110    if (ctpop64(value) >= 32) {
1111        t0 = ivalue;
1112        opc = I3405_MOVN;
1113    } else {
1114        t0 = value;
1115        opc = I3405_MOVZ;
1116    }
1117    s0 = ctz64(t0) & (63 & -16);
1118    t1 = t0 & ~(0xffffull << s0);
1119    s1 = ctz64(t1) & (63 & -16);
1120    t2 = t1 & ~(0xffffull << s1);
1121    if (t2 == 0) {
1122        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1123        if (t1 != 0) {
1124            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1125        }
1126        return;
1127    }
1128
1129    /* For more than 2 insns, dump it into the constant pool.  */
1130    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1131    tcg_out_insn(s, 3305, LDR, 0, rd);
1132}
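/* A worked example of the ctpop heuristic above: value = 0xffffffff1234ffff
   has 53 set bits, so we work on ivalue = 0x00000000edcb0000 and a single
   MOVN rd, #0xedcb, lsl #16 materializes it, whereas building it up with
   MOVZ/MOVK would exceed two insns and fall back to the constant pool.  */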
1133
1134static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1135{
1136    return false;
1137}
1138
1139static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1140                             tcg_target_long imm)
1141{
1142    /* This function is only used for passing structs by reference. */
1143    g_assert_not_reached();
1144}
1145
1146/* Define something more legible for general use.  */
1147#define tcg_out_ldst_r  tcg_out_insn_3310
1148
1149static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1150                         TCGReg rn, intptr_t offset, int lgsize)
1151{
1152    /* If the offset is naturally aligned and in range, then we can
1153       use the scaled uimm12 encoding */
1154    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1155        uintptr_t scaled_uimm = offset >> lgsize;
1156        if (scaled_uimm <= 0xfff) {
1157            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1158            return;
1159        }
1160    }
1161
1162    /* Small signed offsets can use the unscaled encoding.  */
1163    if (offset >= -256 && offset < 256) {
1164        tcg_out_insn_3312(s, insn, rd, rn, offset);
1165        return;
1166    }
1167
1168    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1169    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1170    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1171}
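/* To illustrate the three cases above for an 8-byte LDRX: offset 0x7ff8
   uses the scaled uimm12 form (0xfff * 8 is the maximum), offset -8 uses
   the unscaled 9-bit signed form, and offset 0x12345 is materialized in
   TMP0 and accessed with the register-offset form.  */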
1172
1173static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1174{
1175    if (ret == arg) {
1176        return true;
1177    }
1178    switch (type) {
1179    case TCG_TYPE_I32:
1180    case TCG_TYPE_I64:
1181        if (ret < 32 && arg < 32) {
1182            tcg_out_movr(s, type, ret, arg);
1183            break;
1184        } else if (ret < 32) {
1185            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1186            break;
1187        } else if (arg < 32) {
1188            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1189            break;
1190        }
1191        /* FALLTHRU */
1192
1193    case TCG_TYPE_V64:
1194        tcg_debug_assert(ret >= 32 && arg >= 32);
1195        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1196        break;
1197    case TCG_TYPE_V128:
1198        tcg_debug_assert(ret >= 32 && arg >= 32);
1199        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1200        break;
1201
1202    default:
1203        g_assert_not_reached();
1204    }
1205    return true;
1206}
1207
1208static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1209                       TCGReg base, intptr_t ofs)
1210{
1211    AArch64Insn insn;
1212    int lgsz;
1213
1214    switch (type) {
1215    case TCG_TYPE_I32:
1216        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1217        lgsz = 2;
1218        break;
1219    case TCG_TYPE_I64:
1220        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1221        lgsz = 3;
1222        break;
1223    case TCG_TYPE_V64:
1224        insn = I3312_LDRVD;
1225        lgsz = 3;
1226        break;
1227    case TCG_TYPE_V128:
1228        insn = I3312_LDRVQ;
1229        lgsz = 4;
1230        break;
1231    default:
1232        g_assert_not_reached();
1233    }
1234    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1235}
1236
1237static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1238                       TCGReg base, intptr_t ofs)
1239{
1240    AArch64Insn insn;
1241    int lgsz;
1242
1243    switch (type) {
1244    case TCG_TYPE_I32:
1245        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1246        lgsz = 2;
1247        break;
1248    case TCG_TYPE_I64:
1249        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1250        lgsz = 3;
1251        break;
1252    case TCG_TYPE_V64:
1253        insn = I3312_STRVD;
1254        lgsz = 3;
1255        break;
1256    case TCG_TYPE_V128:
1257        insn = I3312_STRVQ;
1258        lgsz = 4;
1259        break;
1260    default:
1261        g_assert_not_reached();
1262    }
1263    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1264}
1265
1266static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1267                               TCGReg base, intptr_t ofs)
1268{
1269    if (type <= TCG_TYPE_I64 && val == 0) {
1270        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1271        return true;
1272    }
1273    return false;
1274}
1275
1276static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1277                               TCGReg rn, unsigned int a, unsigned int b)
1278{
1279    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1280}
1281
1282static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1283                                TCGReg rn, unsigned int a, unsigned int b)
1284{
1285    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1286}
1287
1288static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1289                                TCGReg rn, unsigned int a, unsigned int b)
1290{
1291    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1292}
1293
1294static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1295                                TCGReg rn, TCGReg rm, unsigned int a)
1296{
1297    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1298}
1299
1300static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1301                               TCGReg rd, TCGReg rn, unsigned int m)
1302{
1303    int bits = ext ? 64 : 32;
1304    int max = bits - 1;
1305    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1306}
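/* For example, a 64-bit left shift by 8 becomes UBFM rd, rn, #56, #55,
   the standard LSL alias; tcg_out_shr and tcg_out_sar below use the
   corresponding LSR/ASR aliases of UBFM/SBFM.  */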
1307
1308static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1309                               TCGReg rd, TCGReg rn, unsigned int m)
1310{
1311    int max = ext ? 63 : 31;
1312    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1313}
1314
1315static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1316                               TCGReg rd, TCGReg rn, unsigned int m)
1317{
1318    int max = ext ? 63 : 31;
1319    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1320}
1321
1322static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1323                                TCGReg rd, TCGReg rn, unsigned int m)
1324{
1325    int max = ext ? 63 : 31;
1326    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1327}
1328
1329static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1330                                TCGReg rd, TCGReg rn, unsigned int m)
1331{
1332    int max = ext ? 63 : 31;
1333    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1334}
1335
1336static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1337                               TCGReg rn, unsigned lsb, unsigned width)
1338{
1339    unsigned size = ext ? 64 : 32;
1340    unsigned a = (size - lsb) & (size - 1);
1341    unsigned b = width - 1;
1342    tcg_out_bfm(s, ext, rd, rn, a, b);
1343}
1344
1345static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1346                        tcg_target_long b, bool const_b)
1347{
1348    if (const_b) {
1349        /* Using CMP or CMN aliases.  */
1350        if (b >= 0) {
1351            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1352        } else {
1353            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1354        }
1355    } else {
1356        /* Using CMP alias SUBS wzr, Wn, Wm */
1357        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1358    }
1359}
1360
1361static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1362{
1363    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1364    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1365    tcg_out_insn(s, 3206, B, offset);
1366}
1367
1368static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1369{
1370    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1371    if (offset == sextract64(offset, 0, 26)) {
1372        tcg_out_insn(s, 3206, BL, offset);
1373    } else {
1374        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1375        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1376    }
1377}
1378
1379static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1380                         const TCGHelperInfo *info)
1381{
1382    tcg_out_call_int(s, target);
1383}
1384
1385static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1386{
1387    if (!l->has_value) {
1388        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1389        tcg_out_insn(s, 3206, B, 0);
1390    } else {
1391        tcg_out_goto(s, l->u.value_ptr);
1392    }
1393}
1394
1395static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1396                           TCGArg b, bool b_const, TCGLabel *l)
1397{
1398    intptr_t offset;
1399    bool need_cmp;
1400
1401    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1402        need_cmp = false;
1403    } else {
1404        need_cmp = true;
1405        tcg_out_cmp(s, ext, a, b, b_const);
1406    }
1407
1408    if (!l->has_value) {
1409        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1410        offset = tcg_in32(s) >> 5;
1411    } else {
1412        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1413        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1414    }
1415
1416    if (need_cmp) {
1417        tcg_out_insn(s, 3202, B_C, c, offset);
1418    } else if (c == TCG_COND_EQ) {
1419        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1420    } else {
1421        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1422    }
1423}
1424
1425static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1426                               TCGReg rd, TCGReg rn)
1427{
1428    /* REV, REV16, REV32 */
1429    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1430}
1431
1432static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1433                               TCGReg rd, TCGReg rn)
1434{
1435    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1436    int bits = (8 << s_bits) - 1;
1437    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1438}
1439
1440static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1441{
1442    tcg_out_sxt(s, type, MO_8, rd, rn);
1443}
1444
1445static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1446{
1447    tcg_out_sxt(s, type, MO_16, rd, rn);
1448}
1449
1450static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1451{
1452    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1453}
1454
1455static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1456{
1457    tcg_out_ext32s(s, rd, rn);
1458}
1459
1460static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1461                               TCGReg rd, TCGReg rn)
1462{
1463    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1464    int bits = (8 << s_bits) - 1;
1465    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1466}
1467
1468static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1469{
1470    tcg_out_uxt(s, MO_8, rd, rn);
1471}
1472
1473static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1474{
1475    tcg_out_uxt(s, MO_16, rd, rn);
1476}
1477
1478static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1479{
1480    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1481}
1482
1483static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1484{
1485    tcg_out_ext32u(s, rd, rn);
1486}
1487
1488static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1489{
1490    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1491}
1492
1493static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1494                            TCGReg rn, int64_t aimm)
1495{
1496    if (aimm >= 0) {
1497        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1498    } else {
1499        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1500    }
1501}
1502
1503static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1504                            TCGReg rh, TCGReg al, TCGReg ah,
1505                            tcg_target_long bl, tcg_target_long bh,
1506                            bool const_bl, bool const_bh, bool sub)
1507{
1508    TCGReg orig_rl = rl;
1509    AArch64Insn insn;
1510
1511    if (rl == ah || (!const_bh && rl == bh)) {
1512        rl = TCG_REG_TMP0;
1513    }
1514
1515    if (const_bl) {
1516        if (bl < 0) {
1517            bl = -bl;
1518            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1519        } else {
1520            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1521        }
1522
1523        if (unlikely(al == TCG_REG_XZR)) {
1524            /* ??? We want to allow al to be zero for the benefit of
1525               negation via subtraction.  However, that leaves open the
1526               possibility of adding 0+const in the low part, and the
1527               immediate add instructions encode XSP not XZR.  Don't try
1528               anything more elaborate here than loading another zero.  */
1529            al = TCG_REG_TMP0;
1530            tcg_out_movi(s, ext, al, 0);
1531        }
1532        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1533    } else {
1534        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1535    }
1536
1537    insn = I3503_ADC;
1538    if (const_bh) {
1539        /* Note that the only two constants we support are 0 and -1, and
1540           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1541        if ((bh != 0) ^ sub) {
1542            insn = I3503_SBC;
1543        }
1544        bh = TCG_REG_XZR;
1545    } else if (sub) {
1546        insn = I3503_SBC;
1547    }
1548    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1549
1550    tcg_out_mov(s, ext, orig_rl, rl);
1551}
1552
1553static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1554{
1555    static const uint32_t sync[] = {
1556        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1557        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1558        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1559        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1560        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1561    };
1562    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1563}
1564
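/*
 * Count leading/trailing zeros; CTZ is computed as CLZ(RBIT(x)).  When the
 * zero-input result B equals the operation width a bare CLZ suffices, since
 * CLZ(0) is the width; otherwise compare the input with zero and CSEL
 * between the computed count and B, using CSINV/XZR to synthesize the
 * common constants -1 and 0 without an extra move.
 */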
1565static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1566                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1567{
1568    TCGReg a1 = a0;
1569    if (is_ctz) {
1570        a1 = TCG_REG_TMP0;
1571        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1572    }
1573    if (const_b && b == (ext ? 64 : 32)) {
1574        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1575    } else {
1576        AArch64Insn sel = I3506_CSEL;
1577
1578        tcg_out_cmp(s, ext, a0, 0, 1);
1579        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1580
1581        if (const_b) {
1582            if (b == -1) {
1583                b = TCG_REG_XZR;
1584                sel = I3506_CSINV;
1585            } else if (b == 0) {
1586                b = TCG_REG_XZR;
1587            } else {
1588                tcg_out_movi(s, ext, d, b);
1589                b = d;
1590            }
1591        }
1592        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1593    }
1594}
1595
1596typedef struct {
1597    TCGReg base;
1598    TCGReg index;
1599    TCGType index_ext;
1600    TCGAtomAlign aa;
1601} HostAddress;
1602
1603bool tcg_target_has_memory_bswap(MemOp memop)
1604{
1605    return false;
1606}
1607
1608static const TCGLdstHelperParam ldst_helper_param = {
1609    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1610};
1611
1612static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1613{
1614    MemOp opc = get_memop(lb->oi);
1615
1616    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1617        return false;
1618    }
1619
1620    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1621    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1622    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1623    tcg_out_goto(s, lb->raddr);
1624    return true;
1625}
1626
1627static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1628{
1629    MemOp opc = get_memop(lb->oi);
1630
1631    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1632        return false;
1633    }
1634
1635    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1636    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1637    tcg_out_goto(s, lb->raddr);
1638    return true;
1639}
1640
1641/* We expect to use a 7-bit scaled negative offset from ENV.  */
1642#define MIN_TLB_MASK_TABLE_OFS  -512
1643
1644/*
1645 * For system-mode, perform the TLB load and compare.
1646 * For user-mode, perform any required alignment tests.
1647 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1648 * is required and fill in @h with the host address for the fast path.
1649 */
1650static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1651                                           TCGReg addr_reg, MemOpIdx oi,
1652                                           bool is_ld)
1653{
1654    TCGType addr_type = s->addr_type;
1655    TCGLabelQemuLdst *ldst = NULL;
1656    MemOp opc = get_memop(oi);
1657    MemOp s_bits = opc & MO_SIZE;
1658    unsigned a_mask;
1659
1660    h->aa = atom_and_align_for_opc(s, opc,
1661                                   have_lse2 ? MO_ATOM_WITHIN16
1662                                             : MO_ATOM_IFALIGN,
1663                                   s_bits == MO_128);
1664    a_mask = (1 << h->aa.align) - 1;
1665
1666    if (tcg_use_softmmu) {
1667        unsigned s_mask = (1u << s_bits) - 1;
1668        unsigned mem_index = get_mmuidx(oi);
1669        TCGReg addr_adj;
1670        TCGType mask_type;
1671        uint64_t compare_mask;
1672
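        /*
         * In outline, the fast path emitted below is roughly the following
         * (register use and offsets vary with the mmu index and access):
         *
         *    ldp  tmp0, tmp1, [env + mask/table offset]
         *    and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
         *    add  tmp1, tmp1, tmp0
         *    ldr  tmp0, [tmp1 + addr_read/addr_write offset]
         *    ldr  tmp1, [tmp1 + addend offset]
         *    and  tmp2, addr(+adjust), #(page_mask | a_mask)
         *    cmp  tmp0, tmp2
         *    b.ne slow_path
         */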
1673        ldst = new_ldst_label(s);
1674        ldst->is_ld = is_ld;
1675        ldst->oi = oi;
1676        ldst->addrlo_reg = addr_reg;
1677
1678        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1679                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1680
1681        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1682        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1683        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1684        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1685                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1686
1687        /* Extract the TLB index from the address into TMP0.  */
1688        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1689                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1690                     s->page_bits - CPU_TLB_ENTRY_BITS);
1691
1692        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1693        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1694
1695        /* Load the TLB comparator into TMP0, and the fast-path addend into TMP1. */
1696        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1697        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1698                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1699                         : offsetof(CPUTLBEntry, addr_write));
1700        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1701                   offsetof(CPUTLBEntry, addend));
1702
1703        /*
1704         * For aligned accesses, we check the first byte and include
1705         * the alignment bits within the address.  For unaligned access,
1706         * we check that we don't cross pages using the address of the
1707         * last byte of the access.
1708         */
1709        if (a_mask >= s_mask) {
1710            addr_adj = addr_reg;
1711        } else {
1712            addr_adj = TCG_REG_TMP2;
1713            tcg_out_insn(s, 3401, ADDI, addr_type,
1714                         addr_adj, addr_reg, s_mask - a_mask);
1715        }
1716        compare_mask = (uint64_t)s->page_mask | a_mask;
1717
1718        /* Store the page mask part of the address into TMP2.  */
1719        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1720                         addr_adj, compare_mask);
1721
1722        /* Perform the address comparison. */
1723        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1724
1725        /* If not equal, we jump to the slow path. */
1726        ldst->label_ptr[0] = s->code_ptr;
1727        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1728
1729        h->base = TCG_REG_TMP1;
1730        h->index = addr_reg;
1731        h->index_ext = addr_type;
1732    } else {
1733        if (a_mask) {
1734            ldst = new_ldst_label(s);
1735
1736            ldst->is_ld = is_ld;
1737            ldst->oi = oi;
1738            ldst->addrlo_reg = addr_reg;
1739
1740            /* tst addr, #mask */
1741            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1742
1743            /* b.ne slow_path */
1744            ldst->label_ptr[0] = s->code_ptr;
1745            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1746        }
1747
1748        if (guest_base || addr_type == TCG_TYPE_I32) {
1749            h->base = TCG_REG_GUEST_BASE;
1750            h->index = addr_reg;
1751            h->index_ext = addr_type;
1752        } else {
1753            h->base = addr_reg;
1754            h->index = TCG_REG_XZR;
1755            h->index_ext = TCG_TYPE_I64;
1756        }
1757    }
1758
1759    return ldst;
1760}
1761
1762static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1763                                   TCGReg data_r, HostAddress h)
1764{
1765    switch (memop & MO_SSIZE) {
1766    case MO_UB:
1767        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1768        break;
1769    case MO_SB:
1770        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1771                       data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_UW:
1774        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1775        break;
1776    case MO_SW:
1777        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1778                       data_r, h.base, h.index_ext, h.index);
1779        break;
1780    case MO_UL:
1781        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    case MO_SL:
1784        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1785        break;
1786    case MO_UQ:
1787        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1788        break;
1789    default:
1790        g_assert_not_reached();
1791    }
1792}
1793
1794static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1795                                   TCGReg data_r, HostAddress h)
1796{
1797    switch (memop & MO_SIZE) {
1798    case MO_8:
1799        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1800        break;
1801    case MO_16:
1802        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1803        break;
1804    case MO_32:
1805        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1806        break;
1807    case MO_64:
1808        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1809        break;
1810    default:
1811        g_assert_not_reached();
1812    }
1813}
1814
1815static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1816                            MemOpIdx oi, TCGType data_type)
1817{
1818    TCGLabelQemuLdst *ldst;
1819    HostAddress h;
1820
1821    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1822    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1823
1824    if (ldst) {
1825        ldst->type = data_type;
1826        ldst->datalo_reg = data_reg;
1827        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1828    }
1829}
1830
1831static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1832                            MemOpIdx oi, TCGType data_type)
1833{
1834    TCGLabelQemuLdst *ldst;
1835    HostAddress h;
1836
1837    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1838    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1839
1840    if (ldst) {
1841        ldst->type = data_type;
1842        ldst->datalo_reg = data_reg;
1843        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1844    }
1845}
1846
1847static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1848                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1849{
1850    TCGLabelQemuLdst *ldst;
1851    HostAddress h;
1852    TCGReg base;
1853    bool use_pair;
1854
1855    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1856
1857    /* Compose the final address, as LDP/STP have no indexing. */
1858    if (h.index == TCG_REG_XZR) {
1859        base = h.base;
1860    } else {
1861        base = TCG_REG_TMP2;
1862        if (h.index_ext == TCG_TYPE_I32) {
1863            /* add base, base, index, uxtw */
1864            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1865                         h.base, h.index, MO_32, 0);
1866        } else {
1867            /* add base, base, index */
1868            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1869        }
1870    }
1871
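    /*
     * LDP/STP is single-copy atomic for the full 16 bytes only with LSE2;
     * without it, a pair of 8-byte accesses suffices only when the required
     * atomicity is smaller than MO_128.
     */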
1872    use_pair = h.aa.atom < MO_128 || have_lse2;
1873
1874    if (!use_pair) {
1875        tcg_insn_unit *branch = NULL;
1876        TCGReg ll, lh, sl, sh;
1877
1878        /*
1879         * If we have already checked for 16-byte alignment, that's all
1880         * we need. Otherwise we have determined that misaligned atomicity
1881         * may be handled with two 8-byte loads.
1882         */
1883        if (h.aa.align < MO_128) {
1884            /*
1885             * TODO: align should be MO_64, so we need only test bit 3,
1886             * which means we could use TBNZ instead of ANDS+B_C.
1887             */
1888            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1889            branch = s->code_ptr;
1890            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1891            use_pair = true;
1892        }
1893
1894        if (is_ld) {
1895            /*
1896             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1897             *    ldxp lo, hi, [base]
1898             *    stxp t0, lo, hi, [base]
1899             *    cbnz t0, .-8
1900             * Require no overlap between data{lo,hi} and base.
1901             */
1902            if (datalo == base || datahi == base) {
1903                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1904                base = TCG_REG_TMP2;
1905            }
1906            ll = sl = datalo;
1907            lh = sh = datahi;
1908        } else {
1909            /*
1910             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1911             * 1: ldxp t0, t1, [base]
1912             *    stxp t0, lo, hi, [base]
1913             *    cbnz t0, 1b
1914             */
1915            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1916            ll = TCG_REG_TMP0;
1917            lh = TCG_REG_TMP1;
1918            sl = datalo;
1919            sh = datahi;
1920        }
1921
1922        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1923        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1924        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1925
1926        if (use_pair) {
1927            /* "b .+8", branching across the one insn of use_pair. */
1928            tcg_out_insn(s, 3206, B, 2);
1929            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1930        }
1931    }
1932
1933    if (use_pair) {
1934        if (is_ld) {
1935            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1936        } else {
1937            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1938        }
1939    }
1940
1941    if (ldst) {
1942        ldst->type = TCG_TYPE_I128;
1943        ldst->datalo_reg = datalo;
1944        ldst->datahi_reg = datahi;
1945        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1946    }
1947}
1948
1949static const tcg_insn_unit *tb_ret_addr;
1950
1951static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1952{
1953    const tcg_insn_unit *target;
1954    ptrdiff_t offset;
1955
1956    /* Reuse the zeroing that exists for goto_ptr.  */
1957    if (a0 == 0) {
1958        target = tcg_code_gen_epilogue;
1959    } else {
1960        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1961        target = tb_ret_addr;
1962    }
1963
1964    offset = tcg_pcrel_diff(s, target) >> 2;
1965    if (offset == sextract64(offset, 0, 26)) {
1966        tcg_out_insn(s, 3206, B, offset);
1967    } else {
1968        /*
1969         * Only x16/x17 generate BTI type Jump (2),
1970         * other registers generate BTI type Jump|Call (3).
1971         */
1972        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
1973        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1974        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1975    }
1976}
1977
1978static void tcg_out_goto_tb(TCGContext *s, int which)
1979{
1980    /*
1981     * Direct branch, or indirect address load, will be patched
1982     * by tb_target_set_jmp_target.  Assert indirect load offset
1983     * in range early, regardless of direct branch distance.
1984     */
1985    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1986    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1987
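    /*
     * Emit a direct-branch placeholder followed by BR tmp0; the patching in
     * tb_target_set_jmp_target rewrites the first insn to either a direct B
     * to the target or an LDR of the target address into tmp0.
     */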
1988    set_jmp_insn_offset(s, which);
1989    tcg_out32(s, I3206_B);
1990    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1991    set_jmp_reset_offset(s, which);
1992    tcg_out_bti(s, BTI_J);
1993}
1994
1995void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1996                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1997{
1998    uintptr_t d_addr = tb->jmp_target_addr[n];
1999    ptrdiff_t d_offset = d_addr - jmp_rx;
2000    tcg_insn_unit insn;
2001
2002    /* Either branch directly, or load the address for an indirect branch. */
2003    if (d_offset == sextract64(d_offset, 0, 28)) {
2004        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2005    } else {
2006        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2007        ptrdiff_t i_offset = i_addr - jmp_rx;
2008
2009        /* Note that we asserted this in range in tcg_out_goto_tb. */
2010        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2011    }
2012    qatomic_set((uint32_t *)jmp_rw, insn);
2013    flush_idcache_range(jmp_rx, jmp_rw, 4);
2014}
2015
2016static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2017                       const TCGArg args[TCG_MAX_OP_ARGS],
2018                       const int const_args[TCG_MAX_OP_ARGS])
2019{
2020    /* 99% of the time, we can signal the use of extension registers
2021       by looking to see if the opcode handles 64-bit data.  */
2022    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2023
2024    /* Hoist the loads of the most common arguments.  */
2025    TCGArg a0 = args[0];
2026    TCGArg a1 = args[1];
2027    TCGArg a2 = args[2];
2028    int c2 = const_args[2];
2029
2030    /* Some operands are defined with an "rZ" constraint: a register or
2031       the zero register.  For these, checking const_args[I] suffices; we
       need not test args[I] == 0.  */
2032#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2033
2034    switch (opc) {
2035    case INDEX_op_goto_ptr:
2036        tcg_out_insn(s, 3207, BR, a0);
2037        break;
2038
2039    case INDEX_op_br:
2040        tcg_out_goto_label(s, arg_label(a0));
2041        break;
2042
2043    case INDEX_op_ld8u_i32:
2044    case INDEX_op_ld8u_i64:
2045        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2046        break;
2047    case INDEX_op_ld8s_i32:
2048        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2049        break;
2050    case INDEX_op_ld8s_i64:
2051        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2052        break;
2053    case INDEX_op_ld16u_i32:
2054    case INDEX_op_ld16u_i64:
2055        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2056        break;
2057    case INDEX_op_ld16s_i32:
2058        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2059        break;
2060    case INDEX_op_ld16s_i64:
2061        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2062        break;
2063    case INDEX_op_ld_i32:
2064    case INDEX_op_ld32u_i64:
2065        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2066        break;
2067    case INDEX_op_ld32s_i64:
2068        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2069        break;
2070    case INDEX_op_ld_i64:
2071        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2072        break;
2073
2074    case INDEX_op_st8_i32:
2075    case INDEX_op_st8_i64:
2076        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2077        break;
2078    case INDEX_op_st16_i32:
2079    case INDEX_op_st16_i64:
2080        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2081        break;
2082    case INDEX_op_st_i32:
2083    case INDEX_op_st32_i64:
2084        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2085        break;
2086    case INDEX_op_st_i64:
2087        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2088        break;
2089
2090    case INDEX_op_add_i32:
2091        a2 = (int32_t)a2;
2092        /* FALLTHRU */
2093    case INDEX_op_add_i64:
2094        if (c2) {
2095            tcg_out_addsubi(s, ext, a0, a1, a2);
2096        } else {
2097            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2098        }
2099        break;
2100
2101    case INDEX_op_sub_i32:
2102        a2 = (int32_t)a2;
2103        /* FALLTHRU */
2104    case INDEX_op_sub_i64:
2105        if (c2) {
2106            tcg_out_addsubi(s, ext, a0, a1, -a2);
2107        } else {
2108            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2109        }
2110        break;
2111
2112    case INDEX_op_neg_i64:
2113    case INDEX_op_neg_i32:
2114        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2115        break;
2116
2117    case INDEX_op_and_i32:
2118        a2 = (int32_t)a2;
2119        /* FALLTHRU */
2120    case INDEX_op_and_i64:
2121        if (c2) {
2122            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_andc_i32:
2129        a2 = (int32_t)a2;
2130        /* FALLTHRU */
2131    case INDEX_op_andc_i64:
2132        if (c2) {
2133            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2134        } else {
2135            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2136        }
2137        break;
2138
2139    case INDEX_op_or_i32:
2140        a2 = (int32_t)a2;
2141        /* FALLTHRU */
2142    case INDEX_op_or_i64:
2143        if (c2) {
2144            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2145        } else {
2146            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2147        }
2148        break;
2149
2150    case INDEX_op_orc_i32:
2151        a2 = (int32_t)a2;
2152        /* FALLTHRU */
2153    case INDEX_op_orc_i64:
2154        if (c2) {
2155            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2156        } else {
2157            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2158        }
2159        break;
2160
2161    case INDEX_op_xor_i32:
2162        a2 = (int32_t)a2;
2163        /* FALLTHRU */
2164    case INDEX_op_xor_i64:
2165        if (c2) {
2166            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2167        } else {
2168            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2169        }
2170        break;
2171
2172    case INDEX_op_eqv_i32:
2173        a2 = (int32_t)a2;
2174        /* FALLTHRU */
2175    case INDEX_op_eqv_i64:
2176        if (c2) {
2177            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2178        } else {
2179            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2180        }
2181        break;
2182
2183    case INDEX_op_not_i64:
2184    case INDEX_op_not_i32:
2185        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2186        break;
2187
2188    case INDEX_op_mul_i64:
2189    case INDEX_op_mul_i32:
2190        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2191        break;
2192
2193    case INDEX_op_div_i64:
2194    case INDEX_op_div_i32:
2195        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2196        break;
2197    case INDEX_op_divu_i64:
2198    case INDEX_op_divu_i32:
2199        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2200        break;
2201
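    /* There is no remainder instruction; compute the quotient, then
       rem = a1 - (quotient * a2) via MSUB.  */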
2202    case INDEX_op_rem_i64:
2203    case INDEX_op_rem_i32:
2204        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2205        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2206        break;
2207    case INDEX_op_remu_i64:
2208    case INDEX_op_remu_i32:
2209        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2210        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2211        break;
2212
2213    case INDEX_op_shl_i64:
2214    case INDEX_op_shl_i32:
2215        if (c2) {
2216            tcg_out_shl(s, ext, a0, a1, a2);
2217        } else {
2218            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2219        }
2220        break;
2221
2222    case INDEX_op_shr_i64:
2223    case INDEX_op_shr_i32:
2224        if (c2) {
2225            tcg_out_shr(s, ext, a0, a1, a2);
2226        } else {
2227            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2228        }
2229        break;
2230
2231    case INDEX_op_sar_i64:
2232    case INDEX_op_sar_i32:
2233        if (c2) {
2234            tcg_out_sar(s, ext, a0, a1, a2);
2235        } else {
2236            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2237        }
2238        break;
2239
2240    case INDEX_op_rotr_i64:
2241    case INDEX_op_rotr_i32:
2242        if (c2) {
2243            tcg_out_rotr(s, ext, a0, a1, a2);
2244        } else {
2245            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2246        }
2247        break;
2248
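    /* AArch64 has only rotate-right: rotate left by the negated count.  */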
2249    case INDEX_op_rotl_i64:
2250    case INDEX_op_rotl_i32:
2251        if (c2) {
2252            tcg_out_rotl(s, ext, a0, a1, a2);
2253        } else {
2254            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2255            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2256        }
2257        break;
2258
2259    case INDEX_op_clz_i64:
2260    case INDEX_op_clz_i32:
2261        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2262        break;
2263    case INDEX_op_ctz_i64:
2264    case INDEX_op_ctz_i32:
2265        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2266        break;
2267
2268    case INDEX_op_brcond_i32:
2269        a1 = (int32_t)a1;
2270        /* FALLTHRU */
2271    case INDEX_op_brcond_i64:
2272        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2273        break;
2274
2275    case INDEX_op_setcond_i32:
2276        a2 = (int32_t)a2;
2277        /* FALLTHRU */
2278    case INDEX_op_setcond_i64:
2279        tcg_out_cmp(s, ext, a1, a2, c2);
2280        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2281        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2282                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2283        break;
2284
2285    case INDEX_op_negsetcond_i32:
2286        a2 = (int32_t)a2;
2287        /* FALLTHRU */
2288    case INDEX_op_negsetcond_i64:
2289        tcg_out_cmp(s, ext, a1, a2, c2);
2290        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2291        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2292                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2293        break;
2294
2295    case INDEX_op_movcond_i32:
2296        a2 = (int32_t)a2;
2297        /* FALLTHRU */
2298    case INDEX_op_movcond_i64:
2299        tcg_out_cmp(s, ext, a1, a2, c2);
2300        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2301        break;
2302
2303    case INDEX_op_qemu_ld_a32_i32:
2304    case INDEX_op_qemu_ld_a64_i32:
2305    case INDEX_op_qemu_ld_a32_i64:
2306    case INDEX_op_qemu_ld_a64_i64:
2307        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2308        break;
2309    case INDEX_op_qemu_st_a32_i32:
2310    case INDEX_op_qemu_st_a64_i32:
2311    case INDEX_op_qemu_st_a32_i64:
2312    case INDEX_op_qemu_st_a64_i64:
2313        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2314        break;
2315    case INDEX_op_qemu_ld_a32_i128:
2316    case INDEX_op_qemu_ld_a64_i128:
2317        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2318        break;
2319    case INDEX_op_qemu_st_a32_i128:
2320    case INDEX_op_qemu_st_a64_i128:
2321        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2322        break;
2323
2324    case INDEX_op_bswap64_i64:
2325        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2326        break;
2327    case INDEX_op_bswap32_i64:
2328        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2329        if (a2 & TCG_BSWAP_OS) {
2330            tcg_out_ext32s(s, a0, a0);
2331        }
2332        break;
2333    case INDEX_op_bswap32_i32:
2334        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2335        break;
2336    case INDEX_op_bswap16_i64:
2337    case INDEX_op_bswap16_i32:
2338        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2339        if (a2 & TCG_BSWAP_OS) {
2340            /* Output must be sign-extended. */
2341            tcg_out_ext16s(s, ext, a0, a0);
2342        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2343            /* Output must be zero-extended, but input isn't. */
2344            tcg_out_ext16u(s, a0, a0);
2345        }
2346        break;
2347
2348    case INDEX_op_deposit_i64:
2349    case INDEX_op_deposit_i32:
2350        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2351        break;
2352
2353    case INDEX_op_extract_i64:
2354    case INDEX_op_extract_i32:
2355        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2356        break;
2357
2358    case INDEX_op_sextract_i64:
2359    case INDEX_op_sextract_i32:
2360        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2361        break;
2362
2363    case INDEX_op_extract2_i64:
2364    case INDEX_op_extract2_i32:
2365        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2366        break;
2367
2368    case INDEX_op_add2_i32:
2369        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2370                        (int32_t)args[4], args[5], const_args[4],
2371                        const_args[5], false);
2372        break;
2373    case INDEX_op_add2_i64:
2374        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2375                        args[5], const_args[4], const_args[5], false);
2376        break;
2377    case INDEX_op_sub2_i32:
2378        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2379                        (int32_t)args[4], args[5], const_args[4],
2380                        const_args[5], true);
2381        break;
2382    case INDEX_op_sub2_i64:
2383        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2384                        args[5], const_args[4], const_args[5], true);
2385        break;
2386
2387    case INDEX_op_muluh_i64:
2388        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2389        break;
2390    case INDEX_op_mulsh_i64:
2391        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2392        break;
2393
2394    case INDEX_op_mb:
2395        tcg_out_mb(s, a0);
2396        break;
2397
2398    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2399    case INDEX_op_mov_i64:
2400    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2401    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2402    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2403    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2404    case INDEX_op_ext8s_i64:
2405    case INDEX_op_ext8u_i32:
2406    case INDEX_op_ext8u_i64:
2407    case INDEX_op_ext16s_i64:
2408    case INDEX_op_ext16s_i32:
2409    case INDEX_op_ext16u_i64:
2410    case INDEX_op_ext16u_i32:
2411    case INDEX_op_ext32s_i64:
2412    case INDEX_op_ext32u_i64:
2413    case INDEX_op_ext_i32_i64:
2414    case INDEX_op_extu_i32_i64:
2415    case INDEX_op_extrl_i64_i32:
2416    default:
2417        g_assert_not_reached();
2418    }
2419
2420#undef REG0
2421}
2422
2423static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2424                           unsigned vecl, unsigned vece,
2425                           const TCGArg args[TCG_MAX_OP_ARGS],
2426                           const int const_args[TCG_MAX_OP_ARGS])
2427{
2428    static const AArch64Insn cmp_vec_insn[16] = {
2429        [TCG_COND_EQ] = I3616_CMEQ,
2430        [TCG_COND_GT] = I3616_CMGT,
2431        [TCG_COND_GE] = I3616_CMGE,
2432        [TCG_COND_GTU] = I3616_CMHI,
2433        [TCG_COND_GEU] = I3616_CMHS,
2434    };
2435    static const AArch64Insn cmp_scalar_insn[16] = {
2436        [TCG_COND_EQ] = I3611_CMEQ,
2437        [TCG_COND_GT] = I3611_CMGT,
2438        [TCG_COND_GE] = I3611_CMGE,
2439        [TCG_COND_GTU] = I3611_CMHI,
2440        [TCG_COND_GEU] = I3611_CMHS,
2441    };
2442    static const AArch64Insn cmp0_vec_insn[16] = {
2443        [TCG_COND_EQ] = I3617_CMEQ0,
2444        [TCG_COND_GT] = I3617_CMGT0,
2445        [TCG_COND_GE] = I3617_CMGE0,
2446        [TCG_COND_LT] = I3617_CMLT0,
2447        [TCG_COND_LE] = I3617_CMLE0,
2448    };
2449    static const AArch64Insn cmp0_scalar_insn[16] = {
2450        [TCG_COND_EQ] = I3612_CMEQ0,
2451        [TCG_COND_GT] = I3612_CMGT0,
2452        [TCG_COND_GE] = I3612_CMGE0,
2453        [TCG_COND_LT] = I3612_CMLT0,
2454        [TCG_COND_LE] = I3612_CMLE0,
2455    };
2456
2457    TCGType type = vecl + TCG_TYPE_V64;
2458    unsigned is_q = vecl;
2459    bool is_scalar = !is_q && vece == MO_64;
2460    TCGArg a0, a1, a2, a3;
2461    int cmode, imm8;
2462
2463    a0 = args[0];
2464    a1 = args[1];
2465    a2 = args[2];
2466
2467    switch (opc) {
2468    case INDEX_op_ld_vec:
2469        tcg_out_ld(s, type, a0, a1, a2);
2470        break;
2471    case INDEX_op_st_vec:
2472        tcg_out_st(s, type, a0, a1, a2);
2473        break;
2474    case INDEX_op_dupm_vec:
2475        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2476        break;
2477    case INDEX_op_add_vec:
2478        if (is_scalar) {
2479            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2480        } else {
2481            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2482        }
2483        break;
2484    case INDEX_op_sub_vec:
2485        if (is_scalar) {
2486            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2487        } else {
2488            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2489        }
2490        break;
2491    case INDEX_op_mul_vec:
2492        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2493        break;
2494    case INDEX_op_neg_vec:
2495        if (is_scalar) {
2496            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2497        } else {
2498            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2499        }
2500        break;
2501    case INDEX_op_abs_vec:
2502        if (is_scalar) {
2503            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2504        } else {
2505            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2506        }
2507        break;
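    /*
     * For the vector logicals with an immediate operand, use the BIC/ORR
     * (immediate) forms when the destination already holds the other operand;
     * otherwise materialize the (possibly inverted) immediate with MVNI/MOVI
     * and use the register form.
     */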
2508    case INDEX_op_and_vec:
2509        if (const_args[2]) {
2510            is_shimm1632(~a2, &cmode, &imm8);
2511            if (a0 == a1) {
2512                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2513                return;
2514            }
2515            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2516            a2 = a0;
2517        }
2518        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2519        break;
2520    case INDEX_op_or_vec:
2521        if (const_args[2]) {
2522            is_shimm1632(a2, &cmode, &imm8);
2523            if (a0 == a1) {
2524                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2525                return;
2526            }
2527            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2528            a2 = a0;
2529        }
2530        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2531        break;
2532    case INDEX_op_andc_vec:
2533        if (const_args[2]) {
2534            is_shimm1632(a2, &cmode, &imm8);
2535            if (a0 == a1) {
2536                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2537                return;
2538            }
2539            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2540            a2 = a0;
2541        }
2542        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2543        break;
2544    case INDEX_op_orc_vec:
2545        if (const_args[2]) {
2546            is_shimm1632(~a2, &cmode, &imm8);
2547            if (a0 == a1) {
2548                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2549                return;
2550            }
2551            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2552            a2 = a0;
2553        }
2554        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2555        break;
2556    case INDEX_op_xor_vec:
2557        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2558        break;
2559    case INDEX_op_ssadd_vec:
2560        if (is_scalar) {
2561            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2562        } else {
2563            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2564        }
2565        break;
2566    case INDEX_op_sssub_vec:
2567        if (is_scalar) {
2568            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2569        } else {
2570            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2571        }
2572        break;
2573    case INDEX_op_usadd_vec:
2574        if (is_scalar) {
2575            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2576        } else {
2577            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2578        }
2579        break;
2580    case INDEX_op_ussub_vec:
2581        if (is_scalar) {
2582            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2583        } else {
2584            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2585        }
2586        break;
2587    case INDEX_op_smax_vec:
2588        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2589        break;
2590    case INDEX_op_smin_vec:
2591        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2592        break;
2593    case INDEX_op_umax_vec:
2594        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2595        break;
2596    case INDEX_op_umin_vec:
2597        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2598        break;
2599    case INDEX_op_not_vec:
2600        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2601        break;
2602    case INDEX_op_shli_vec:
2603        if (is_scalar) {
2604            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2605        } else {
2606            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2607        }
2608        break;
2609    case INDEX_op_shri_vec:
2610        if (is_scalar) {
2611            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2612        } else {
2613            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2614        }
2615        break;
2616    case INDEX_op_sari_vec:
2617        if (is_scalar) {
2618            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2619        } else {
2620            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2621        }
2622        break;
2623    case INDEX_op_aa64_sli_vec:
2624        if (is_scalar) {
2625            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2626        } else {
2627            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2628        }
2629        break;
2630    case INDEX_op_shlv_vec:
2631        if (is_scalar) {
2632            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2633        } else {
2634            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2635        }
2636        break;
2637    case INDEX_op_aa64_sshl_vec:
2638        if (is_scalar) {
2639            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2640        } else {
2641            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2642        }
2643        break;
2644    case INDEX_op_cmp_vec:
2645        {
2646            TCGCond cond = args[3];
2647            AArch64Insn insn;
2648
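            /*
             * NE has no direct encoding: test-against-self (CMTST) handles a
             * compare with zero, otherwise use CMEQ followed by NOT.  Other
             * missing conditions are obtained by swapping the operands.
             */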
2649            if (cond == TCG_COND_NE) {
2650                if (const_args[2]) {
2651                    if (is_scalar) {
2652                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2653                    } else {
2654                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2655                    }
2656                } else {
2657                    if (is_scalar) {
2658                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2659                    } else {
2660                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2661                    }
2662                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2663                }
2664            } else {
2665                if (const_args[2]) {
2666                    if (is_scalar) {
2667                        insn = cmp0_scalar_insn[cond];
2668                        if (insn) {
2669                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2670                            break;
2671                        }
2672                    } else {
2673                        insn = cmp0_vec_insn[cond];
2674                        if (insn) {
2675                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2676                            break;
2677                        }
2678                    }
2679                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2680                    a2 = TCG_VEC_TMP0;
2681                }
2682                if (is_scalar) {
2683                    insn = cmp_scalar_insn[cond];
2684                    if (insn == 0) {
2685                        TCGArg t;
2686                        t = a1, a1 = a2, a2 = t;
2687                        cond = tcg_swap_cond(cond);
2688                        insn = cmp_scalar_insn[cond];
2689                        tcg_debug_assert(insn != 0);
2690                    }
2691                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2692                } else {
2693                    insn = cmp_vec_insn[cond];
2694                    if (insn == 0) {
2695                        TCGArg t;
2696                        t = a1, a1 = a2, a2 = t;
2697                        cond = tcg_swap_cond(cond);
2698                        insn = cmp_vec_insn[cond];
2699                        tcg_debug_assert(insn != 0);
2700                    }
2701                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2702                }
2703            }
2704        }
2705        break;
2706
2707    case INDEX_op_bitsel_vec:
2708        a3 = args[3];
2709        if (a0 == a3) {
2710            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2711        } else if (a0 == a2) {
2712            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2713        } else {
2714            if (a0 != a1) {
2715                tcg_out_mov(s, type, a0, a1);
2716            }
2717            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2718        }
2719        break;
2720
2721    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2722    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2723    default:
2724        g_assert_not_reached();
2725    }
2726}
2727
2728int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2729{
2730    switch (opc) {
2731    case INDEX_op_add_vec:
2732    case INDEX_op_sub_vec:
2733    case INDEX_op_and_vec:
2734    case INDEX_op_or_vec:
2735    case INDEX_op_xor_vec:
2736    case INDEX_op_andc_vec:
2737    case INDEX_op_orc_vec:
2738    case INDEX_op_neg_vec:
2739    case INDEX_op_abs_vec:
2740    case INDEX_op_not_vec:
2741    case INDEX_op_cmp_vec:
2742    case INDEX_op_shli_vec:
2743    case INDEX_op_shri_vec:
2744    case INDEX_op_sari_vec:
2745    case INDEX_op_ssadd_vec:
2746    case INDEX_op_sssub_vec:
2747    case INDEX_op_usadd_vec:
2748    case INDEX_op_ussub_vec:
2749    case INDEX_op_shlv_vec:
2750    case INDEX_op_bitsel_vec:
2751        return 1;
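    /* Expandable via tcg_expand_vec_op.  */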
2752    case INDEX_op_rotli_vec:
2753    case INDEX_op_shrv_vec:
2754    case INDEX_op_sarv_vec:
2755    case INDEX_op_rotlv_vec:
2756    case INDEX_op_rotrv_vec:
2757        return -1;
2758    case INDEX_op_mul_vec:
2759    case INDEX_op_smax_vec:
2760    case INDEX_op_smin_vec:
2761    case INDEX_op_umax_vec:
2762    case INDEX_op_umin_vec:
2763        return vece < MO_64;
2764
2765    default:
2766        return 0;
2767    }
2768}
2769
2770void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2771                       TCGArg a0, ...)
2772{
2773    va_list va;
2774    TCGv_vec v0, v1, v2, t1, t2, c1;
2775    TCGArg a2;
2776
2777    va_start(va, a0);
2778    v0 = temp_tcgv_vec(arg_temp(a0));
2779    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2780    a2 = va_arg(va, TCGArg);
2781    va_end(va);
2782
2783    switch (opc) {
2784    case INDEX_op_rotli_vec:
2785        t1 = tcg_temp_new_vec(type);
2786        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2787        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2788                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2789        tcg_temp_free_vec(t1);
2790        break;
2791
2792    case INDEX_op_shrv_vec:
2793    case INDEX_op_sarv_vec:
2794        /* Right shifts are negative left shifts for AArch64.  */
2795        v2 = temp_tcgv_vec(arg_temp(a2));
2796        t1 = tcg_temp_new_vec(type);
2797        tcg_gen_neg_vec(vece, t1, v2);
2798        opc = (opc == INDEX_op_shrv_vec
2799               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2800        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2801                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2802        tcg_temp_free_vec(t1);
2803        break;
2804
2805    case INDEX_op_rotlv_vec:
2806        v2 = temp_tcgv_vec(arg_temp(a2));
2807        t1 = tcg_temp_new_vec(type);
2808        c1 = tcg_constant_vec(type, vece, 8 << vece);
2809        tcg_gen_sub_vec(vece, t1, v2, c1);
2810        /* Right shifts are negative left shifts for AArch64.  */
2811        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2812                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2813        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2814                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2815        tcg_gen_or_vec(vece, v0, v0, t1);
2816        tcg_temp_free_vec(t1);
2817        break;
2818
2819    case INDEX_op_rotrv_vec:
2820        v2 = temp_tcgv_vec(arg_temp(a2));
2821        t1 = tcg_temp_new_vec(type);
2822        t2 = tcg_temp_new_vec(type);
2823        c1 = tcg_constant_vec(type, vece, 8 << vece);
2824        tcg_gen_neg_vec(vece, t1, v2);
2825        tcg_gen_sub_vec(vece, t2, c1, v2);
2826        /* Right shifts are negative left shifts for AArch64.  */
2827        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2828                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2829        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2830                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2831        tcg_gen_or_vec(vece, v0, t1, t2);
2832        tcg_temp_free_vec(t1);
2833        tcg_temp_free_vec(t2);
2834        break;
2835
2836    default:
2837        g_assert_not_reached();
2838    }
2839}
2840
2841static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2842{
2843    switch (op) {
2844    case INDEX_op_goto_ptr:
2845        return C_O0_I1(r);
2846
2847    case INDEX_op_ld8u_i32:
2848    case INDEX_op_ld8s_i32:
2849    case INDEX_op_ld16u_i32:
2850    case INDEX_op_ld16s_i32:
2851    case INDEX_op_ld_i32:
2852    case INDEX_op_ld8u_i64:
2853    case INDEX_op_ld8s_i64:
2854    case INDEX_op_ld16u_i64:
2855    case INDEX_op_ld16s_i64:
2856    case INDEX_op_ld32u_i64:
2857    case INDEX_op_ld32s_i64:
2858    case INDEX_op_ld_i64:
2859    case INDEX_op_neg_i32:
2860    case INDEX_op_neg_i64:
2861    case INDEX_op_not_i32:
2862    case INDEX_op_not_i64:
2863    case INDEX_op_bswap16_i32:
2864    case INDEX_op_bswap32_i32:
2865    case INDEX_op_bswap16_i64:
2866    case INDEX_op_bswap32_i64:
2867    case INDEX_op_bswap64_i64:
2868    case INDEX_op_ext8s_i32:
2869    case INDEX_op_ext16s_i32:
2870    case INDEX_op_ext8u_i32:
2871    case INDEX_op_ext16u_i32:
2872    case INDEX_op_ext8s_i64:
2873    case INDEX_op_ext16s_i64:
2874    case INDEX_op_ext32s_i64:
2875    case INDEX_op_ext8u_i64:
2876    case INDEX_op_ext16u_i64:
2877    case INDEX_op_ext32u_i64:
2878    case INDEX_op_ext_i32_i64:
2879    case INDEX_op_extu_i32_i64:
2880    case INDEX_op_extract_i32:
2881    case INDEX_op_extract_i64:
2882    case INDEX_op_sextract_i32:
2883    case INDEX_op_sextract_i64:
2884        return C_O1_I1(r, r);
2885
2886    case INDEX_op_st8_i32:
2887    case INDEX_op_st16_i32:
2888    case INDEX_op_st_i32:
2889    case INDEX_op_st8_i64:
2890    case INDEX_op_st16_i64:
2891    case INDEX_op_st32_i64:
2892    case INDEX_op_st_i64:
2893        return C_O0_I2(rZ, r);
2894
2895    case INDEX_op_add_i32:
2896    case INDEX_op_add_i64:
2897    case INDEX_op_sub_i32:
2898    case INDEX_op_sub_i64:
2899    case INDEX_op_setcond_i32:
2900    case INDEX_op_setcond_i64:
2901    case INDEX_op_negsetcond_i32:
2902    case INDEX_op_negsetcond_i64:
2903        return C_O1_I2(r, r, rA);
2904
2905    case INDEX_op_mul_i32:
2906    case INDEX_op_mul_i64:
2907    case INDEX_op_div_i32:
2908    case INDEX_op_div_i64:
2909    case INDEX_op_divu_i32:
2910    case INDEX_op_divu_i64:
2911    case INDEX_op_rem_i32:
2912    case INDEX_op_rem_i64:
2913    case INDEX_op_remu_i32:
2914    case INDEX_op_remu_i64:
2915    case INDEX_op_muluh_i64:
2916    case INDEX_op_mulsh_i64:
2917        return C_O1_I2(r, r, r);
2918
2919    case INDEX_op_and_i32:
2920    case INDEX_op_and_i64:
2921    case INDEX_op_or_i32:
2922    case INDEX_op_or_i64:
2923    case INDEX_op_xor_i32:
2924    case INDEX_op_xor_i64:
2925    case INDEX_op_andc_i32:
2926    case INDEX_op_andc_i64:
2927    case INDEX_op_orc_i32:
2928    case INDEX_op_orc_i64:
2929    case INDEX_op_eqv_i32:
2930    case INDEX_op_eqv_i64:
2931        return C_O1_I2(r, r, rL);
2932
2933    case INDEX_op_shl_i32:
2934    case INDEX_op_shr_i32:
2935    case INDEX_op_sar_i32:
2936    case INDEX_op_rotl_i32:
2937    case INDEX_op_rotr_i32:
2938    case INDEX_op_shl_i64:
2939    case INDEX_op_shr_i64:
2940    case INDEX_op_sar_i64:
2941    case INDEX_op_rotl_i64:
2942    case INDEX_op_rotr_i64:
2943        return C_O1_I2(r, r, ri);
2944
2945    case INDEX_op_clz_i32:
2946    case INDEX_op_ctz_i32:
2947    case INDEX_op_clz_i64:
2948    case INDEX_op_ctz_i64:
2949        return C_O1_I2(r, r, rAL);
2950
2951    case INDEX_op_brcond_i32:
2952    case INDEX_op_brcond_i64:
2953        return C_O0_I2(r, rA);
2954
2955    case INDEX_op_movcond_i32:
2956    case INDEX_op_movcond_i64:
2957        return C_O1_I4(r, r, rA, rZ, rZ);
2958
2959    case INDEX_op_qemu_ld_a32_i32:
2960    case INDEX_op_qemu_ld_a64_i32:
2961    case INDEX_op_qemu_ld_a32_i64:
2962    case INDEX_op_qemu_ld_a64_i64:
2963        return C_O1_I1(r, r);
2964    case INDEX_op_qemu_ld_a32_i128:
2965    case INDEX_op_qemu_ld_a64_i128:
2966        return C_O2_I1(r, r, r);
2967    case INDEX_op_qemu_st_a32_i32:
2968    case INDEX_op_qemu_st_a64_i32:
2969    case INDEX_op_qemu_st_a32_i64:
2970    case INDEX_op_qemu_st_a64_i64:
2971        return C_O0_I2(rZ, r);
2972    case INDEX_op_qemu_st_a32_i128:
2973    case INDEX_op_qemu_st_a64_i128:
2974        return C_O0_I3(rZ, rZ, r);
2975
2976    case INDEX_op_deposit_i32:
2977    case INDEX_op_deposit_i64:
2978        return C_O1_I2(r, 0, rZ);
2979
2980    case INDEX_op_extract2_i32:
2981    case INDEX_op_extract2_i64:
2982        return C_O1_I2(r, rZ, rZ);
2983
2984    case INDEX_op_add2_i32:
2985    case INDEX_op_add2_i64:
2986    case INDEX_op_sub2_i32:
2987    case INDEX_op_sub2_i64:
2988        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2989
2990    case INDEX_op_add_vec:
2991    case INDEX_op_sub_vec:
2992    case INDEX_op_mul_vec:
2993    case INDEX_op_xor_vec:
2994    case INDEX_op_ssadd_vec:
2995    case INDEX_op_sssub_vec:
2996    case INDEX_op_usadd_vec:
2997    case INDEX_op_ussub_vec:
2998    case INDEX_op_smax_vec:
2999    case INDEX_op_smin_vec:
3000    case INDEX_op_umax_vec:
3001    case INDEX_op_umin_vec:
3002    case INDEX_op_shlv_vec:
3003    case INDEX_op_shrv_vec:
3004    case INDEX_op_sarv_vec:
3005    case INDEX_op_aa64_sshl_vec:
3006        return C_O1_I2(w, w, w);
3007    case INDEX_op_not_vec:
3008    case INDEX_op_neg_vec:
3009    case INDEX_op_abs_vec:
3010    case INDEX_op_shli_vec:
3011    case INDEX_op_shri_vec:
3012    case INDEX_op_sari_vec:
3013        return C_O1_I1(w, w);
3014    case INDEX_op_ld_vec:
3015    case INDEX_op_dupm_vec:
3016        return C_O1_I1(w, r);
3017    case INDEX_op_st_vec:
3018        return C_O0_I2(w, r);
3019    case INDEX_op_dup_vec:
3020        return C_O1_I1(w, wr);
3021    case INDEX_op_or_vec:
3022    case INDEX_op_andc_vec:
3023        return C_O1_I2(w, w, wO);
3024    case INDEX_op_and_vec:
3025    case INDEX_op_orc_vec:
3026        return C_O1_I2(w, w, wN);
3027    case INDEX_op_cmp_vec:
3028        return C_O1_I2(w, w, wZ);
3029    case INDEX_op_bitsel_vec:
3030        return C_O1_I3(w, w, w, w);
3031    case INDEX_op_aa64_sli_vec:
3032        return C_O1_I2(w, 0, w);
3033
3034    default:
3035        g_assert_not_reached();
3036    }
3037}
3038
3039static void tcg_target_init(TCGContext *s)
3040{
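    /* Core registers occupy regset bits 0..31, vector registers 32..63.  */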
3041    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3042    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3043    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3044    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3045
3046    tcg_target_call_clobber_regs = -1ull;
3047    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3048    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3049    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3050    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3051    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3052    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3053    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3054    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3055    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3056    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3057    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3058    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3059    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3060    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3061    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3062    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3063    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3064    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3065    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3066
3067    s->reserved_regs = 0;
3068    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3069    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3070    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3071    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3072    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3073    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3074    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3075}
3076
3077/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3078#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3079
3080#define FRAME_SIZE \
3081    ((PUSH_SIZE \
3082      + TCG_STATIC_CALL_ARGS_SIZE \
3083      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3084      + TCG_TARGET_STACK_ALIGN - 1) \
3085     & ~(TCG_TARGET_STACK_ALIGN - 1))
3086
3087/* We're expecting a 2-byte uleb128-encoded value.  */
3088QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3089
3090/* We're expecting to use a single ADDI insn.  */
3091QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3092
3093static void tcg_target_qemu_prologue(TCGContext *s)
3094{
3095    TCGReg r;
3096
3097    tcg_out_bti(s, BTI_C);
3098
3099    /* Push (FP, LR) and allocate space for all saved registers.  */
3100    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3101                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3102
3103    /* Set up frame pointer for canonical unwinding.  */
3104    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3105
3106    /* Store callee-preserved regs x19..x28.  */
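    /* Slot 0 of the frame already holds the (FP, LR) pair, hence the +2.  */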
3107    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3108        int ofs = (r - TCG_REG_X19 + 2) * 8;
3109        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3110    }
3111
3112    /* Make stack space for TCG locals.  */
3113    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3114                 FRAME_SIZE - PUSH_SIZE);
3115
3116    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3117    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3118                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3119
3120    if (!tcg_use_softmmu) {
3121        /*
3122         * Note that XZR cannot be encoded in the address base register slot,
3123         * as that actually encodes SP.  Depending on the guest, we may need
3124         * to zero-extend the guest address via the address index register slot;
3125         * therefore we must load even a zero guest base into a register.
3126         */
3127        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3128        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3129    }
3130
3131    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3132    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3133
3134    /*
3135     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3136     * and fall through to the rest of the epilogue.
3137     */
3138    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3139    tcg_out_bti(s, BTI_J);
3140    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3141
3142    /* TB epilogue */
3143    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3144    tcg_out_bti(s, BTI_J);
3145
3146    /* Remove TCG locals stack space.  */
3147    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3148                 FRAME_SIZE - PUSH_SIZE);
3149
3150    /* Restore registers x19..x28.  */
3151    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3152        int ofs = (r - TCG_REG_X19 + 2) * 8;
3153        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3154    }
3155
3156    /* Pop (FP, LR), restore SP to previous frame.  */
3157    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3158                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3159    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3160}
3161
3162static void tcg_out_tb_start(TCGContext *s)
3163{
3164    tcg_out_bti(s, BTI_J);
3165}
3166
3167static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3168{
3169    int i;
3170    for (i = 0; i < count; ++i) {
3171        p[i] = NOP;
3172    }
3173}
3174
3175typedef struct {
3176    DebugFrameHeader h;
3177    uint8_t fde_def_cfa[4];
3178    uint8_t fde_reg_ofs[24];
3179} DebugFrame;
3180
3181#define ELF_HOST_MACHINE EM_AARCH64
3182
3183static const DebugFrame debug_frame = {
3184    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3185    .h.cie.id = -1,
3186    .h.cie.version = 1,
3187    .h.cie.code_align = 1,
3188    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3189    .h.cie.return_column = TCG_REG_LR,
3190
3191    /* Total FDE size does not include the "len" member.  */
3192    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3193
3194    .fde_def_cfa = {
3195        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3196        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3197        (FRAME_SIZE >> 7)
3198    },
3199    .fde_reg_ofs = {
3200        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3201        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3202        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3203        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3204        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3205        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3206        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3207        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3208        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3209        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3210        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3211        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3212    }
3213};
3214
3215void tcg_register_jit(const void *buf, size_t buf_size)
3216{
3217    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3218}
3219