xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision bad5cfcd)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#ifndef CONFIG_SOFTMMU
81#define TCG_REG_GUEST_BASE TCG_REG_X28
82#endif
83
84static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
85{
86    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
87    ptrdiff_t offset = target - src_rx;
88
89    if (offset == sextract64(offset, 0, 26)) {
90        /* read instruction, mask away previous PC_REL26 parameter contents,
91           set the proper offset, then write back the instruction. */
92        *src_rw = deposit32(*src_rw, 0, 26, offset);
93        return true;
94    }
95    return false;
96}
97
98static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
99{
100    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
101    ptrdiff_t offset = target - src_rx;
102
103    if (offset == sextract64(offset, 0, 19)) {
104        *src_rw = deposit32(*src_rw, 5, 19, offset);
105        return true;
106    }
107    return false;
108}
109
110static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
111                        intptr_t value, intptr_t addend)
112{
113    tcg_debug_assert(addend == 0);
114    switch (type) {
115    case R_AARCH64_JUMP26:
116    case R_AARCH64_CALL26:
117        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
118    case R_AARCH64_CONDBR19:
119        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
120    default:
121        g_assert_not_reached();
122    }
123}
124
125#define TCG_CT_CONST_AIMM 0x100
126#define TCG_CT_CONST_LIMM 0x200
127#define TCG_CT_CONST_ZERO 0x400
128#define TCG_CT_CONST_MONE 0x800
129#define TCG_CT_CONST_ORRI 0x1000
130#define TCG_CT_CONST_ANDI 0x2000
131
132#define ALL_GENERAL_REGS  0xffffffffu
133#define ALL_VECTOR_REGS   0xffffffff00000000ull
134
135/* Match a constant valid for addition (12-bit, optionally shifted).  */
136static inline bool is_aimm(uint64_t val)
137{
138    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
139}
140
141/* Match a constant valid for logical operations.  */
142static inline bool is_limm(uint64_t val)
143{
144    /* Taking a simplified view of the logical immediates for now, ignoring
145       the replication that can happen across the field.  Match bit patterns
146       of the forms
147           0....01....1
148           0..01..10..0
149       and their inverses.  */
150
151    /* Make things easier below, by testing the form with msb clear. */
152    if ((int64_t)val < 0) {
153        val = ~val;
154    }
155    if (val == 0) {
156        return false;
157    }
158    val += val & -val;
159    return (val & (val - 1)) == 0;
160}
161
162/* Return true if v16 is a valid 16-bit shifted immediate.  */
163static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
164{
165    if (v16 == (v16 & 0xff)) {
166        *cmode = 0x8;
167        *imm8 = v16 & 0xff;
168        return true;
169    } else if (v16 == (v16 & 0xff00)) {
170        *cmode = 0xa;
171        *imm8 = v16 >> 8;
172        return true;
173    }
174    return false;
175}
176
177/* Return true if v32 is a valid 32-bit shifted immediate.  */
178static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
179{
180    if (v32 == (v32 & 0xff)) {
181        *cmode = 0x0;
182        *imm8 = v32 & 0xff;
183        return true;
184    } else if (v32 == (v32 & 0xff00)) {
185        *cmode = 0x2;
186        *imm8 = (v32 >> 8) & 0xff;
187        return true;
188    } else if (v32 == (v32 & 0xff0000)) {
189        *cmode = 0x4;
190        *imm8 = (v32 >> 16) & 0xff;
191        return true;
192    } else if (v32 == (v32 & 0xff000000)) {
193        *cmode = 0x6;
194        *imm8 = v32 >> 24;
195        return true;
196    }
197    return false;
198}
199
200/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
201static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
202{
203    if ((v32 & 0xffff00ff) == 0xff) {
204        *cmode = 0xc;
205        *imm8 = (v32 >> 8) & 0xff;
206        return true;
207    } else if ((v32 & 0xff00ffff) == 0xffff) {
208        *cmode = 0xd;
209        *imm8 = (v32 >> 16) & 0xff;
210        return true;
211    }
212    return false;
213}
214
215/* Return true if v32 is a valid float32 immediate.  */
216static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
217{
218    if (extract32(v32, 0, 19) == 0
219        && (extract32(v32, 25, 6) == 0x20
220            || extract32(v32, 25, 6) == 0x1f)) {
221        *cmode = 0xf;
222        *imm8 = (extract32(v32, 31, 1) << 7)
223              | (extract32(v32, 25, 1) << 6)
224              | extract32(v32, 19, 6);
225        return true;
226    }
227    return false;
228}
229
230/* Return true if v64 is a valid float64 immediate.  */
231static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
232{
233    if (extract64(v64, 0, 48) == 0
234        && (extract64(v64, 54, 9) == 0x100
235            || extract64(v64, 54, 9) == 0x0ff)) {
236        *cmode = 0xf;
237        *imm8 = (extract64(v64, 63, 1) << 7)
238              | (extract64(v64, 54, 1) << 6)
239              | extract64(v64, 48, 6);
240        return true;
241    }
242    return false;
243}
244
245/*
246 * Return non-zero if v32 can be formed by MOVI+ORR.
247 * Place the parameters for MOVI in (cmode, imm8).
248 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
249 */
250static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
251{
252    int i;
253
254    for (i = 6; i > 0; i -= 2) {
255        /* Mask out one byte we can add with ORR.  */
256        uint32_t tmp = v32 & ~(0xffu << (i * 4));
257        if (is_shimm32(tmp, cmode, imm8) ||
258            is_soimm32(tmp, cmode, imm8)) {
259            break;
260        }
261    }
262    return i;
263}
264
265/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
266static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
267{
268    if (v32 == deposit32(v32, 16, 16, v32)) {
269        return is_shimm16(v32, cmode, imm8);
270    } else {
271        return is_shimm32(v32, cmode, imm8);
272    }
273}
274
275static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
276{
277    if (ct & TCG_CT_CONST) {
278        return 1;
279    }
280    if (type == TCG_TYPE_I32) {
281        val = (int32_t)val;
282    }
283    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
284        return 1;
285    }
286    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
287        return 1;
288    }
289    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
293        return 1;
294    }
295
296    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
297    case 0:
298        break;
299    case TCG_CT_CONST_ANDI:
300        val = ~val;
301        /* fallthru */
302    case TCG_CT_CONST_ORRI:
303        if (val == deposit64(val, 32, 32, val)) {
304            int cmode, imm8;
305            return is_shimm1632(val, &cmode, &imm8);
306        }
307        break;
308    default:
309        /* Both bits should not be set for the same insn.  */
310        g_assert_not_reached();
311    }
312
313    return 0;
314}
315
316enum aarch64_cond_code {
317    COND_EQ = 0x0,
318    COND_NE = 0x1,
319    COND_CS = 0x2,     /* Unsigned greater or equal */
320    COND_HS = COND_CS, /* ALIAS greater or equal */
321    COND_CC = 0x3,     /* Unsigned less than */
322    COND_LO = COND_CC, /* ALIAS Lower */
323    COND_MI = 0x4,     /* Negative */
324    COND_PL = 0x5,     /* Zero or greater */
325    COND_VS = 0x6,     /* Overflow */
326    COND_VC = 0x7,     /* No overflow */
327    COND_HI = 0x8,     /* Unsigned greater than */
328    COND_LS = 0x9,     /* Unsigned less or equal */
329    COND_GE = 0xa,
330    COND_LT = 0xb,
331    COND_GT = 0xc,
332    COND_LE = 0xd,
333    COND_AL = 0xe,
334    COND_NV = 0xf, /* behaves like COND_AL here */
335};
336
337static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338    [TCG_COND_EQ] = COND_EQ,
339    [TCG_COND_NE] = COND_NE,
340    [TCG_COND_LT] = COND_LT,
341    [TCG_COND_GE] = COND_GE,
342    [TCG_COND_LE] = COND_LE,
343    [TCG_COND_GT] = COND_GT,
344    /* unsigned */
345    [TCG_COND_LTU] = COND_LO,
346    [TCG_COND_GTU] = COND_HI,
347    [TCG_COND_GEU] = COND_HS,
348    [TCG_COND_LEU] = COND_LS,
349};
350
351typedef enum {
352    LDST_ST = 0,    /* store */
353    LDST_LD = 1,    /* load */
354    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
355    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
356} AArch64LdstType;
357
358/* We encode the format of the insn into the beginning of the name, so that
359   we can have the preprocessor help "typecheck" the insn vs the output
360   function.  Arm didn't provide us with nice names for the formats, so we
361   use the section number of the architecture reference manual in which the
362   instruction group is described.  */
363typedef enum {
364    /* Compare and branch (immediate).  */
365    I3201_CBZ       = 0x34000000,
366    I3201_CBNZ      = 0x35000000,
367
368    /* Conditional branch (immediate).  */
369    I3202_B_C       = 0x54000000,
370
371    /* Unconditional branch (immediate).  */
372    I3206_B         = 0x14000000,
373    I3206_BL        = 0x94000000,
374
375    /* Unconditional branch (register).  */
376    I3207_BR        = 0xd61f0000,
377    I3207_BLR       = 0xd63f0000,
378    I3207_RET       = 0xd65f0000,
379
380    /* AdvSIMD load/store single structure.  */
381    I3303_LD1R      = 0x0d40c000,
382
383    /* Load literal for loading the address at pc-relative offset */
384    I3305_LDR       = 0x58000000,
385    I3305_LDR_v64   = 0x5c000000,
386    I3305_LDR_v128  = 0x9c000000,
387
388    /* Load/store exclusive. */
389    I3306_LDXP      = 0xc8600000,
390    I3306_STXP      = 0xc8200000,
391
392    /* Load/store register.  Described here as 3.3.12, but the helper
393       that emits them can transform to 3.3.10 or 3.3.13.  */
394    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
398
399    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
403
404    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
406
407    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
410
411    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
413
414    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
416
417    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
418    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
419
420    I3312_TO_I3310  = 0x00200800,
421    I3312_TO_I3313  = 0x01000000,
422
423    /* Load/store register pair instructions.  */
424    I3314_LDP       = 0x28400000,
425    I3314_STP       = 0x28000000,
426
427    /* Add/subtract immediate instructions.  */
428    I3401_ADDI      = 0x11000000,
429    I3401_ADDSI     = 0x31000000,
430    I3401_SUBI      = 0x51000000,
431    I3401_SUBSI     = 0x71000000,
432
433    /* Bitfield instructions.  */
434    I3402_BFM       = 0x33000000,
435    I3402_SBFM      = 0x13000000,
436    I3402_UBFM      = 0x53000000,
437
438    /* Extract instruction.  */
439    I3403_EXTR      = 0x13800000,
440
441    /* Logical immediate instructions.  */
442    I3404_ANDI      = 0x12000000,
443    I3404_ORRI      = 0x32000000,
444    I3404_EORI      = 0x52000000,
445    I3404_ANDSI     = 0x72000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract extended register instructions. */
457    I3501_ADD       = 0x0b200000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV       = 0x5ac00000, /* + size << 10 */
482
483    /* Data-processing (2 source) instructions.  */
484    I3508_LSLV      = 0x1ac02000,
485    I3508_LSRV      = 0x1ac02400,
486    I3508_ASRV      = 0x1ac02800,
487    I3508_RORV      = 0x1ac02c00,
488    I3508_SMULH     = 0x9b407c00,
489    I3508_UMULH     = 0x9bc07c00,
490    I3508_UDIV      = 0x1ac00800,
491    I3508_SDIV      = 0x1ac00c00,
492
493    /* Data-processing (3 source) instructions.  */
494    I3509_MADD      = 0x1b000000,
495    I3509_MSUB      = 0x1b008000,
496
497    /* Logical shifted register instructions (without a shift).  */
498    I3510_AND       = 0x0a000000,
499    I3510_BIC       = 0x0a200000,
500    I3510_ORR       = 0x2a000000,
501    I3510_ORN       = 0x2a200000,
502    I3510_EOR       = 0x4a000000,
503    I3510_EON       = 0x4a200000,
504    I3510_ANDS      = 0x6a000000,
505
506    /* Logical shifted register instructions (with a shift).  */
507    I3502S_AND_LSR  = I3510_AND | (1 << 22),
508
509    /* AdvSIMD copy */
510    I3605_DUP      = 0x0e000400,
511    I3605_INS      = 0x4e001c00,
512    I3605_UMOV     = 0x0e003c00,
513
514    /* AdvSIMD modified immediate */
515    I3606_MOVI      = 0x0f000400,
516    I3606_MVNI      = 0x2f000400,
517    I3606_BIC       = 0x2f001400,
518    I3606_ORR       = 0x0f001400,
519
520    /* AdvSIMD scalar shift by immediate */
521    I3609_SSHR      = 0x5f000400,
522    I3609_SSRA      = 0x5f001400,
523    I3609_SHL       = 0x5f005400,
524    I3609_USHR      = 0x7f000400,
525    I3609_USRA      = 0x7f001400,
526    I3609_SLI       = 0x7f005400,
527
528    /* AdvSIMD scalar three same */
529    I3611_SQADD     = 0x5e200c00,
530    I3611_SQSUB     = 0x5e202c00,
531    I3611_CMGT      = 0x5e203400,
532    I3611_CMGE      = 0x5e203c00,
533    I3611_SSHL      = 0x5e204400,
534    I3611_ADD       = 0x5e208400,
535    I3611_CMTST     = 0x5e208c00,
536    I3611_UQADD     = 0x7e200c00,
537    I3611_UQSUB     = 0x7e202c00,
538    I3611_CMHI      = 0x7e203400,
539    I3611_CMHS      = 0x7e203c00,
540    I3611_USHL      = 0x7e204400,
541    I3611_SUB       = 0x7e208400,
542    I3611_CMEQ      = 0x7e208c00,
543
544    /* AdvSIMD scalar two-reg misc */
545    I3612_CMGT0     = 0x5e208800,
546    I3612_CMEQ0     = 0x5e209800,
547    I3612_CMLT0     = 0x5e20a800,
548    I3612_ABS       = 0x5e20b800,
549    I3612_CMGE0     = 0x7e208800,
550    I3612_CMLE0     = 0x7e209800,
551    I3612_NEG       = 0x7e20b800,
552
553    /* AdvSIMD shift by immediate */
554    I3614_SSHR      = 0x0f000400,
555    I3614_SSRA      = 0x0f001400,
556    I3614_SHL       = 0x0f005400,
557    I3614_SLI       = 0x2f005400,
558    I3614_USHR      = 0x2f000400,
559    I3614_USRA      = 0x2f001400,
560
561    /* AdvSIMD three same.  */
562    I3616_ADD       = 0x0e208400,
563    I3616_AND       = 0x0e201c00,
564    I3616_BIC       = 0x0e601c00,
565    I3616_BIF       = 0x2ee01c00,
566    I3616_BIT       = 0x2ea01c00,
567    I3616_BSL       = 0x2e601c00,
568    I3616_EOR       = 0x2e201c00,
569    I3616_MUL       = 0x0e209c00,
570    I3616_ORR       = 0x0ea01c00,
571    I3616_ORN       = 0x0ee01c00,
572    I3616_SUB       = 0x2e208400,
573    I3616_CMGT      = 0x0e203400,
574    I3616_CMGE      = 0x0e203c00,
575    I3616_CMTST     = 0x0e208c00,
576    I3616_CMHI      = 0x2e203400,
577    I3616_CMHS      = 0x2e203c00,
578    I3616_CMEQ      = 0x2e208c00,
579    I3616_SMAX      = 0x0e206400,
580    I3616_SMIN      = 0x0e206c00,
581    I3616_SSHL      = 0x0e204400,
582    I3616_SQADD     = 0x0e200c00,
583    I3616_SQSUB     = 0x0e202c00,
584    I3616_UMAX      = 0x2e206400,
585    I3616_UMIN      = 0x2e206c00,
586    I3616_UQADD     = 0x2e200c00,
587    I3616_UQSUB     = 0x2e202c00,
588    I3616_USHL      = 0x2e204400,
589
590    /* AdvSIMD two-reg misc.  */
591    I3617_CMGT0     = 0x0e208800,
592    I3617_CMEQ0     = 0x0e209800,
593    I3617_CMLT0     = 0x0e20a800,
594    I3617_CMGE0     = 0x2e208800,
595    I3617_CMLE0     = 0x2e209800,
596    I3617_NOT       = 0x2e205800,
597    I3617_ABS       = 0x0e20b800,
598    I3617_NEG       = 0x2e20b800,
599
600    /* System instructions.  */
601    NOP             = 0xd503201f,
602    DMB_ISH         = 0xd50338bf,
603    DMB_LD          = 0x00000100,
604    DMB_ST          = 0x00000200,
605
606    BTI_C           = 0xd503245f,
607    BTI_J           = 0xd503249f,
608    BTI_JC          = 0xd50324df,
609} AArch64Insn;
610
611static inline uint32_t tcg_in32(TCGContext *s)
612{
613    uint32_t v = *(uint32_t *)s->code_ptr;
614    return v;
615}
616
617/* Emit an opcode with "type-checking" of the format.  */
618#define tcg_out_insn(S, FMT, OP, ...) \
619    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
620
621static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
622                              TCGReg rt, TCGReg rn, unsigned size)
623{
624    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
625}
626
627static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
628                              int imm19, TCGReg rt)
629{
630    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
631}
632
633static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
634                              TCGReg rt, TCGReg rt2, TCGReg rn)
635{
636    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
637}
638
639static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
640                              TCGReg rt, int imm19)
641{
642    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
643}
644
645static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
646                              TCGCond c, int imm19)
647{
648    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
649}
650
651static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
652{
653    tcg_out32(s, insn | (imm26 & 0x03ffffff));
654}
655
656static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
657{
658    tcg_out32(s, insn | rn << 5);
659}
660
661static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
662                              TCGReg r1, TCGReg r2, TCGReg rn,
663                              tcg_target_long ofs, bool pre, bool w)
664{
665    insn |= 1u << 31; /* ext */
666    insn |= pre << 24;
667    insn |= w << 23;
668
669    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
670    insn |= (ofs & (0x7f << 3)) << (15 - 3);
671
672    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
673}
674
675static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
676                              TCGReg rd, TCGReg rn, uint64_t aimm)
677{
678    if (aimm > 0xfff) {
679        tcg_debug_assert((aimm & 0xfff) == 0);
680        aimm >>= 12;
681        tcg_debug_assert(aimm <= 0xfff);
682        aimm |= 1 << 12;  /* apply LSL 12 */
683    }
684    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
685}
686
687/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
688   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
689   that feed the DecodeBitMasks pseudo function.  */
690static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
691                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
692{
693    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
694              | rn << 5 | rd);
695}
696
697#define tcg_out_insn_3404  tcg_out_insn_3402
698
699static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
700                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
701{
702    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
703              | rn << 5 | rd);
704}
705
706/* This function is used for the Move (wide immediate) instruction group.
707   Note that SHIFT is a full shift count, not the 2 bit HW field. */
708static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
709                              TCGReg rd, uint16_t half, unsigned shift)
710{
711    tcg_debug_assert((shift & ~0x30) == 0);
712    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
713}
714
715static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
716                              TCGReg rd, int64_t disp)
717{
718    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
719}
720
721static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
722                                     TCGType sf, TCGReg rd, TCGReg rn,
723                                     TCGReg rm, int opt, int imm3)
724{
725    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
726              imm3 << 10 | rn << 5 | rd);
727}
728
729/* This function is for both 3.5.2 (Add/Subtract shifted register), for
730   the rare occasion when we actually want to supply a shift amount.  */
731static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
732                                      TCGType ext, TCGReg rd, TCGReg rn,
733                                      TCGReg rm, int imm6)
734{
735    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
736}
737
738/* This function is for 3.5.2 (Add/subtract shifted register),
739   and 3.5.10 (Logical shifted register), for the vast majorty of cases
740   when we don't want to apply a shift.  Thus it can also be used for
741   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
742static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
743                              TCGReg rd, TCGReg rn, TCGReg rm)
744{
745    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
746}
747
748#define tcg_out_insn_3503  tcg_out_insn_3502
749#define tcg_out_insn_3508  tcg_out_insn_3502
750#define tcg_out_insn_3510  tcg_out_insn_3502
751
752static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
753                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
754{
755    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
756              | tcg_cond_to_aarch64[c] << 12);
757}
758
759static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
760                              TCGReg rd, TCGReg rn)
761{
762    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
763}
764
765static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
766                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
767{
768    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
769}
770
771static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
772                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
773{
774    /* Note that bit 11 set means general register input.  Therefore
775       we can handle both register sets with one function.  */
776    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
777              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
778}
779
780static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
781                              TCGReg rd, bool op, int cmode, uint8_t imm8)
782{
783    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
784              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
785}
786
787static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
788                              TCGReg rd, TCGReg rn, unsigned immhb)
789{
790    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
791}
792
793static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
794                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
795{
796    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
797              | (rn & 0x1f) << 5 | (rd & 0x1f));
798}
799
800static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
801                              unsigned size, TCGReg rd, TCGReg rn)
802{
803    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
804}
805
806static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
807                              TCGReg rd, TCGReg rn, unsigned immhb)
808{
809    tcg_out32(s, insn | q << 30 | immhb << 16
810              | (rn & 0x1f) << 5 | (rd & 0x1f));
811}
812
813static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
814                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
815{
816    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
817              | (rn & 0x1f) << 5 | (rd & 0x1f));
818}
819
820static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
821                              unsigned size, TCGReg rd, TCGReg rn)
822{
823    tcg_out32(s, insn | q << 30 | (size << 22)
824              | (rn & 0x1f) << 5 | (rd & 0x1f));
825}
826
827static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
828                              TCGReg rd, TCGReg base, TCGType ext,
829                              TCGReg regoff)
830{
831    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
832    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
833              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
834}
835
836static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
837                              TCGReg rd, TCGReg rn, intptr_t offset)
838{
839    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
840}
841
842static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
843                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
844{
845    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
846    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
847              | rn << 5 | (rd & 0x1f));
848}
849
850static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
851{
852    /*
853     * While BTI insns are nops on hosts without FEAT_BTI,
854     * there is no point in emitting them in that case either.
855     */
856    if (cpuinfo & CPUINFO_BTI) {
857        tcg_out32(s, insn);
858    }
859}
860
861/* Register to register move using ORR (shifted register with no shift). */
862static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
863{
864    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
865}
866
867/* Register to register move using ADDI (move to/from SP).  */
868static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
869{
870    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
871}
872
873/* This function is used for the Logical (immediate) instruction group.
874   The value of LIMM must satisfy IS_LIMM.  See the comment above about
875   only supporting simplified logical immediates.  */
876static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
877                             TCGReg rd, TCGReg rn, uint64_t limm)
878{
879    unsigned h, l, r, c;
880
881    tcg_debug_assert(is_limm(limm));
882
883    h = clz64(limm);
884    l = ctz64(limm);
885    if (l == 0) {
886        r = 0;                  /* form 0....01....1 */
887        c = ctz64(~limm) - 1;
888        if (h == 0) {
889            r = clz64(~limm);   /* form 1..10..01..1 */
890            c += r;
891        }
892    } else {
893        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
894        c = r - h - 1;
895    }
896    if (ext == TCG_TYPE_I32) {
897        r &= 31;
898        c &= 31;
899    }
900
901    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
902}
903
904static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
905                             TCGReg rd, int64_t v64)
906{
907    bool q = type == TCG_TYPE_V128;
908    int cmode, imm8, i;
909
910    /* Test all bytes equal first.  */
911    if (vece == MO_8) {
912        imm8 = (uint8_t)v64;
913        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
914        return;
915    }
916
917    /*
918     * Test all bytes 0x00 or 0xff second.  This can match cases that
919     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
920     */
921    for (i = imm8 = 0; i < 8; i++) {
922        uint8_t byte = v64 >> (i * 8);
923        if (byte == 0xff) {
924            imm8 |= 1 << i;
925        } else if (byte != 0) {
926            goto fail_bytes;
927        }
928    }
929    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
930    return;
931 fail_bytes:
932
933    /*
934     * Tests for various replications.  For each element width, if we
935     * cannot find an expansion there's no point checking a larger
936     * width because we already know by replication it cannot match.
937     */
938    if (vece == MO_16) {
939        uint16_t v16 = v64;
940
941        if (is_shimm16(v16, &cmode, &imm8)) {
942            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
943            return;
944        }
945        if (is_shimm16(~v16, &cmode, &imm8)) {
946            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
947            return;
948        }
949
950        /*
951         * Otherwise, all remaining constants can be loaded in two insns:
952         * rd = v16 & 0xff, rd |= v16 & 0xff00.
953         */
954        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
955        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
956        return;
957    } else if (vece == MO_32) {
958        uint32_t v32 = v64;
959        uint32_t n32 = ~v32;
960
961        if (is_shimm32(v32, &cmode, &imm8) ||
962            is_soimm32(v32, &cmode, &imm8) ||
963            is_fimm32(v32, &cmode, &imm8)) {
964            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
965            return;
966        }
967        if (is_shimm32(n32, &cmode, &imm8) ||
968            is_soimm32(n32, &cmode, &imm8)) {
969            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
970            return;
971        }
972
973        /*
974         * Restrict the set of constants to those we can load with
975         * two instructions.  Others we load from the pool.
976         */
977        i = is_shimm32_pair(v32, &cmode, &imm8);
978        if (i) {
979            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
980            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
981            return;
982        }
983        i = is_shimm32_pair(n32, &cmode, &imm8);
984        if (i) {
985            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
986            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
987            return;
988        }
989    } else if (is_fimm64(v64, &cmode, &imm8)) {
990        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
991        return;
992    }
993
994    /*
995     * As a last resort, load from the constant pool.  Sadly there
996     * is no LD1R (literal), so store the full 16-byte vector.
997     */
998    if (type == TCG_TYPE_V128) {
999        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
1000        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
1001    } else {
1002        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1003        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1004    }
1005}
1006
1007static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1008                            TCGReg rd, TCGReg rs)
1009{
1010    int is_q = type - TCG_TYPE_V64;
1011    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1012    return true;
1013}
1014
1015static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1016                             TCGReg r, TCGReg base, intptr_t offset)
1017{
1018    TCGReg temp = TCG_REG_TMP0;
1019
1020    if (offset < -0xffffff || offset > 0xffffff) {
1021        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1022        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1023        base = temp;
1024    } else {
1025        AArch64Insn add_insn = I3401_ADDI;
1026
1027        if (offset < 0) {
1028            add_insn = I3401_SUBI;
1029            offset = -offset;
1030        }
1031        if (offset & 0xfff000) {
1032            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1033            base = temp;
1034        }
1035        if (offset & 0xfff) {
1036            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1037            base = temp;
1038        }
1039    }
1040    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1041    return true;
1042}
1043
1044static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1045                         tcg_target_long value)
1046{
1047    tcg_target_long svalue = value;
1048    tcg_target_long ivalue = ~value;
1049    tcg_target_long t0, t1, t2;
1050    int s0, s1;
1051    AArch64Insn opc;
1052
1053    switch (type) {
1054    case TCG_TYPE_I32:
1055    case TCG_TYPE_I64:
1056        tcg_debug_assert(rd < 32);
1057        break;
1058    default:
1059        g_assert_not_reached();
1060    }
1061
1062    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1063       values within [2**31, 2**32-1], we can create smaller sequences by
1064       interpreting this as a negative 32-bit number, while ensuring that
1065       the high 32 bits are cleared by setting SF=0.  */
1066    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1067        svalue = (int32_t)value;
1068        value = (uint32_t)value;
1069        ivalue = (uint32_t)ivalue;
1070        type = TCG_TYPE_I32;
1071    }
1072
1073    /* Speed things up by handling the common case of small positive
1074       and negative values specially.  */
1075    if ((value & ~0xffffull) == 0) {
1076        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1077        return;
1078    } else if ((ivalue & ~0xffffull) == 0) {
1079        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1080        return;
1081    }
1082
1083    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1084       use the sign-extended value.  That lets us match rotated values such
1085       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1086    if (is_limm(svalue)) {
1087        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1088        return;
1089    }
1090
1091    /* Look for host pointer values within 4G of the PC.  This happens
1092       often when loading pointers to QEMU's own data structures.  */
1093    if (type == TCG_TYPE_I64) {
1094        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1095        tcg_target_long disp = value - src_rx;
1096        if (disp == sextract64(disp, 0, 21)) {
1097            tcg_out_insn(s, 3406, ADR, rd, disp);
1098            return;
1099        }
1100        disp = (value >> 12) - (src_rx >> 12);
1101        if (disp == sextract64(disp, 0, 21)) {
1102            tcg_out_insn(s, 3406, ADRP, rd, disp);
1103            if (value & 0xfff) {
1104                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1105            }
1106            return;
1107        }
1108    }
1109
1110    /* Would it take fewer insns to begin with MOVN?  */
1111    if (ctpop64(value) >= 32) {
1112        t0 = ivalue;
1113        opc = I3405_MOVN;
1114    } else {
1115        t0 = value;
1116        opc = I3405_MOVZ;
1117    }
1118    s0 = ctz64(t0) & (63 & -16);
1119    t1 = t0 & ~(0xffffull << s0);
1120    s1 = ctz64(t1) & (63 & -16);
1121    t2 = t1 & ~(0xffffull << s1);
1122    if (t2 == 0) {
1123        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1124        if (t1 != 0) {
1125            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1126        }
1127        return;
1128    }
1129
1130    /* For more than 2 insns, dump it into the constant pool.  */
1131    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1132    tcg_out_insn(s, 3305, LDR, 0, rd);
1133}
1134
1135static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1136{
1137    return false;
1138}
1139
1140static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1141                             tcg_target_long imm)
1142{
1143    /* This function is only used for passing structs by reference. */
1144    g_assert_not_reached();
1145}
1146
1147/* Define something more legible for general use.  */
1148#define tcg_out_ldst_r  tcg_out_insn_3310
1149
1150static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1151                         TCGReg rn, intptr_t offset, int lgsize)
1152{
1153    /* If the offset is naturally aligned and in range, then we can
1154       use the scaled uimm12 encoding */
1155    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1156        uintptr_t scaled_uimm = offset >> lgsize;
1157        if (scaled_uimm <= 0xfff) {
1158            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1159            return;
1160        }
1161    }
1162
1163    /* Small signed offsets can use the unscaled encoding.  */
1164    if (offset >= -256 && offset < 256) {
1165        tcg_out_insn_3312(s, insn, rd, rn, offset);
1166        return;
1167    }
1168
1169    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1170    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1171    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1172}
1173
1174static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1175{
1176    if (ret == arg) {
1177        return true;
1178    }
1179    switch (type) {
1180    case TCG_TYPE_I32:
1181    case TCG_TYPE_I64:
1182        if (ret < 32 && arg < 32) {
1183            tcg_out_movr(s, type, ret, arg);
1184            break;
1185        } else if (ret < 32) {
1186            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1187            break;
1188        } else if (arg < 32) {
1189            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1190            break;
1191        }
1192        /* FALLTHRU */
1193
1194    case TCG_TYPE_V64:
1195        tcg_debug_assert(ret >= 32 && arg >= 32);
1196        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1197        break;
1198    case TCG_TYPE_V128:
1199        tcg_debug_assert(ret >= 32 && arg >= 32);
1200        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1201        break;
1202
1203    default:
1204        g_assert_not_reached();
1205    }
1206    return true;
1207}
1208
1209static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1210                       TCGReg base, intptr_t ofs)
1211{
1212    AArch64Insn insn;
1213    int lgsz;
1214
1215    switch (type) {
1216    case TCG_TYPE_I32:
1217        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1218        lgsz = 2;
1219        break;
1220    case TCG_TYPE_I64:
1221        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1222        lgsz = 3;
1223        break;
1224    case TCG_TYPE_V64:
1225        insn = I3312_LDRVD;
1226        lgsz = 3;
1227        break;
1228    case TCG_TYPE_V128:
1229        insn = I3312_LDRVQ;
1230        lgsz = 4;
1231        break;
1232    default:
1233        g_assert_not_reached();
1234    }
1235    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1236}
1237
1238static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1239                       TCGReg base, intptr_t ofs)
1240{
1241    AArch64Insn insn;
1242    int lgsz;
1243
1244    switch (type) {
1245    case TCG_TYPE_I32:
1246        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1247        lgsz = 2;
1248        break;
1249    case TCG_TYPE_I64:
1250        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1251        lgsz = 3;
1252        break;
1253    case TCG_TYPE_V64:
1254        insn = I3312_STRVD;
1255        lgsz = 3;
1256        break;
1257    case TCG_TYPE_V128:
1258        insn = I3312_STRVQ;
1259        lgsz = 4;
1260        break;
1261    default:
1262        g_assert_not_reached();
1263    }
1264    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1265}
1266
1267static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1268                               TCGReg base, intptr_t ofs)
1269{
1270    if (type <= TCG_TYPE_I64 && val == 0) {
1271        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1272        return true;
1273    }
1274    return false;
1275}
1276
1277static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1278                               TCGReg rn, unsigned int a, unsigned int b)
1279{
1280    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1281}
1282
1283static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1284                                TCGReg rn, unsigned int a, unsigned int b)
1285{
1286    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1287}
1288
1289static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1290                                TCGReg rn, unsigned int a, unsigned int b)
1291{
1292    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1293}
1294
1295static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1296                                TCGReg rn, TCGReg rm, unsigned int a)
1297{
1298    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1299}
1300
1301static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1302                               TCGReg rd, TCGReg rn, unsigned int m)
1303{
1304    int bits = ext ? 64 : 32;
1305    int max = bits - 1;
1306    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1307}
1308
1309static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1310                               TCGReg rd, TCGReg rn, unsigned int m)
1311{
1312    int max = ext ? 63 : 31;
1313    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1314}
1315
1316static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1317                               TCGReg rd, TCGReg rn, unsigned int m)
1318{
1319    int max = ext ? 63 : 31;
1320    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1321}
1322
1323static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1324                                TCGReg rd, TCGReg rn, unsigned int m)
1325{
1326    int max = ext ? 63 : 31;
1327    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1328}
1329
1330static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1331                                TCGReg rd, TCGReg rn, unsigned int m)
1332{
1333    int max = ext ? 63 : 31;
1334    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1335}
1336
1337static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1338                               TCGReg rn, unsigned lsb, unsigned width)
1339{
1340    unsigned size = ext ? 64 : 32;
1341    unsigned a = (size - lsb) & (size - 1);
1342    unsigned b = width - 1;
1343    tcg_out_bfm(s, ext, rd, rn, a, b);
1344}
1345
1346static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1347                        tcg_target_long b, bool const_b)
1348{
1349    if (const_b) {
1350        /* Using CMP or CMN aliases.  */
1351        if (b >= 0) {
1352            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1353        } else {
1354            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1355        }
1356    } else {
1357        /* Using CMP alias SUBS wzr, Wn, Wm */
1358        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1359    }
1360}
1361
1362static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1363{
1364    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1365    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1366    tcg_out_insn(s, 3206, B, offset);
1367}
1368
1369static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1370{
1371    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1372    if (offset == sextract64(offset, 0, 26)) {
1373        tcg_out_insn(s, 3206, BL, offset);
1374    } else {
1375        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1376        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1377    }
1378}
1379
1380static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1381                         const TCGHelperInfo *info)
1382{
1383    tcg_out_call_int(s, target);
1384}
1385
1386static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1387{
1388    if (!l->has_value) {
1389        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1390        tcg_out_insn(s, 3206, B, 0);
1391    } else {
1392        tcg_out_goto(s, l->u.value_ptr);
1393    }
1394}
1395
1396static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1397                           TCGArg b, bool b_const, TCGLabel *l)
1398{
1399    intptr_t offset;
1400    bool need_cmp;
1401
1402    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1403        need_cmp = false;
1404    } else {
1405        need_cmp = true;
1406        tcg_out_cmp(s, ext, a, b, b_const);
1407    }
1408
1409    if (!l->has_value) {
1410        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1411        offset = tcg_in32(s) >> 5;
1412    } else {
1413        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1414        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1415    }
1416
1417    if (need_cmp) {
1418        tcg_out_insn(s, 3202, B_C, c, offset);
1419    } else if (c == TCG_COND_EQ) {
1420        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1421    } else {
1422        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1423    }
1424}
1425
1426static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1427                               TCGReg rd, TCGReg rn)
1428{
1429    /* REV, REV16, REV32 */
1430    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1431}
1432
1433static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1434                               TCGReg rd, TCGReg rn)
1435{
1436    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1437    int bits = (8 << s_bits) - 1;
1438    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1439}
1440
1441static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1442{
1443    tcg_out_sxt(s, type, MO_8, rd, rn);
1444}
1445
1446static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1447{
1448    tcg_out_sxt(s, type, MO_16, rd, rn);
1449}
1450
1451static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1452{
1453    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1454}
1455
1456static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1457{
1458    tcg_out_ext32s(s, rd, rn);
1459}
1460
1461static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1462                               TCGReg rd, TCGReg rn)
1463{
1464    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1465    int bits = (8 << s_bits) - 1;
1466    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1467}
1468
1469static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1470{
1471    tcg_out_uxt(s, MO_8, rd, rn);
1472}
1473
1474static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1475{
1476    tcg_out_uxt(s, MO_16, rd, rn);
1477}
1478
1479static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1480{
1481    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1482}
1483
1484static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1485{
1486    tcg_out_ext32u(s, rd, rn);
1487}
1488
1489static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1490{
1491    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1492}
1493
1494static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1495                            TCGReg rn, int64_t aimm)
1496{
1497    if (aimm >= 0) {
1498        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1499    } else {
1500        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1501    }
1502}
1503
1504static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1505                            TCGReg rh, TCGReg al, TCGReg ah,
1506                            tcg_target_long bl, tcg_target_long bh,
1507                            bool const_bl, bool const_bh, bool sub)
1508{
1509    TCGReg orig_rl = rl;
1510    AArch64Insn insn;
1511
1512    if (rl == ah || (!const_bh && rl == bh)) {
1513        rl = TCG_REG_TMP0;
1514    }
1515
1516    if (const_bl) {
1517        if (bl < 0) {
1518            bl = -bl;
1519            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1520        } else {
1521            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1522        }
1523
1524        if (unlikely(al == TCG_REG_XZR)) {
1525            /* ??? We want to allow al to be zero for the benefit of
1526               negation via subtraction.  However, that leaves open the
1527               possibility of adding 0+const in the low part, and the
1528               immediate add instructions encode XSP not XZR.  Don't try
1529               anything more elaborate here than loading another zero.  */
1530            al = TCG_REG_TMP0;
1531            tcg_out_movi(s, ext, al, 0);
1532        }
1533        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1534    } else {
1535        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1536    }
1537
1538    insn = I3503_ADC;
1539    if (const_bh) {
1540        /* Note that the only two constants we support are 0 and -1, and
1541           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1542        if ((bh != 0) ^ sub) {
1543            insn = I3503_SBC;
1544        }
1545        bh = TCG_REG_XZR;
1546    } else if (sub) {
1547        insn = I3503_SBC;
1548    }
1549    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1550
1551    tcg_out_mov(s, ext, orig_rl, rl);
1552}
1553
1554static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1555{
1556    static const uint32_t sync[] = {
1557        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1558        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1559        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1560        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1561        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1562    };
1563    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1564}
1565
1566static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1567                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1568{
1569    TCGReg a1 = a0;
1570    if (is_ctz) {
1571        a1 = TCG_REG_TMP0;
1572        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1573    }
1574    if (const_b && b == (ext ? 64 : 32)) {
1575        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1576    } else {
1577        AArch64Insn sel = I3506_CSEL;
1578
1579        tcg_out_cmp(s, ext, a0, 0, 1);
1580        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1581
1582        if (const_b) {
1583            if (b == -1) {
1584                b = TCG_REG_XZR;
1585                sel = I3506_CSINV;
1586            } else if (b == 0) {
1587                b = TCG_REG_XZR;
1588            } else {
1589                tcg_out_movi(s, ext, d, b);
1590                b = d;
1591            }
1592        }
1593        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1594    }
1595}
1596
1597typedef struct {
1598    TCGReg base;
1599    TCGReg index;
1600    TCGType index_ext;
1601    TCGAtomAlign aa;
1602} HostAddress;
1603
1604bool tcg_target_has_memory_bswap(MemOp memop)
1605{
1606    return false;
1607}
1608
1609static const TCGLdstHelperParam ldst_helper_param = {
1610    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1611};
1612
1613static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1614{
1615    MemOp opc = get_memop(lb->oi);
1616
1617    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1618        return false;
1619    }
1620
1621    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1622    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1623    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1624    tcg_out_goto(s, lb->raddr);
1625    return true;
1626}
1627
1628static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1629{
1630    MemOp opc = get_memop(lb->oi);
1631
1632    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1633        return false;
1634    }
1635
1636    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1637    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1638    tcg_out_goto(s, lb->raddr);
1639    return true;
1640}
1641
1642/* We expect to use a 7-bit scaled negative offset from ENV.  */
1643#define MIN_TLB_MASK_TABLE_OFS  -512
1644
1645/*
1646 * For softmmu, perform the TLB load and compare.
1647 * For useronly, perform any required alignment tests.
1648 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1649 * is required and fill in @h with the host address for the fast path.
1650 */
1651static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1652                                           TCGReg addr_reg, MemOpIdx oi,
1653                                           bool is_ld)
1654{
1655    TCGType addr_type = s->addr_type;
1656    TCGLabelQemuLdst *ldst = NULL;
1657    MemOp opc = get_memop(oi);
1658    MemOp s_bits = opc & MO_SIZE;
1659    unsigned a_mask;
1660
1661    h->aa = atom_and_align_for_opc(s, opc,
1662                                   have_lse2 ? MO_ATOM_WITHIN16
1663                                             : MO_ATOM_IFALIGN,
1664                                   s_bits == MO_128);
1665    a_mask = (1 << h->aa.align) - 1;
1666
1667#ifdef CONFIG_SOFTMMU
1668    unsigned s_mask = (1u << s_bits) - 1;
1669    unsigned mem_index = get_mmuidx(oi);
1670    TCGReg addr_adj;
1671    TCGType mask_type;
1672    uint64_t compare_mask;
1673
1674    ldst = new_ldst_label(s);
1675    ldst->is_ld = is_ld;
1676    ldst->oi = oi;
1677    ldst->addrlo_reg = addr_reg;
1678
1679    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1680                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1681
1682    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1683    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1684    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1685    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1686                 tlb_mask_table_ofs(s, mem_index), 1, 0);
1687
1688    /* Extract the TLB index from the address into TMP0.  */
1689    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1690                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1691                 s->page_bits - CPU_TLB_ENTRY_BITS);
1692
1693    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
1694    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1695
1696    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
1697    QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1698    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1699               is_ld ? offsetof(CPUTLBEntry, addr_read)
1700                     : offsetof(CPUTLBEntry, addr_write));
1701    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1702               offsetof(CPUTLBEntry, addend));
1703
1704    /*
1705     * For aligned accesses, we check the first byte and include the alignment
1706     * bits within the address.  For unaligned access, we check that we don't
1707     * cross pages using the address of the last byte of the access.
1708     */
1709    if (a_mask >= s_mask) {
1710        addr_adj = addr_reg;
1711    } else {
1712        addr_adj = TCG_REG_TMP2;
1713        tcg_out_insn(s, 3401, ADDI, addr_type,
1714                     addr_adj, addr_reg, s_mask - a_mask);
1715    }
1716    compare_mask = (uint64_t)s->page_mask | a_mask;
1717
1718    /* Store the page mask part of the address into TMP2.  */
1719    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1720                     addr_adj, compare_mask);
1721
1722    /* Perform the address comparison. */
1723    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1724
1725    /* If not equal, we jump to the slow path. */
1726    ldst->label_ptr[0] = s->code_ptr;
1727    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1728
1729    h->base = TCG_REG_TMP1;
1730    h->index = addr_reg;
1731    h->index_ext = addr_type;
1732#else
1733    if (a_mask) {
1734        ldst = new_ldst_label(s);
1735
1736        ldst->is_ld = is_ld;
1737        ldst->oi = oi;
1738        ldst->addrlo_reg = addr_reg;
1739
1740        /* tst addr, #mask */
1741        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1742
1743        /* b.ne slow_path */
1744        ldst->label_ptr[0] = s->code_ptr;
1745        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1746    }
1747
1748    if (guest_base || addr_type == TCG_TYPE_I32) {
1749        h->base = TCG_REG_GUEST_BASE;
1750        h->index = addr_reg;
1751        h->index_ext = addr_type;
1752    } else {
1753        h->base = addr_reg;
1754        h->index = TCG_REG_XZR;
1755        h->index_ext = TCG_TYPE_I64;
1756    }
1757#endif
1758
1759    return ldst;
1760}
1761
1762static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1763                                   TCGReg data_r, HostAddress h)
1764{
1765    switch (memop & MO_SSIZE) {
1766    case MO_UB:
1767        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1768        break;
1769    case MO_SB:
1770        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1771                       data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_UW:
1774        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1775        break;
1776    case MO_SW:
1777        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1778                       data_r, h.base, h.index_ext, h.index);
1779        break;
1780    case MO_UL:
1781        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    case MO_SL:
1784        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1785        break;
1786    case MO_UQ:
1787        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1788        break;
1789    default:
1790        g_assert_not_reached();
1791    }
1792}
1793
1794static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1795                                   TCGReg data_r, HostAddress h)
1796{
1797    switch (memop & MO_SIZE) {
1798    case MO_8:
1799        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1800        break;
1801    case MO_16:
1802        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1803        break;
1804    case MO_32:
1805        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1806        break;
1807    case MO_64:
1808        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1809        break;
1810    default:
1811        g_assert_not_reached();
1812    }
1813}
1814
1815static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1816                            MemOpIdx oi, TCGType data_type)
1817{
1818    TCGLabelQemuLdst *ldst;
1819    HostAddress h;
1820
1821    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1822    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1823
1824    if (ldst) {
1825        ldst->type = data_type;
1826        ldst->datalo_reg = data_reg;
1827        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1828    }
1829}
1830
1831static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1832                            MemOpIdx oi, TCGType data_type)
1833{
1834    TCGLabelQemuLdst *ldst;
1835    HostAddress h;
1836
1837    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1838    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1839
1840    if (ldst) {
1841        ldst->type = data_type;
1842        ldst->datalo_reg = data_reg;
1843        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1844    }
1845}
1846
1847static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1848                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1849{
1850    TCGLabelQemuLdst *ldst;
1851    HostAddress h;
1852    TCGReg base;
1853    bool use_pair;
1854
1855    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1856
1857    /* Compose the final address, as LDP/STP have no indexing. */
1858    if (h.index == TCG_REG_XZR) {
1859        base = h.base;
1860    } else {
1861        base = TCG_REG_TMP2;
1862        if (h.index_ext == TCG_TYPE_I32) {
1863            /* add base, base, index, uxtw */
1864            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1865                         h.base, h.index, MO_32, 0);
1866        } else {
1867            /* add base, base, index */
1868            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1869        }
1870    }
1871
1872    use_pair = h.aa.atom < MO_128 || have_lse2;
1873
1874    if (!use_pair) {
1875        tcg_insn_unit *branch = NULL;
1876        TCGReg ll, lh, sl, sh;
1877
1878        /*
1879         * If we have already checked for 16-byte alignment, that's all
1880         * we need. Otherwise we have determined that misaligned atomicity
1881         * may be handled with two 8-byte loads.
1882         */
1883        if (h.aa.align < MO_128) {
1884            /*
1885             * TODO: align should be MO_64, so we only need to test bit 3,
1886             * which means we could use TBNZ instead of ANDS+B_C.
1887             */
1888            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1889            branch = s->code_ptr;
1890            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1891            use_pair = true;
1892        }
1893
1894        if (is_ld) {
1895            /*
1896             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1897             *    ldxp lo, hi, [base]
1898             *    stxp t0, lo, hi, [base]
1899             *    cbnz t0, .-8
1900             * Require no overlap between data{lo,hi} and base.
1901             */
1902            if (datalo == base || datahi == base) {
1903                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1904                base = TCG_REG_TMP2;
1905            }
1906            ll = sl = datalo;
1907            lh = sh = datahi;
1908        } else {
1909            /*
1910             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1911             * 1: ldxp t0, t1, [base]
1912             *    stxp t0, lo, hi, [base]
1913             *    cbnz t0, 1b
1914             */
1915            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1916            ll = TCG_REG_TMP0;
1917            lh = TCG_REG_TMP1;
1918            sl = datalo;
1919            sh = datahi;
1920        }
1921
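        /*
         * Exclusive-pair retry loop: CBNZ with an offset of -2 insns
         * branches back to the LDXP until the STXP status in TMP0 reads
         * zero, i.e. the store succeeded.
         */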
1922        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1923        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1924        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1925
1926        if (use_pair) {
1927            /* "b .+8", branching across the one insn of use_pair. */
1928            tcg_out_insn(s, 3206, B, 2);
1929            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1930        }
1931    }
1932
1933    if (use_pair) {
1934        if (is_ld) {
1935            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1936        } else {
1937            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1938        }
1939    }
1940
1941    if (ldst) {
1942        ldst->type = TCG_TYPE_I128;
1943        ldst->datalo_reg = datalo;
1944        ldst->datahi_reg = datahi;
1945        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1946    }
1947}
1948
1949static const tcg_insn_unit *tb_ret_addr;
1950
1951static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1952{
1953    const tcg_insn_unit *target;
1954    ptrdiff_t offset;
1955
1956    /* Reuse the zeroing that exists for goto_ptr.  */
1957    if (a0 == 0) {
1958        target = tcg_code_gen_epilogue;
1959    } else {
1960        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1961        target = tb_ret_addr;
1962    }
1963
1964    offset = tcg_pcrel_diff(s, target) >> 2;
1965    if (offset == sextract64(offset, 0, 26)) {
1966        tcg_out_insn(s, 3206, B, offset);
1967    } else {
1968        /*
1969         * Only x16/x17 generate BTI type Jump (2),
1970         * other registers generate BTI type Jump|Call (3).
1971         */
1972        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
1973        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1974        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1975    }
1976}
1977
1978static void tcg_out_goto_tb(TCGContext *s, int which)
1979{
1980    /*
1981     * Direct branch, or indirect address load, will be patched
1982     * by tb_target_set_jmp_target.  Assert indirect load offset
1983     * in range early, regardless of direct branch distance.
1984     */
1985    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1986    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1987
1988    set_jmp_insn_offset(s, which);
1989    tcg_out32(s, I3206_B);
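    /*
     * This placeholder is rewritten by tb_target_set_jmp_target below,
     * either to a direct B to the destination TB, or to an LDR of TMP0
     * that falls through to the BR that follows.
     */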
1990    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1991    set_jmp_reset_offset(s, which);
1992    tcg_out_bti(s, BTI_J);
1993}
1994
1995void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1996                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1997{
1998    uintptr_t d_addr = tb->jmp_target_addr[n];
1999    ptrdiff_t d_offset = d_addr - jmp_rx;
2000    tcg_insn_unit insn;
2001
2002    /* Either a direct branch, or a load of the target address for the indirect branch. */
2003    if (d_offset == sextract64(d_offset, 0, 28)) {
2004        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2005    } else {
2006        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2007        ptrdiff_t i_offset = i_addr - jmp_rx;
2008
2009        /* Note that we asserted this in range in tcg_out_goto_tb. */
2010        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2011    }
2012    qatomic_set((uint32_t *)jmp_rw, insn);
2013    flush_idcache_range(jmp_rx, jmp_rw, 4);
2014}
2015
2016static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2017                       const TCGArg args[TCG_MAX_OP_ARGS],
2018                       const int const_args[TCG_MAX_OP_ARGS])
2019{
2020    /* 99% of the time, we can signal the use of extension registers
2021       by looking to see if the opcode handles 64-bit data.  */
2022    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2023
2024    /* Hoist the loads of the most common arguments.  */
2025    TCGArg a0 = args[0];
2026    TCGArg a1 = args[1];
2027    TCGArg a2 = args[2];
2028    int c2 = const_args[2];
2029
2030    /* Some operands are defined with "rZ" constraint, a register or
2031       the zero register.  These need not actually test args[I] == 0.  */
2032#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2033
2034    switch (opc) {
2035    case INDEX_op_goto_ptr:
2036        tcg_out_insn(s, 3207, BR, a0);
2037        break;
2038
2039    case INDEX_op_br:
2040        tcg_out_goto_label(s, arg_label(a0));
2041        break;
2042
2043    case INDEX_op_ld8u_i32:
2044    case INDEX_op_ld8u_i64:
2045        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2046        break;
2047    case INDEX_op_ld8s_i32:
2048        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2049        break;
2050    case INDEX_op_ld8s_i64:
2051        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2052        break;
2053    case INDEX_op_ld16u_i32:
2054    case INDEX_op_ld16u_i64:
2055        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2056        break;
2057    case INDEX_op_ld16s_i32:
2058        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2059        break;
2060    case INDEX_op_ld16s_i64:
2061        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2062        break;
2063    case INDEX_op_ld_i32:
2064    case INDEX_op_ld32u_i64:
2065        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2066        break;
2067    case INDEX_op_ld32s_i64:
2068        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2069        break;
2070    case INDEX_op_ld_i64:
2071        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2072        break;
2073
2074    case INDEX_op_st8_i32:
2075    case INDEX_op_st8_i64:
2076        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2077        break;
2078    case INDEX_op_st16_i32:
2079    case INDEX_op_st16_i64:
2080        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2081        break;
2082    case INDEX_op_st_i32:
2083    case INDEX_op_st32_i64:
2084        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2085        break;
2086    case INDEX_op_st_i64:
2087        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2088        break;
2089
2090    case INDEX_op_add_i32:
2091        a2 = (int32_t)a2;
2092        /* FALLTHRU */
2093    case INDEX_op_add_i64:
2094        if (c2) {
2095            tcg_out_addsubi(s, ext, a0, a1, a2);
2096        } else {
2097            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2098        }
2099        break;
2100
2101    case INDEX_op_sub_i32:
2102        a2 = (int32_t)a2;
2103        /* FALLTHRU */
2104    case INDEX_op_sub_i64:
2105        if (c2) {
2106            tcg_out_addsubi(s, ext, a0, a1, -a2);
2107        } else {
2108            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2109        }
2110        break;
2111
2112    case INDEX_op_neg_i64:
2113    case INDEX_op_neg_i32:
2114        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2115        break;
2116
2117    case INDEX_op_and_i32:
2118        a2 = (int32_t)a2;
2119        /* FALLTHRU */
2120    case INDEX_op_and_i64:
2121        if (c2) {
2122            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_andc_i32:
2129        a2 = (int32_t)a2;
2130        /* FALLTHRU */
2131    case INDEX_op_andc_i64:
2132        if (c2) {
2133            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2134        } else {
2135            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2136        }
2137        break;
2138
2139    case INDEX_op_or_i32:
2140        a2 = (int32_t)a2;
2141        /* FALLTHRU */
2142    case INDEX_op_or_i64:
2143        if (c2) {
2144            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2145        } else {
2146            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2147        }
2148        break;
2149
2150    case INDEX_op_orc_i32:
2151        a2 = (int32_t)a2;
2152        /* FALLTHRU */
2153    case INDEX_op_orc_i64:
2154        if (c2) {
2155            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2156        } else {
2157            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2158        }
2159        break;
2160
2161    case INDEX_op_xor_i32:
2162        a2 = (int32_t)a2;
2163        /* FALLTHRU */
2164    case INDEX_op_xor_i64:
2165        if (c2) {
2166            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2167        } else {
2168            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2169        }
2170        break;
2171
2172    case INDEX_op_eqv_i32:
2173        a2 = (int32_t)a2;
2174        /* FALLTHRU */
2175    case INDEX_op_eqv_i64:
2176        if (c2) {
2177            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2178        } else {
2179            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2180        }
2181        break;
2182
2183    case INDEX_op_not_i64:
2184    case INDEX_op_not_i32:
2185        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2186        break;
2187
2188    case INDEX_op_mul_i64:
2189    case INDEX_op_mul_i32:
2190        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2191        break;
2192
2193    case INDEX_op_div_i64:
2194    case INDEX_op_div_i32:
2195        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2196        break;
2197    case INDEX_op_divu_i64:
2198    case INDEX_op_divu_i32:
2199        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2200        break;
2201
2202    case INDEX_op_rem_i64:
2203    case INDEX_op_rem_i32:
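        /* No remainder insn: a0 = a1 - (a1 / a2) * a2 via divide + msub.
           The unsigned variant below follows the same pattern.  */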
2204        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2205        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2206        break;
2207    case INDEX_op_remu_i64:
2208    case INDEX_op_remu_i32:
2209        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2210        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2211        break;
2212
2213    case INDEX_op_shl_i64:
2214    case INDEX_op_shl_i32:
2215        if (c2) {
2216            tcg_out_shl(s, ext, a0, a1, a2);
2217        } else {
2218            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2219        }
2220        break;
2221
2222    case INDEX_op_shr_i64:
2223    case INDEX_op_shr_i32:
2224        if (c2) {
2225            tcg_out_shr(s, ext, a0, a1, a2);
2226        } else {
2227            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2228        }
2229        break;
2230
2231    case INDEX_op_sar_i64:
2232    case INDEX_op_sar_i32:
2233        if (c2) {
2234            tcg_out_sar(s, ext, a0, a1, a2);
2235        } else {
2236            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2237        }
2238        break;
2239
2240    case INDEX_op_rotr_i64:
2241    case INDEX_op_rotr_i32:
2242        if (c2) {
2243            tcg_out_rotr(s, ext, a0, a1, a2);
2244        } else {
2245            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2246        }
2247        break;
2248
2249    case INDEX_op_rotl_i64:
2250    case INDEX_op_rotl_i32:
2251        if (c2) {
2252            tcg_out_rotl(s, ext, a0, a1, a2);
2253        } else {
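            /* No rotate-left insn: negate the count (mod width), then ROR. */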
2254            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2255            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2256        }
2257        break;
2258
2259    case INDEX_op_clz_i64:
2260    case INDEX_op_clz_i32:
2261        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2262        break;
2263    case INDEX_op_ctz_i64:
2264    case INDEX_op_ctz_i32:
2265        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2266        break;
2267
2268    case INDEX_op_brcond_i32:
2269        a1 = (int32_t)a1;
2270        /* FALLTHRU */
2271    case INDEX_op_brcond_i64:
2272        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2273        break;
2274
2275    case INDEX_op_setcond_i32:
2276        a2 = (int32_t)a2;
2277        /* FALLTHRU */
2278    case INDEX_op_setcond_i64:
2279        tcg_out_cmp(s, ext, a1, a2, c2);
2280        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2281        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2282                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2283        break;
2284
2285    case INDEX_op_negsetcond_i32:
2286        a2 = (int32_t)a2;
2287        /* FALLTHRU */
2288    case INDEX_op_negsetcond_i64:
2289        tcg_out_cmp(s, ext, a1, a2, c2);
2290        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2291        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2292                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2293        break;
2294
2295    case INDEX_op_movcond_i32:
2296        a2 = (int32_t)a2;
2297        /* FALLTHRU */
2298    case INDEX_op_movcond_i64:
2299        tcg_out_cmp(s, ext, a1, a2, c2);
2300        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2301        break;
2302
2303    case INDEX_op_qemu_ld_a32_i32:
2304    case INDEX_op_qemu_ld_a64_i32:
2305    case INDEX_op_qemu_ld_a32_i64:
2306    case INDEX_op_qemu_ld_a64_i64:
2307        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2308        break;
2309    case INDEX_op_qemu_st_a32_i32:
2310    case INDEX_op_qemu_st_a64_i32:
2311    case INDEX_op_qemu_st_a32_i64:
2312    case INDEX_op_qemu_st_a64_i64:
2313        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2314        break;
2315    case INDEX_op_qemu_ld_a32_i128:
2316    case INDEX_op_qemu_ld_a64_i128:
2317        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2318        break;
2319    case INDEX_op_qemu_st_a32_i128:
2320    case INDEX_op_qemu_st_a64_i128:
2321        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2322        break;
2323
2324    case INDEX_op_bswap64_i64:
2325        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2326        break;
2327    case INDEX_op_bswap32_i64:
2328        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2329        if (a2 & TCG_BSWAP_OS) {
2330            tcg_out_ext32s(s, a0, a0);
2331        }
2332        break;
2333    case INDEX_op_bswap32_i32:
2334        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2335        break;
2336    case INDEX_op_bswap16_i64:
2337    case INDEX_op_bswap16_i32:
2338        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2339        if (a2 & TCG_BSWAP_OS) {
2340            /* Output must be sign-extended. */
2341            tcg_out_ext16s(s, ext, a0, a0);
2342        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2343            /* Output must be zero-extended, but input isn't. */
2344            tcg_out_ext16u(s, a0, a0);
2345        }
2346        break;
2347
2348    case INDEX_op_deposit_i64:
2349    case INDEX_op_deposit_i32:
2350        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2351        break;
2352
2353    case INDEX_op_extract_i64:
2354    case INDEX_op_extract_i32:
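        /* UBFX alias: UBFM with immr = pos, imms = pos + len - 1. */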
2355        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2356        break;
2357
2358    case INDEX_op_sextract_i64:
2359    case INDEX_op_sextract_i32:
2360        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2361        break;
2362
2363    case INDEX_op_extract2_i64:
2364    case INDEX_op_extract2_i32:
2365        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2366        break;
2367
2368    case INDEX_op_add2_i32:
2369        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2370                        (int32_t)args[4], args[5], const_args[4],
2371                        const_args[5], false);
2372        break;
2373    case INDEX_op_add2_i64:
2374        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2375                        args[5], const_args[4], const_args[5], false);
2376        break;
2377    case INDEX_op_sub2_i32:
2378        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2379                        (int32_t)args[4], args[5], const_args[4],
2380                        const_args[5], true);
2381        break;
2382    case INDEX_op_sub2_i64:
2383        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2384                        args[5], const_args[4], const_args[5], true);
2385        break;
2386
2387    case INDEX_op_muluh_i64:
2388        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2389        break;
2390    case INDEX_op_mulsh_i64:
2391        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2392        break;
2393
2394    case INDEX_op_mb:
2395        tcg_out_mb(s, a0);
2396        break;
2397
2398    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2399    case INDEX_op_mov_i64:
2400    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2401    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2402    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2403    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2404    case INDEX_op_ext8s_i64:
2405    case INDEX_op_ext8u_i32:
2406    case INDEX_op_ext8u_i64:
2407    case INDEX_op_ext16s_i64:
2408    case INDEX_op_ext16s_i32:
2409    case INDEX_op_ext16u_i64:
2410    case INDEX_op_ext16u_i32:
2411    case INDEX_op_ext32s_i64:
2412    case INDEX_op_ext32u_i64:
2413    case INDEX_op_ext_i32_i64:
2414    case INDEX_op_extu_i32_i64:
2415    case INDEX_op_extrl_i64_i32:
2416    default:
2417        g_assert_not_reached();
2418    }
2419
2420#undef REG0
2421}
2422
2423static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2424                           unsigned vecl, unsigned vece,
2425                           const TCGArg args[TCG_MAX_OP_ARGS],
2426                           const int const_args[TCG_MAX_OP_ARGS])
2427{
2428    static const AArch64Insn cmp_vec_insn[16] = {
2429        [TCG_COND_EQ] = I3616_CMEQ,
2430        [TCG_COND_GT] = I3616_CMGT,
2431        [TCG_COND_GE] = I3616_CMGE,
2432        [TCG_COND_GTU] = I3616_CMHI,
2433        [TCG_COND_GEU] = I3616_CMHS,
2434    };
2435    static const AArch64Insn cmp_scalar_insn[16] = {
2436        [TCG_COND_EQ] = I3611_CMEQ,
2437        [TCG_COND_GT] = I3611_CMGT,
2438        [TCG_COND_GE] = I3611_CMGE,
2439        [TCG_COND_GTU] = I3611_CMHI,
2440        [TCG_COND_GEU] = I3611_CMHS,
2441    };
2442    static const AArch64Insn cmp0_vec_insn[16] = {
2443        [TCG_COND_EQ] = I3617_CMEQ0,
2444        [TCG_COND_GT] = I3617_CMGT0,
2445        [TCG_COND_GE] = I3617_CMGE0,
2446        [TCG_COND_LT] = I3617_CMLT0,
2447        [TCG_COND_LE] = I3617_CMLE0,
2448    };
2449    static const AArch64Insn cmp0_scalar_insn[16] = {
2450        [TCG_COND_EQ] = I3612_CMEQ0,
2451        [TCG_COND_GT] = I3612_CMGT0,
2452        [TCG_COND_GE] = I3612_CMGE0,
2453        [TCG_COND_LT] = I3612_CMLT0,
2454        [TCG_COND_LE] = I3612_CMLE0,
2455    };
2456
2457    TCGType type = vecl + TCG_TYPE_V64;
2458    unsigned is_q = vecl;
2459    bool is_scalar = !is_q && vece == MO_64;
2460    TCGArg a0, a1, a2, a3;
2461    int cmode, imm8;
2462
2463    a0 = args[0];
2464    a1 = args[1];
2465    a2 = args[2];
2466
2467    switch (opc) {
2468    case INDEX_op_ld_vec:
2469        tcg_out_ld(s, type, a0, a1, a2);
2470        break;
2471    case INDEX_op_st_vec:
2472        tcg_out_st(s, type, a0, a1, a2);
2473        break;
2474    case INDEX_op_dupm_vec:
2475        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2476        break;
2477    case INDEX_op_add_vec:
2478        if (is_scalar) {
2479            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2480        } else {
2481            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2482        }
2483        break;
2484    case INDEX_op_sub_vec:
2485        if (is_scalar) {
2486            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2487        } else {
2488            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2489        }
2490        break;
2491    case INDEX_op_mul_vec:
2492        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2493        break;
2494    case INDEX_op_neg_vec:
2495        if (is_scalar) {
2496            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2497        } else {
2498            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2499        }
2500        break;
2501    case INDEX_op_abs_vec:
2502        if (is_scalar) {
2503            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2504        } else {
2505            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2506        }
2507        break;
2508    case INDEX_op_and_vec:
2509        if (const_args[2]) {
2510            is_shimm1632(~a2, &cmode, &imm8);
2511            if (a0 == a1) {
2512                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2513                return;
2514            }
2515            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2516            a2 = a0;
2517        }
2518        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2519        break;
2520    case INDEX_op_or_vec:
2521        if (const_args[2]) {
2522            is_shimm1632(a2, &cmode, &imm8);
2523            if (a0 == a1) {
2524                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2525                return;
2526            }
2527            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2528            a2 = a0;
2529        }
2530        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2531        break;
2532    case INDEX_op_andc_vec:
2533        if (const_args[2]) {
2534            is_shimm1632(a2, &cmode, &imm8);
2535            if (a0 == a1) {
2536                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2537                return;
2538            }
2539            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2540            a2 = a0;
2541        }
2542        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2543        break;
2544    case INDEX_op_orc_vec:
2545        if (const_args[2]) {
2546            is_shimm1632(~a2, &cmode, &imm8);
2547            if (a0 == a1) {
2548                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2549                return;
2550            }
2551            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2552            a2 = a0;
2553        }
2554        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2555        break;
2556    case INDEX_op_xor_vec:
2557        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2558        break;
2559    case INDEX_op_ssadd_vec:
2560        if (is_scalar) {
2561            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2562        } else {
2563            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2564        }
2565        break;
2566    case INDEX_op_sssub_vec:
2567        if (is_scalar) {
2568            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2569        } else {
2570            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2571        }
2572        break;
2573    case INDEX_op_usadd_vec:
2574        if (is_scalar) {
2575            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2576        } else {
2577            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2578        }
2579        break;
2580    case INDEX_op_ussub_vec:
2581        if (is_scalar) {
2582            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2583        } else {
2584            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2585        }
2586        break;
2587    case INDEX_op_smax_vec:
2588        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2589        break;
2590    case INDEX_op_smin_vec:
2591        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2592        break;
2593    case INDEX_op_umax_vec:
2594        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2595        break;
2596    case INDEX_op_umin_vec:
2597        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2598        break;
2599    case INDEX_op_not_vec:
2600        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2601        break;
2602    case INDEX_op_shli_vec:
2603        if (is_scalar) {
2604            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2605        } else {
2606            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2607        }
2608        break;
2609    case INDEX_op_shri_vec:
2610        if (is_scalar) {
2611            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2612        } else {
2613            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2614        }
2615        break;
2616    case INDEX_op_sari_vec:
2617        if (is_scalar) {
2618            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2619        } else {
2620            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2621        }
2622        break;
2623    case INDEX_op_aa64_sli_vec:
2624        if (is_scalar) {
2625            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2626        } else {
2627            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2628        }
2629        break;
2630    case INDEX_op_shlv_vec:
2631        if (is_scalar) {
2632            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2633        } else {
2634            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2635        }
2636        break;
2637    case INDEX_op_aa64_sshl_vec:
2638        if (is_scalar) {
2639            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2640        } else {
2641            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2642        }
2643        break;
2644    case INDEX_op_cmp_vec:
2645        {
2646            TCGCond cond = args[3];
2647            AArch64Insn insn;
2648
2649            if (cond == TCG_COND_NE) {
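                /*
                 * No CMNE insn.  Against zero, CMTST rn,rn marks a lane
                 * nonzero iff any bit is set; otherwise use CMEQ and
                 * invert the result.
                 */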
2650                if (const_args[2]) {
2651                    if (is_scalar) {
2652                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2653                    } else {
2654                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2655                    }
2656                } else {
2657                    if (is_scalar) {
2658                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2659                    } else {
2660                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2661                    }
2662                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2663                }
2664            } else {
2665                if (const_args[2]) {
2666                    if (is_scalar) {
2667                        insn = cmp0_scalar_insn[cond];
2668                        if (insn) {
2669                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2670                            break;
2671                        }
2672                    } else {
2673                        insn = cmp0_vec_insn[cond];
2674                        if (insn) {
2675                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2676                            break;
2677                        }
2678                    }
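                    /* No compare-with-zero form for this cond: materialize
                       zero and fall through to the reg-reg compare.  */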
2679                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2680                    a2 = TCG_VEC_TMP0;
2681                }
2682                if (is_scalar) {
2683                    insn = cmp_scalar_insn[cond];
2684                    if (insn == 0) {
2685                        TCGArg t;
2686                        t = a1, a1 = a2, a2 = t;
2687                        cond = tcg_swap_cond(cond);
2688                        insn = cmp_scalar_insn[cond];
2689                        tcg_debug_assert(insn != 0);
2690                    }
2691                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2692                } else {
2693                    insn = cmp_vec_insn[cond];
2694                    if (insn == 0) {
2695                        TCGArg t;
2696                        t = a1, a1 = a2, a2 = t;
2697                        cond = tcg_swap_cond(cond);
2698                        insn = cmp_vec_insn[cond];
2699                        tcg_debug_assert(insn != 0);
2700                    }
2701                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2702                }
2703            }
2704        }
2705        break;
2706
2707    case INDEX_op_bitsel_vec:
2708        a3 = args[3];
2709        if (a0 == a3) {
2710            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2711        } else if (a0 == a2) {
2712            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2713        } else {
2714            if (a0 != a1) {
2715                tcg_out_mov(s, type, a0, a1);
2716            }
2717            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2718        }
2719        break;
2720
2721    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2722    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2723    default:
2724        g_assert_not_reached();
2725    }
2726}
2727
2728int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2729{
2730    switch (opc) {
2731    case INDEX_op_add_vec:
2732    case INDEX_op_sub_vec:
2733    case INDEX_op_and_vec:
2734    case INDEX_op_or_vec:
2735    case INDEX_op_xor_vec:
2736    case INDEX_op_andc_vec:
2737    case INDEX_op_orc_vec:
2738    case INDEX_op_neg_vec:
2739    case INDEX_op_abs_vec:
2740    case INDEX_op_not_vec:
2741    case INDEX_op_cmp_vec:
2742    case INDEX_op_shli_vec:
2743    case INDEX_op_shri_vec:
2744    case INDEX_op_sari_vec:
2745    case INDEX_op_ssadd_vec:
2746    case INDEX_op_sssub_vec:
2747    case INDEX_op_usadd_vec:
2748    case INDEX_op_ussub_vec:
2749    case INDEX_op_shlv_vec:
2750    case INDEX_op_bitsel_vec:
2751        return 1;
2752    case INDEX_op_rotli_vec:
2753    case INDEX_op_shrv_vec:
2754    case INDEX_op_sarv_vec:
2755    case INDEX_op_rotlv_vec:
2756    case INDEX_op_rotrv_vec:
2757        return -1;
2758    case INDEX_op_mul_vec:
2759    case INDEX_op_smax_vec:
2760    case INDEX_op_smin_vec:
2761    case INDEX_op_umax_vec:
2762    case INDEX_op_umin_vec:
2763        return vece < MO_64;
2764
2765    default:
2766        return 0;
2767    }
2768}
2769
2770void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2771                       TCGArg a0, ...)
2772{
2773    va_list va;
2774    TCGv_vec v0, v1, v2, t1, t2, c1;
2775    TCGArg a2;
2776
2777    va_start(va, a0);
2778    v0 = temp_tcgv_vec(arg_temp(a0));
2779    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2780    a2 = va_arg(va, TCGArg);
2781    va_end(va);
2782
2783    switch (opc) {
2784    case INDEX_op_rotli_vec:
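        /* Rotate left by immediate: unsigned shift right by (width - a2),
           then SLI inserts v1 << a2 over the shifted result.  */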
2785        t1 = tcg_temp_new_vec(type);
2786        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2787        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2788                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2789        tcg_temp_free_vec(t1);
2790        break;
2791
2792    case INDEX_op_shrv_vec:
2793    case INDEX_op_sarv_vec:
2794        /* Right shifts are negative left shifts for AArch64.  */
2795        v2 = temp_tcgv_vec(arg_temp(a2));
2796        t1 = tcg_temp_new_vec(type);
2797        tcg_gen_neg_vec(vece, t1, v2);
2798        opc = (opc == INDEX_op_shrv_vec
2799               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2800        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2801                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2802        tcg_temp_free_vec(t1);
2803        break;
2804
2805    case INDEX_op_rotlv_vec:
2806        v2 = temp_tcgv_vec(arg_temp(a2));
2807        t1 = tcg_temp_new_vec(type);
2808        c1 = tcg_constant_vec(type, vece, 8 << vece);
2809        tcg_gen_sub_vec(vece, t1, v2, c1);
2810        /* Right shifts are negative left shifts for AArch64.  */
2811        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2812                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2813        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2814                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2815        tcg_gen_or_vec(vece, v0, v0, t1);
2816        tcg_temp_free_vec(t1);
2817        break;
2818
2819    case INDEX_op_rotrv_vec:
2820        v2 = temp_tcgv_vec(arg_temp(a2));
2821        t1 = tcg_temp_new_vec(type);
2822        t2 = tcg_temp_new_vec(type);
2823        c1 = tcg_constant_vec(type, vece, 8 << vece);
2824        tcg_gen_neg_vec(vece, t1, v2);
2825        tcg_gen_sub_vec(vece, t2, c1, v2);
2826        /* Right shifts are negative left shifts for AArch64.  */
2827        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2828                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2829        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2830                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2831        tcg_gen_or_vec(vece, v0, t1, t2);
2832        tcg_temp_free_vec(t1);
2833        tcg_temp_free_vec(t2);
2834        break;
2835
2836    default:
2837        g_assert_not_reached();
2838    }
2839}
2840
2841static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2842{
2843    switch (op) {
2844    case INDEX_op_goto_ptr:
2845        return C_O0_I1(r);
2846
2847    case INDEX_op_ld8u_i32:
2848    case INDEX_op_ld8s_i32:
2849    case INDEX_op_ld16u_i32:
2850    case INDEX_op_ld16s_i32:
2851    case INDEX_op_ld_i32:
2852    case INDEX_op_ld8u_i64:
2853    case INDEX_op_ld8s_i64:
2854    case INDEX_op_ld16u_i64:
2855    case INDEX_op_ld16s_i64:
2856    case INDEX_op_ld32u_i64:
2857    case INDEX_op_ld32s_i64:
2858    case INDEX_op_ld_i64:
2859    case INDEX_op_neg_i32:
2860    case INDEX_op_neg_i64:
2861    case INDEX_op_not_i32:
2862    case INDEX_op_not_i64:
2863    case INDEX_op_bswap16_i32:
2864    case INDEX_op_bswap32_i32:
2865    case INDEX_op_bswap16_i64:
2866    case INDEX_op_bswap32_i64:
2867    case INDEX_op_bswap64_i64:
2868    case INDEX_op_ext8s_i32:
2869    case INDEX_op_ext16s_i32:
2870    case INDEX_op_ext8u_i32:
2871    case INDEX_op_ext16u_i32:
2872    case INDEX_op_ext8s_i64:
2873    case INDEX_op_ext16s_i64:
2874    case INDEX_op_ext32s_i64:
2875    case INDEX_op_ext8u_i64:
2876    case INDEX_op_ext16u_i64:
2877    case INDEX_op_ext32u_i64:
2878    case INDEX_op_ext_i32_i64:
2879    case INDEX_op_extu_i32_i64:
2880    case INDEX_op_extract_i32:
2881    case INDEX_op_extract_i64:
2882    case INDEX_op_sextract_i32:
2883    case INDEX_op_sextract_i64:
2884        return C_O1_I1(r, r);
2885
2886    case INDEX_op_st8_i32:
2887    case INDEX_op_st16_i32:
2888    case INDEX_op_st_i32:
2889    case INDEX_op_st8_i64:
2890    case INDEX_op_st16_i64:
2891    case INDEX_op_st32_i64:
2892    case INDEX_op_st_i64:
2893        return C_O0_I2(rZ, r);
2894
2895    case INDEX_op_add_i32:
2896    case INDEX_op_add_i64:
2897    case INDEX_op_sub_i32:
2898    case INDEX_op_sub_i64:
2899    case INDEX_op_setcond_i32:
2900    case INDEX_op_setcond_i64:
2901    case INDEX_op_negsetcond_i32:
2902    case INDEX_op_negsetcond_i64:
2903        return C_O1_I2(r, r, rA);
2904
2905    case INDEX_op_mul_i32:
2906    case INDEX_op_mul_i64:
2907    case INDEX_op_div_i32:
2908    case INDEX_op_div_i64:
2909    case INDEX_op_divu_i32:
2910    case INDEX_op_divu_i64:
2911    case INDEX_op_rem_i32:
2912    case INDEX_op_rem_i64:
2913    case INDEX_op_remu_i32:
2914    case INDEX_op_remu_i64:
2915    case INDEX_op_muluh_i64:
2916    case INDEX_op_mulsh_i64:
2917        return C_O1_I2(r, r, r);
2918
2919    case INDEX_op_and_i32:
2920    case INDEX_op_and_i64:
2921    case INDEX_op_or_i32:
2922    case INDEX_op_or_i64:
2923    case INDEX_op_xor_i32:
2924    case INDEX_op_xor_i64:
2925    case INDEX_op_andc_i32:
2926    case INDEX_op_andc_i64:
2927    case INDEX_op_orc_i32:
2928    case INDEX_op_orc_i64:
2929    case INDEX_op_eqv_i32:
2930    case INDEX_op_eqv_i64:
2931        return C_O1_I2(r, r, rL);
2932
2933    case INDEX_op_shl_i32:
2934    case INDEX_op_shr_i32:
2935    case INDEX_op_sar_i32:
2936    case INDEX_op_rotl_i32:
2937    case INDEX_op_rotr_i32:
2938    case INDEX_op_shl_i64:
2939    case INDEX_op_shr_i64:
2940    case INDEX_op_sar_i64:
2941    case INDEX_op_rotl_i64:
2942    case INDEX_op_rotr_i64:
2943        return C_O1_I2(r, r, ri);
2944
2945    case INDEX_op_clz_i32:
2946    case INDEX_op_ctz_i32:
2947    case INDEX_op_clz_i64:
2948    case INDEX_op_ctz_i64:
2949        return C_O1_I2(r, r, rAL);
2950
2951    case INDEX_op_brcond_i32:
2952    case INDEX_op_brcond_i64:
2953        return C_O0_I2(r, rA);
2954
2955    case INDEX_op_movcond_i32:
2956    case INDEX_op_movcond_i64:
2957        return C_O1_I4(r, r, rA, rZ, rZ);
2958
2959    case INDEX_op_qemu_ld_a32_i32:
2960    case INDEX_op_qemu_ld_a64_i32:
2961    case INDEX_op_qemu_ld_a32_i64:
2962    case INDEX_op_qemu_ld_a64_i64:
2963        return C_O1_I1(r, r);
2964    case INDEX_op_qemu_ld_a32_i128:
2965    case INDEX_op_qemu_ld_a64_i128:
2966        return C_O2_I1(r, r, r);
2967    case INDEX_op_qemu_st_a32_i32:
2968    case INDEX_op_qemu_st_a64_i32:
2969    case INDEX_op_qemu_st_a32_i64:
2970    case INDEX_op_qemu_st_a64_i64:
2971        return C_O0_I2(rZ, r);
2972    case INDEX_op_qemu_st_a32_i128:
2973    case INDEX_op_qemu_st_a64_i128:
2974        return C_O0_I3(rZ, rZ, r);
2975
2976    case INDEX_op_deposit_i32:
2977    case INDEX_op_deposit_i64:
2978        return C_O1_I2(r, 0, rZ);
2979
2980    case INDEX_op_extract2_i32:
2981    case INDEX_op_extract2_i64:
2982        return C_O1_I2(r, rZ, rZ);
2983
2984    case INDEX_op_add2_i32:
2985    case INDEX_op_add2_i64:
2986    case INDEX_op_sub2_i32:
2987    case INDEX_op_sub2_i64:
2988        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2989
2990    case INDEX_op_add_vec:
2991    case INDEX_op_sub_vec:
2992    case INDEX_op_mul_vec:
2993    case INDEX_op_xor_vec:
2994    case INDEX_op_ssadd_vec:
2995    case INDEX_op_sssub_vec:
2996    case INDEX_op_usadd_vec:
2997    case INDEX_op_ussub_vec:
2998    case INDEX_op_smax_vec:
2999    case INDEX_op_smin_vec:
3000    case INDEX_op_umax_vec:
3001    case INDEX_op_umin_vec:
3002    case INDEX_op_shlv_vec:
3003    case INDEX_op_shrv_vec:
3004    case INDEX_op_sarv_vec:
3005    case INDEX_op_aa64_sshl_vec:
3006        return C_O1_I2(w, w, w);
3007    case INDEX_op_not_vec:
3008    case INDEX_op_neg_vec:
3009    case INDEX_op_abs_vec:
3010    case INDEX_op_shli_vec:
3011    case INDEX_op_shri_vec:
3012    case INDEX_op_sari_vec:
3013        return C_O1_I1(w, w);
3014    case INDEX_op_ld_vec:
3015    case INDEX_op_dupm_vec:
3016        return C_O1_I1(w, r);
3017    case INDEX_op_st_vec:
3018        return C_O0_I2(w, r);
3019    case INDEX_op_dup_vec:
3020        return C_O1_I1(w, wr);
3021    case INDEX_op_or_vec:
3022    case INDEX_op_andc_vec:
3023        return C_O1_I2(w, w, wO);
3024    case INDEX_op_and_vec:
3025    case INDEX_op_orc_vec:
3026        return C_O1_I2(w, w, wN);
3027    case INDEX_op_cmp_vec:
3028        return C_O1_I2(w, w, wZ);
3029    case INDEX_op_bitsel_vec:
3030        return C_O1_I3(w, w, w, w);
3031    case INDEX_op_aa64_sli_vec:
3032        return C_O1_I2(w, 0, w);
3033
3034    default:
3035        g_assert_not_reached();
3036    }
3037}
3038
3039static void tcg_target_init(TCGContext *s)
3040{
3041    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3042    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3043    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3044    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
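    /* General registers are bits 0..31 of the regset; vectors V0..V31
       occupy bits 32..63, hence the high-half masks above.  */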
3045
3046    tcg_target_call_clobber_regs = -1ull;
3047    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3048    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3049    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3050    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3051    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3052    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3053    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3054    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3055    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3056    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3057    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3058    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3059    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3060    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3061    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3062    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3063    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3064    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3065    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3066
3067    s->reserved_regs = 0;
3068    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3069    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3070    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3071    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3072    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3073    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3074    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3075}
3076
3077/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3078#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3079
3080#define FRAME_SIZE \
3081    ((PUSH_SIZE \
3082      + TCG_STATIC_CALL_ARGS_SIZE \
3083      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3084      + TCG_TARGET_STACK_ALIGN - 1) \
3085     & ~(TCG_TARGET_STACK_ALIGN - 1))
3086
3087/* We're expecting a 2-byte uleb128 encoded value.  */
3088QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3089
3090/* We're expecting to use a single ADDI insn.  */
3091QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3092
3093static void tcg_target_qemu_prologue(TCGContext *s)
3094{
3095    TCGReg r;
3096
3097    tcg_out_bti(s, BTI_C);
3098
3099    /* Push (FP, LR) and allocate space for all saved registers.  */
3100    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3101                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3102
3103    /* Set up frame pointer for canonical unwinding.  */
3104    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3105
3106    /* Store callee-preserved regs x19..x28.  */
3107    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
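        /* The pair at offset 0 holds FP/LR, so x19/x20 start at offset 16. */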
3108        int ofs = (r - TCG_REG_X19 + 2) * 8;
3109        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3110    }
3111
3112    /* Make stack space for TCG locals.  */
3113    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3114                 FRAME_SIZE - PUSH_SIZE);
3115
3116    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3117    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3118                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3119
3120#if !defined(CONFIG_SOFTMMU)
3121    /*
3122     * Note that XZR cannot be encoded in the address base register slot,
3123     * as that actually encodes SP.  Depending on the guest, we may need
3124     * to zero-extend the guest address via the address index register slot,
3125     * therefore we need to load even a zero guest base into a register.
3126     */
3127    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3128    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3129#endif
3130
3131    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3132    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3133
3134    /*
3135     * Return path for goto_ptr. Set return value to 0, a la exit_tb,
3136     * and fall through to the rest of the epilogue.
3137     */
3138    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3139    tcg_out_bti(s, BTI_J);
3140    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3141
3142    /* TB epilogue */
3143    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3144    tcg_out_bti(s, BTI_J);
3145
3146    /* Remove TCG locals stack space.  */
3147    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3148                 FRAME_SIZE - PUSH_SIZE);
3149
3150    /* Restore registers x19..x28.  */
3151    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3152        int ofs = (r - TCG_REG_X19 + 2) * 8;
3153        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3154    }
3155
3156    /* Pop (FP, LR), restore SP to previous frame.  */
3157    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3158                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3159    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3160}
3161
3162static void tcg_out_tb_start(TCGContext *s)
3163{
3164    tcg_out_bti(s, BTI_J);
3165}
3166
3167static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3168{
3169    int i;
3170    for (i = 0; i < count; ++i) {
3171        p[i] = NOP;
3172    }
3173}
3174
3175typedef struct {
3176    DebugFrameHeader h;
3177    uint8_t fde_def_cfa[4];
3178    uint8_t fde_reg_ofs[24];
3179} DebugFrame;
3180
3181#define ELF_HOST_MACHINE EM_AARCH64
3182
3183static const DebugFrame debug_frame = {
3184    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3185    .h.cie.id = -1,
3186    .h.cie.version = 1,
3187    .h.cie.code_align = 1,
3188    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3189    .h.cie.return_column = TCG_REG_LR,
3190
3191    /* Total FDE size does not include the "len" member.  */
3192    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3193
3194    .fde_def_cfa = {
3195        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3196        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3197        (FRAME_SIZE >> 7)
3198    },
3199    .fde_reg_ofs = {
3200        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3201        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3202        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3203        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3204        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3205        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3206        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3207        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3208        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3209        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3210        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3211        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3212    }
3213};
3214
3215void tcg_register_jit(const void *buf, size_t buf_size)
3216{
3217    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3218}
3219