xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision d2dfe0b5)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType in setting of the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#ifndef CONFIG_SOFTMMU
81#define TCG_REG_GUEST_BASE TCG_REG_X28
82#endif
83
84static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
85{
86    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
87    ptrdiff_t offset = target - src_rx;
88
89    if (offset == sextract64(offset, 0, 26)) {
90        /* read instruction, mask away previous PC_REL26 parameter contents,
91           set the proper offset, then write back the instruction. */
92        *src_rw = deposit32(*src_rw, 0, 26, offset);
93        return true;
94    }
95    return false;
96}
97
98static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
99{
100    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
101    ptrdiff_t offset = target - src_rx;
102
103    if (offset == sextract64(offset, 0, 19)) {
104        *src_rw = deposit32(*src_rw, 5, 19, offset);
105        return true;
106    }
107    return false;
108}
109
110static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
111                        intptr_t value, intptr_t addend)
112{
113    tcg_debug_assert(addend == 0);
114    switch (type) {
115    case R_AARCH64_JUMP26:
116    case R_AARCH64_CALL26:
117        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
118    case R_AARCH64_CONDBR19:
119        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
120    default:
121        g_assert_not_reached();
122    }
123}
124
125#define TCG_CT_CONST_AIMM 0x100
126#define TCG_CT_CONST_LIMM 0x200
127#define TCG_CT_CONST_ZERO 0x400
128#define TCG_CT_CONST_MONE 0x800
129#define TCG_CT_CONST_ORRI 0x1000
130#define TCG_CT_CONST_ANDI 0x2000
131
132#define ALL_GENERAL_REGS  0xffffffffu
133#define ALL_VECTOR_REGS   0xffffffff00000000ull
134
135/* Match a constant valid for addition (12-bit, optionally shifted).  */
136static inline bool is_aimm(uint64_t val)
137{
138    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
139}
140
141/* Match a constant valid for logical operations.  */
142static inline bool is_limm(uint64_t val)
143{
144    /* Taking a simplified view of the logical immediates for now, ignoring
145       the replication that can happen across the field.  Match bit patterns
146       of the forms
147           0....01....1
148           0..01..10..0
149       and their inverses.  */
150
151    /* Make things easier below, by testing the form with msb clear. */
152    if ((int64_t)val < 0) {
153        val = ~val;
154    }
155    if (val == 0) {
156        return false;
157    }
158    val += val & -val;
159    return (val & (val - 1)) == 0;
160}
161
162/* Return true if v16 is a valid 16-bit shifted immediate.  */
163static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
164{
165    if (v16 == (v16 & 0xff)) {
166        *cmode = 0x8;
167        *imm8 = v16 & 0xff;
168        return true;
169    } else if (v16 == (v16 & 0xff00)) {
170        *cmode = 0xa;
171        *imm8 = v16 >> 8;
172        return true;
173    }
174    return false;
175}
176
177/* Return true if v32 is a valid 32-bit shifted immediate.  */
178static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
179{
180    if (v32 == (v32 & 0xff)) {
181        *cmode = 0x0;
182        *imm8 = v32 & 0xff;
183        return true;
184    } else if (v32 == (v32 & 0xff00)) {
185        *cmode = 0x2;
186        *imm8 = (v32 >> 8) & 0xff;
187        return true;
188    } else if (v32 == (v32 & 0xff0000)) {
189        *cmode = 0x4;
190        *imm8 = (v32 >> 16) & 0xff;
191        return true;
192    } else if (v32 == (v32 & 0xff000000)) {
193        *cmode = 0x6;
194        *imm8 = v32 >> 24;
195        return true;
196    }
197    return false;
198}
199
200/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
201static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
202{
203    if ((v32 & 0xffff00ff) == 0xff) {
204        *cmode = 0xc;
205        *imm8 = (v32 >> 8) & 0xff;
206        return true;
207    } else if ((v32 & 0xff00ffff) == 0xffff) {
208        *cmode = 0xd;
209        *imm8 = (v32 >> 16) & 0xff;
210        return true;
211    }
212    return false;
213}
214
215/* Return true if v32 is a valid float32 immediate.  */
216static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
217{
218    if (extract32(v32, 0, 19) == 0
219        && (extract32(v32, 25, 6) == 0x20
220            || extract32(v32, 25, 6) == 0x1f)) {
221        *cmode = 0xf;
222        *imm8 = (extract32(v32, 31, 1) << 7)
223              | (extract32(v32, 25, 1) << 6)
224              | extract32(v32, 19, 6);
225        return true;
226    }
227    return false;
228}
229
230/* Return true if v64 is a valid float64 immediate.  */
231static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
232{
233    if (extract64(v64, 0, 48) == 0
234        && (extract64(v64, 54, 9) == 0x100
235            || extract64(v64, 54, 9) == 0x0ff)) {
236        *cmode = 0xf;
237        *imm8 = (extract64(v64, 63, 1) << 7)
238              | (extract64(v64, 54, 1) << 6)
239              | extract64(v64, 48, 6);
240        return true;
241    }
242    return false;
243}
244
245/*
246 * Return non-zero if v32 can be formed by MOVI+ORR.
247 * Place the parameters for MOVI in (cmode, imm8).
248 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
249 */
250static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
251{
252    int i;
253
254    for (i = 6; i > 0; i -= 2) {
255        /* Mask out one byte we can add with ORR.  */
256        uint32_t tmp = v32 & ~(0xffu << (i * 4));
257        if (is_shimm32(tmp, cmode, imm8) ||
258            is_soimm32(tmp, cmode, imm8)) {
259            break;
260        }
261    }
262    return i;
263}
264
265/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
266static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
267{
268    if (v32 == deposit32(v32, 16, 16, v32)) {
269        return is_shimm16(v32, cmode, imm8);
270    } else {
271        return is_shimm32(v32, cmode, imm8);
272    }
273}
274
275static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
276{
277    if (ct & TCG_CT_CONST) {
278        return 1;
279    }
280    if (type == TCG_TYPE_I32) {
281        val = (int32_t)val;
282    }
283    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
284        return 1;
285    }
286    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
287        return 1;
288    }
289    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
290        return 1;
291    }
292    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
293        return 1;
294    }
295
296    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
297    case 0:
298        break;
299    case TCG_CT_CONST_ANDI:
300        val = ~val;
301        /* fallthru */
302    case TCG_CT_CONST_ORRI:
303        if (val == deposit64(val, 32, 32, val)) {
304            int cmode, imm8;
305            return is_shimm1632(val, &cmode, &imm8);
306        }
307        break;
308    default:
309        /* Both bits should not be set for the same insn.  */
310        g_assert_not_reached();
311    }
312
313    return 0;
314}
315
316enum aarch64_cond_code {
317    COND_EQ = 0x0,
318    COND_NE = 0x1,
319    COND_CS = 0x2,     /* Unsigned greater or equal */
320    COND_HS = COND_CS, /* ALIAS greater or equal */
321    COND_CC = 0x3,     /* Unsigned less than */
322    COND_LO = COND_CC, /* ALIAS Lower */
323    COND_MI = 0x4,     /* Negative */
324    COND_PL = 0x5,     /* Zero or greater */
325    COND_VS = 0x6,     /* Overflow */
326    COND_VC = 0x7,     /* No overflow */
327    COND_HI = 0x8,     /* Unsigned greater than */
328    COND_LS = 0x9,     /* Unsigned less or equal */
329    COND_GE = 0xa,
330    COND_LT = 0xb,
331    COND_GT = 0xc,
332    COND_LE = 0xd,
333    COND_AL = 0xe,
334    COND_NV = 0xf, /* behaves like COND_AL here */
335};
336
337static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
338    [TCG_COND_EQ] = COND_EQ,
339    [TCG_COND_NE] = COND_NE,
340    [TCG_COND_LT] = COND_LT,
341    [TCG_COND_GE] = COND_GE,
342    [TCG_COND_LE] = COND_LE,
343    [TCG_COND_GT] = COND_GT,
344    /* unsigned */
345    [TCG_COND_LTU] = COND_LO,
346    [TCG_COND_GTU] = COND_HI,
347    [TCG_COND_GEU] = COND_HS,
348    [TCG_COND_LEU] = COND_LS,
349};
350
351typedef enum {
352    LDST_ST = 0,    /* store */
353    LDST_LD = 1,    /* load */
354    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
355    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
356} AArch64LdstType;
357
358/* We encode the format of the insn into the beginning of the name, so that
359   we can have the preprocessor help "typecheck" the insn vs the output
360   function.  Arm didn't provide us with nice names for the formats, so we
361   use the section number of the architecture reference manual in which the
362   instruction group is described.  */
363typedef enum {
364    /* Compare and branch (immediate).  */
365    I3201_CBZ       = 0x34000000,
366    I3201_CBNZ      = 0x35000000,
367
368    /* Conditional branch (immediate).  */
369    I3202_B_C       = 0x54000000,
370
371    /* Unconditional branch (immediate).  */
372    I3206_B         = 0x14000000,
373    I3206_BL        = 0x94000000,
374
375    /* Unconditional branch (register).  */
376    I3207_BR        = 0xd61f0000,
377    I3207_BLR       = 0xd63f0000,
378    I3207_RET       = 0xd65f0000,
379
380    /* AdvSIMD load/store single structure.  */
381    I3303_LD1R      = 0x0d40c000,
382
383    /* Load literal for loading the address at pc-relative offset */
384    I3305_LDR       = 0x58000000,
385    I3305_LDR_v64   = 0x5c000000,
386    I3305_LDR_v128  = 0x9c000000,
387
388    /* Load/store exclusive. */
389    I3306_LDXP      = 0xc8600000,
390    I3306_STXP      = 0xc8200000,
391
392    /* Load/store register.  Described here as 3.3.12, but the helper
393       that emits them can transform to 3.3.10 or 3.3.13.  */
394    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
395    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
396    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
397    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
398
399    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
400    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
401    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
402    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
403
404    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
405    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
406
407    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
408    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
409    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
410
411    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
412    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
413
414    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
415    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
416
417    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
418    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
419
420    I3312_TO_I3310  = 0x00200800,
421    I3312_TO_I3313  = 0x01000000,
422
423    /* Load/store register pair instructions.  */
424    I3314_LDP       = 0x28400000,
425    I3314_STP       = 0x28000000,
426
427    /* Add/subtract immediate instructions.  */
428    I3401_ADDI      = 0x11000000,
429    I3401_ADDSI     = 0x31000000,
430    I3401_SUBI      = 0x51000000,
431    I3401_SUBSI     = 0x71000000,
432
433    /* Bitfield instructions.  */
434    I3402_BFM       = 0x33000000,
435    I3402_SBFM      = 0x13000000,
436    I3402_UBFM      = 0x53000000,
437
438    /* Extract instruction.  */
439    I3403_EXTR      = 0x13800000,
440
441    /* Logical immediate instructions.  */
442    I3404_ANDI      = 0x12000000,
443    I3404_ORRI      = 0x32000000,
444    I3404_EORI      = 0x52000000,
445    I3404_ANDSI     = 0x72000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract extended register instructions. */
457    I3501_ADD       = 0x0b200000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV       = 0x5ac00000, /* + size << 10 */
482
483    /* Data-processing (2 source) instructions.  */
484    I3508_LSLV      = 0x1ac02000,
485    I3508_LSRV      = 0x1ac02400,
486    I3508_ASRV      = 0x1ac02800,
487    I3508_RORV      = 0x1ac02c00,
488    I3508_SMULH     = 0x9b407c00,
489    I3508_UMULH     = 0x9bc07c00,
490    I3508_UDIV      = 0x1ac00800,
491    I3508_SDIV      = 0x1ac00c00,
492
493    /* Data-processing (3 source) instructions.  */
494    I3509_MADD      = 0x1b000000,
495    I3509_MSUB      = 0x1b008000,
496
497    /* Logical shifted register instructions (without a shift).  */
498    I3510_AND       = 0x0a000000,
499    I3510_BIC       = 0x0a200000,
500    I3510_ORR       = 0x2a000000,
501    I3510_ORN       = 0x2a200000,
502    I3510_EOR       = 0x4a000000,
503    I3510_EON       = 0x4a200000,
504    I3510_ANDS      = 0x6a000000,
505
506    /* Logical shifted register instructions (with a shift).  */
507    I3502S_AND_LSR  = I3510_AND | (1 << 22),
508
509    /* AdvSIMD copy */
510    I3605_DUP      = 0x0e000400,
511    I3605_INS      = 0x4e001c00,
512    I3605_UMOV     = 0x0e003c00,
513
514    /* AdvSIMD modified immediate */
515    I3606_MOVI      = 0x0f000400,
516    I3606_MVNI      = 0x2f000400,
517    I3606_BIC       = 0x2f001400,
518    I3606_ORR       = 0x0f001400,
519
520    /* AdvSIMD scalar shift by immediate */
521    I3609_SSHR      = 0x5f000400,
522    I3609_SSRA      = 0x5f001400,
523    I3609_SHL       = 0x5f005400,
524    I3609_USHR      = 0x7f000400,
525    I3609_USRA      = 0x7f001400,
526    I3609_SLI       = 0x7f005400,
527
528    /* AdvSIMD scalar three same */
529    I3611_SQADD     = 0x5e200c00,
530    I3611_SQSUB     = 0x5e202c00,
531    I3611_CMGT      = 0x5e203400,
532    I3611_CMGE      = 0x5e203c00,
533    I3611_SSHL      = 0x5e204400,
534    I3611_ADD       = 0x5e208400,
535    I3611_CMTST     = 0x5e208c00,
536    I3611_UQADD     = 0x7e200c00,
537    I3611_UQSUB     = 0x7e202c00,
538    I3611_CMHI      = 0x7e203400,
539    I3611_CMHS      = 0x7e203c00,
540    I3611_USHL      = 0x7e204400,
541    I3611_SUB       = 0x7e208400,
542    I3611_CMEQ      = 0x7e208c00,
543
544    /* AdvSIMD scalar two-reg misc */
545    I3612_CMGT0     = 0x5e208800,
546    I3612_CMEQ0     = 0x5e209800,
547    I3612_CMLT0     = 0x5e20a800,
548    I3612_ABS       = 0x5e20b800,
549    I3612_CMGE0     = 0x7e208800,
550    I3612_CMLE0     = 0x7e209800,
551    I3612_NEG       = 0x7e20b800,
552
553    /* AdvSIMD shift by immediate */
554    I3614_SSHR      = 0x0f000400,
555    I3614_SSRA      = 0x0f001400,
556    I3614_SHL       = 0x0f005400,
557    I3614_SLI       = 0x2f005400,
558    I3614_USHR      = 0x2f000400,
559    I3614_USRA      = 0x2f001400,
560
561    /* AdvSIMD three same.  */
562    I3616_ADD       = 0x0e208400,
563    I3616_AND       = 0x0e201c00,
564    I3616_BIC       = 0x0e601c00,
565    I3616_BIF       = 0x2ee01c00,
566    I3616_BIT       = 0x2ea01c00,
567    I3616_BSL       = 0x2e601c00,
568    I3616_EOR       = 0x2e201c00,
569    I3616_MUL       = 0x0e209c00,
570    I3616_ORR       = 0x0ea01c00,
571    I3616_ORN       = 0x0ee01c00,
572    I3616_SUB       = 0x2e208400,
573    I3616_CMGT      = 0x0e203400,
574    I3616_CMGE      = 0x0e203c00,
575    I3616_CMTST     = 0x0e208c00,
576    I3616_CMHI      = 0x2e203400,
577    I3616_CMHS      = 0x2e203c00,
578    I3616_CMEQ      = 0x2e208c00,
579    I3616_SMAX      = 0x0e206400,
580    I3616_SMIN      = 0x0e206c00,
581    I3616_SSHL      = 0x0e204400,
582    I3616_SQADD     = 0x0e200c00,
583    I3616_SQSUB     = 0x0e202c00,
584    I3616_UMAX      = 0x2e206400,
585    I3616_UMIN      = 0x2e206c00,
586    I3616_UQADD     = 0x2e200c00,
587    I3616_UQSUB     = 0x2e202c00,
588    I3616_USHL      = 0x2e204400,
589
590    /* AdvSIMD two-reg misc.  */
591    I3617_CMGT0     = 0x0e208800,
592    I3617_CMEQ0     = 0x0e209800,
593    I3617_CMLT0     = 0x0e20a800,
594    I3617_CMGE0     = 0x2e208800,
595    I3617_CMLE0     = 0x2e209800,
596    I3617_NOT       = 0x2e205800,
597    I3617_ABS       = 0x0e20b800,
598    I3617_NEG       = 0x2e20b800,
599
600    /* System instructions.  */
601    NOP             = 0xd503201f,
602    DMB_ISH         = 0xd50338bf,
603    DMB_LD          = 0x00000100,
604    DMB_ST          = 0x00000200,
605} AArch64Insn;
606
607static inline uint32_t tcg_in32(TCGContext *s)
608{
609    uint32_t v = *(uint32_t *)s->code_ptr;
610    return v;
611}
612
613/* Emit an opcode with "type-checking" of the format.  */
614#define tcg_out_insn(S, FMT, OP, ...) \
615    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
616
617static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
618                              TCGReg rt, TCGReg rn, unsigned size)
619{
620    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
621}
622
623static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
624                              int imm19, TCGReg rt)
625{
626    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
627}
628
629static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
630                              TCGReg rt, TCGReg rt2, TCGReg rn)
631{
632    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
633}
634
635static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
636                              TCGReg rt, int imm19)
637{
638    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
639}
640
641static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
642                              TCGCond c, int imm19)
643{
644    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
645}
646
647static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
648{
649    tcg_out32(s, insn | (imm26 & 0x03ffffff));
650}
651
652static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
653{
654    tcg_out32(s, insn | rn << 5);
655}
656
657static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
658                              TCGReg r1, TCGReg r2, TCGReg rn,
659                              tcg_target_long ofs, bool pre, bool w)
660{
661    insn |= 1u << 31; /* ext */
662    insn |= pre << 24;
663    insn |= w << 23;
664
665    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
666    insn |= (ofs & (0x7f << 3)) << (15 - 3);
667
668    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
669}
670
671static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
672                              TCGReg rd, TCGReg rn, uint64_t aimm)
673{
674    if (aimm > 0xfff) {
675        tcg_debug_assert((aimm & 0xfff) == 0);
676        aimm >>= 12;
677        tcg_debug_assert(aimm <= 0xfff);
678        aimm |= 1 << 12;  /* apply LSL 12 */
679    }
680    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
681}
682
683/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
684   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
685   that feed the DecodeBitMasks pseudo function.  */
686static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
687                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
688{
689    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
690              | rn << 5 | rd);
691}
692
693#define tcg_out_insn_3404  tcg_out_insn_3402
694
695static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
696                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
697{
698    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
699              | rn << 5 | rd);
700}
701
702/* This function is used for the Move (wide immediate) instruction group.
703   Note that SHIFT is a full shift count, not the 2 bit HW field. */
704static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
705                              TCGReg rd, uint16_t half, unsigned shift)
706{
707    tcg_debug_assert((shift & ~0x30) == 0);
708    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
709}
710
711static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
712                              TCGReg rd, int64_t disp)
713{
714    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
715}
716
717static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
718                                     TCGType sf, TCGReg rd, TCGReg rn,
719                                     TCGReg rm, int opt, int imm3)
720{
721    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
722              imm3 << 10 | rn << 5 | rd);
723}
724
725/* This function is for both 3.5.2 (Add/Subtract shifted register), for
726   the rare occasion when we actually want to supply a shift amount.  */
727static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
728                                      TCGType ext, TCGReg rd, TCGReg rn,
729                                      TCGReg rm, int imm6)
730{
731    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
732}
733
734/* This function is for 3.5.2 (Add/subtract shifted register),
735   and 3.5.10 (Logical shifted register), for the vast majorty of cases
736   when we don't want to apply a shift.  Thus it can also be used for
737   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
738static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
739                              TCGReg rd, TCGReg rn, TCGReg rm)
740{
741    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
742}
743
744#define tcg_out_insn_3503  tcg_out_insn_3502
745#define tcg_out_insn_3508  tcg_out_insn_3502
746#define tcg_out_insn_3510  tcg_out_insn_3502
747
748static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
749                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
750{
751    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
752              | tcg_cond_to_aarch64[c] << 12);
753}
754
755static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
756                              TCGReg rd, TCGReg rn)
757{
758    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
759}
760
761static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
762                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
763{
764    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
765}
766
767static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
768                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
769{
770    /* Note that bit 11 set means general register input.  Therefore
771       we can handle both register sets with one function.  */
772    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
773              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
774}
775
776static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
777                              TCGReg rd, bool op, int cmode, uint8_t imm8)
778{
779    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
780              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
781}
782
783static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
784                              TCGReg rd, TCGReg rn, unsigned immhb)
785{
786    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
787}
788
789static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
790                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
791{
792    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
793              | (rn & 0x1f) << 5 | (rd & 0x1f));
794}
795
796static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
797                              unsigned size, TCGReg rd, TCGReg rn)
798{
799    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
800}
801
802static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
803                              TCGReg rd, TCGReg rn, unsigned immhb)
804{
805    tcg_out32(s, insn | q << 30 | immhb << 16
806              | (rn & 0x1f) << 5 | (rd & 0x1f));
807}
808
809static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
810                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
811{
812    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
813              | (rn & 0x1f) << 5 | (rd & 0x1f));
814}
815
816static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
817                              unsigned size, TCGReg rd, TCGReg rn)
818{
819    tcg_out32(s, insn | q << 30 | (size << 22)
820              | (rn & 0x1f) << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg base, TCGType ext,
825                              TCGReg regoff)
826{
827    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
828    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
829              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
830}
831
832static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
833                              TCGReg rd, TCGReg rn, intptr_t offset)
834{
835    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
836}
837
838static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
839                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
840{
841    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
842    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
843              | rn << 5 | (rd & 0x1f));
844}
845
846/* Register to register move using ORR (shifted register with no shift). */
847static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
848{
849    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
850}
851
852/* Register to register move using ADDI (move to/from SP).  */
853static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
854{
855    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
856}
857
858/* This function is used for the Logical (immediate) instruction group.
859   The value of LIMM must satisfy IS_LIMM.  See the comment above about
860   only supporting simplified logical immediates.  */
861static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
862                             TCGReg rd, TCGReg rn, uint64_t limm)
863{
864    unsigned h, l, r, c;
865
866    tcg_debug_assert(is_limm(limm));
867
868    h = clz64(limm);
869    l = ctz64(limm);
870    if (l == 0) {
871        r = 0;                  /* form 0....01....1 */
872        c = ctz64(~limm) - 1;
873        if (h == 0) {
874            r = clz64(~limm);   /* form 1..10..01..1 */
875            c += r;
876        }
877    } else {
878        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
879        c = r - h - 1;
880    }
881    if (ext == TCG_TYPE_I32) {
882        r &= 31;
883        c &= 31;
884    }
885
886    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
887}
888
889static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
890                             TCGReg rd, int64_t v64)
891{
892    bool q = type == TCG_TYPE_V128;
893    int cmode, imm8, i;
894
895    /* Test all bytes equal first.  */
896    if (vece == MO_8) {
897        imm8 = (uint8_t)v64;
898        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
899        return;
900    }
901
902    /*
903     * Test all bytes 0x00 or 0xff second.  This can match cases that
904     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
905     */
906    for (i = imm8 = 0; i < 8; i++) {
907        uint8_t byte = v64 >> (i * 8);
908        if (byte == 0xff) {
909            imm8 |= 1 << i;
910        } else if (byte != 0) {
911            goto fail_bytes;
912        }
913    }
914    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
915    return;
916 fail_bytes:
917
918    /*
919     * Tests for various replications.  For each element width, if we
920     * cannot find an expansion there's no point checking a larger
921     * width because we already know by replication it cannot match.
922     */
923    if (vece == MO_16) {
924        uint16_t v16 = v64;
925
926        if (is_shimm16(v16, &cmode, &imm8)) {
927            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
928            return;
929        }
930        if (is_shimm16(~v16, &cmode, &imm8)) {
931            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
932            return;
933        }
934
935        /*
936         * Otherwise, all remaining constants can be loaded in two insns:
937         * rd = v16 & 0xff, rd |= v16 & 0xff00.
938         */
939        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
940        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
941        return;
942    } else if (vece == MO_32) {
943        uint32_t v32 = v64;
944        uint32_t n32 = ~v32;
945
946        if (is_shimm32(v32, &cmode, &imm8) ||
947            is_soimm32(v32, &cmode, &imm8) ||
948            is_fimm32(v32, &cmode, &imm8)) {
949            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950            return;
951        }
952        if (is_shimm32(n32, &cmode, &imm8) ||
953            is_soimm32(n32, &cmode, &imm8)) {
954            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
955            return;
956        }
957
958        /*
959         * Restrict the set of constants to those we can load with
960         * two instructions.  Others we load from the pool.
961         */
962        i = is_shimm32_pair(v32, &cmode, &imm8);
963        if (i) {
964            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
965            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
966            return;
967        }
968        i = is_shimm32_pair(n32, &cmode, &imm8);
969        if (i) {
970            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
971            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
972            return;
973        }
974    } else if (is_fimm64(v64, &cmode, &imm8)) {
975        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
976        return;
977    }
978
979    /*
980     * As a last resort, load from the constant pool.  Sadly there
981     * is no LD1R (literal), so store the full 16-byte vector.
982     */
983    if (type == TCG_TYPE_V128) {
984        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
985        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
986    } else {
987        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
988        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
989    }
990}
991
992static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
993                            TCGReg rd, TCGReg rs)
994{
995    int is_q = type - TCG_TYPE_V64;
996    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
997    return true;
998}
999
1000static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1001                             TCGReg r, TCGReg base, intptr_t offset)
1002{
1003    TCGReg temp = TCG_REG_TMP0;
1004
1005    if (offset < -0xffffff || offset > 0xffffff) {
1006        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1007        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1008        base = temp;
1009    } else {
1010        AArch64Insn add_insn = I3401_ADDI;
1011
1012        if (offset < 0) {
1013            add_insn = I3401_SUBI;
1014            offset = -offset;
1015        }
1016        if (offset & 0xfff000) {
1017            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1018            base = temp;
1019        }
1020        if (offset & 0xfff) {
1021            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1022            base = temp;
1023        }
1024    }
1025    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1026    return true;
1027}
1028
1029static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1030                         tcg_target_long value)
1031{
1032    tcg_target_long svalue = value;
1033    tcg_target_long ivalue = ~value;
1034    tcg_target_long t0, t1, t2;
1035    int s0, s1;
1036    AArch64Insn opc;
1037
1038    switch (type) {
1039    case TCG_TYPE_I32:
1040    case TCG_TYPE_I64:
1041        tcg_debug_assert(rd < 32);
1042        break;
1043    default:
1044        g_assert_not_reached();
1045    }
1046
1047    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1048       values within [2**31, 2**32-1], we can create smaller sequences by
1049       interpreting this as a negative 32-bit number, while ensuring that
1050       the high 32 bits are cleared by setting SF=0.  */
1051    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1052        svalue = (int32_t)value;
1053        value = (uint32_t)value;
1054        ivalue = (uint32_t)ivalue;
1055        type = TCG_TYPE_I32;
1056    }
1057
1058    /* Speed things up by handling the common case of small positive
1059       and negative values specially.  */
1060    if ((value & ~0xffffull) == 0) {
1061        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1062        return;
1063    } else if ((ivalue & ~0xffffull) == 0) {
1064        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1065        return;
1066    }
1067
1068    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1069       use the sign-extended value.  That lets us match rotated values such
1070       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1071    if (is_limm(svalue)) {
1072        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1073        return;
1074    }
1075
1076    /* Look for host pointer values within 4G of the PC.  This happens
1077       often when loading pointers to QEMU's own data structures.  */
1078    if (type == TCG_TYPE_I64) {
1079        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1080        tcg_target_long disp = value - src_rx;
1081        if (disp == sextract64(disp, 0, 21)) {
1082            tcg_out_insn(s, 3406, ADR, rd, disp);
1083            return;
1084        }
1085        disp = (value >> 12) - (src_rx >> 12);
1086        if (disp == sextract64(disp, 0, 21)) {
1087            tcg_out_insn(s, 3406, ADRP, rd, disp);
1088            if (value & 0xfff) {
1089                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1090            }
1091            return;
1092        }
1093    }
1094
1095    /* Would it take fewer insns to begin with MOVN?  */
1096    if (ctpop64(value) >= 32) {
1097        t0 = ivalue;
1098        opc = I3405_MOVN;
1099    } else {
1100        t0 = value;
1101        opc = I3405_MOVZ;
1102    }
1103    s0 = ctz64(t0) & (63 & -16);
1104    t1 = t0 & ~(0xffffull << s0);
1105    s1 = ctz64(t1) & (63 & -16);
1106    t2 = t1 & ~(0xffffull << s1);
1107    if (t2 == 0) {
1108        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1109        if (t1 != 0) {
1110            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1111        }
1112        return;
1113    }
1114
1115    /* For more than 2 insns, dump it into the constant pool.  */
1116    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1117    tcg_out_insn(s, 3305, LDR, 0, rd);
1118}
1119
1120static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1121{
1122    return false;
1123}
1124
1125static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1126                             tcg_target_long imm)
1127{
1128    /* This function is only used for passing structs by reference. */
1129    g_assert_not_reached();
1130}
1131
1132/* Define something more legible for general use.  */
1133#define tcg_out_ldst_r  tcg_out_insn_3310
1134
1135static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1136                         TCGReg rn, intptr_t offset, int lgsize)
1137{
1138    /* If the offset is naturally aligned and in range, then we can
1139       use the scaled uimm12 encoding */
1140    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1141        uintptr_t scaled_uimm = offset >> lgsize;
1142        if (scaled_uimm <= 0xfff) {
1143            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1144            return;
1145        }
1146    }
1147
1148    /* Small signed offsets can use the unscaled encoding.  */
1149    if (offset >= -256 && offset < 256) {
1150        tcg_out_insn_3312(s, insn, rd, rn, offset);
1151        return;
1152    }
1153
1154    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1155    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1156    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1157}
1158
1159static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1160{
1161    if (ret == arg) {
1162        return true;
1163    }
1164    switch (type) {
1165    case TCG_TYPE_I32:
1166    case TCG_TYPE_I64:
1167        if (ret < 32 && arg < 32) {
1168            tcg_out_movr(s, type, ret, arg);
1169            break;
1170        } else if (ret < 32) {
1171            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1172            break;
1173        } else if (arg < 32) {
1174            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1175            break;
1176        }
1177        /* FALLTHRU */
1178
1179    case TCG_TYPE_V64:
1180        tcg_debug_assert(ret >= 32 && arg >= 32);
1181        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1182        break;
1183    case TCG_TYPE_V128:
1184        tcg_debug_assert(ret >= 32 && arg >= 32);
1185        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1186        break;
1187
1188    default:
1189        g_assert_not_reached();
1190    }
1191    return true;
1192}
1193
1194static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1195                       TCGReg base, intptr_t ofs)
1196{
1197    AArch64Insn insn;
1198    int lgsz;
1199
1200    switch (type) {
1201    case TCG_TYPE_I32:
1202        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1203        lgsz = 2;
1204        break;
1205    case TCG_TYPE_I64:
1206        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1207        lgsz = 3;
1208        break;
1209    case TCG_TYPE_V64:
1210        insn = I3312_LDRVD;
1211        lgsz = 3;
1212        break;
1213    case TCG_TYPE_V128:
1214        insn = I3312_LDRVQ;
1215        lgsz = 4;
1216        break;
1217    default:
1218        g_assert_not_reached();
1219    }
1220    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1221}
1222
1223static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1224                       TCGReg base, intptr_t ofs)
1225{
1226    AArch64Insn insn;
1227    int lgsz;
1228
1229    switch (type) {
1230    case TCG_TYPE_I32:
1231        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1232        lgsz = 2;
1233        break;
1234    case TCG_TYPE_I64:
1235        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1236        lgsz = 3;
1237        break;
1238    case TCG_TYPE_V64:
1239        insn = I3312_STRVD;
1240        lgsz = 3;
1241        break;
1242    case TCG_TYPE_V128:
1243        insn = I3312_STRVQ;
1244        lgsz = 4;
1245        break;
1246    default:
1247        g_assert_not_reached();
1248    }
1249    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1250}
1251
1252static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1253                               TCGReg base, intptr_t ofs)
1254{
1255    if (type <= TCG_TYPE_I64 && val == 0) {
1256        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1257        return true;
1258    }
1259    return false;
1260}
1261
1262static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1263                               TCGReg rn, unsigned int a, unsigned int b)
1264{
1265    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1266}
1267
1268static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1269                                TCGReg rn, unsigned int a, unsigned int b)
1270{
1271    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1272}
1273
1274static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1275                                TCGReg rn, unsigned int a, unsigned int b)
1276{
1277    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1278}
1279
1280static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1281                                TCGReg rn, TCGReg rm, unsigned int a)
1282{
1283    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1284}
1285
1286static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1287                               TCGReg rd, TCGReg rn, unsigned int m)
1288{
1289    int bits = ext ? 64 : 32;
1290    int max = bits - 1;
1291    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1292}
1293
1294static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1295                               TCGReg rd, TCGReg rn, unsigned int m)
1296{
1297    int max = ext ? 63 : 31;
1298    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1299}
1300
1301static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1302                               TCGReg rd, TCGReg rn, unsigned int m)
1303{
1304    int max = ext ? 63 : 31;
1305    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1306}
1307
1308static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1309                                TCGReg rd, TCGReg rn, unsigned int m)
1310{
1311    int max = ext ? 63 : 31;
1312    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1313}
1314
1315static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1316                                TCGReg rd, TCGReg rn, unsigned int m)
1317{
1318    int max = ext ? 63 : 31;
1319    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1320}
1321
1322static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1323                               TCGReg rn, unsigned lsb, unsigned width)
1324{
1325    unsigned size = ext ? 64 : 32;
1326    unsigned a = (size - lsb) & (size - 1);
1327    unsigned b = width - 1;
1328    tcg_out_bfm(s, ext, rd, rn, a, b);
1329}
1330
1331static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1332                        tcg_target_long b, bool const_b)
1333{
1334    if (const_b) {
1335        /* Using CMP or CMN aliases.  */
1336        if (b >= 0) {
1337            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1338        } else {
1339            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1340        }
1341    } else {
1342        /* Using CMP alias SUBS wzr, Wn, Wm */
1343        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1344    }
1345}
1346
1347static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1348{
1349    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1350    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1351    tcg_out_insn(s, 3206, B, offset);
1352}
1353
1354static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1355{
1356    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1357    if (offset == sextract64(offset, 0, 26)) {
1358        tcg_out_insn(s, 3206, B, offset);
1359    } else {
1360        /* Choose X9 as a call-clobbered non-LR temporary. */
1361        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1362        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1363    }
1364}
1365
1366static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1367{
1368    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1369    if (offset == sextract64(offset, 0, 26)) {
1370        tcg_out_insn(s, 3206, BL, offset);
1371    } else {
1372        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1373        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1374    }
1375}
1376
1377static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1378                         const TCGHelperInfo *info)
1379{
1380    tcg_out_call_int(s, target);
1381}
1382
1383static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1384{
1385    if (!l->has_value) {
1386        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1387        tcg_out_insn(s, 3206, B, 0);
1388    } else {
1389        tcg_out_goto(s, l->u.value_ptr);
1390    }
1391}
1392
1393static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1394                           TCGArg b, bool b_const, TCGLabel *l)
1395{
1396    intptr_t offset;
1397    bool need_cmp;
1398
1399    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1400        need_cmp = false;
1401    } else {
1402        need_cmp = true;
1403        tcg_out_cmp(s, ext, a, b, b_const);
1404    }
1405
1406    if (!l->has_value) {
1407        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1408        offset = tcg_in32(s) >> 5;
1409    } else {
1410        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1411        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1412    }
1413
1414    if (need_cmp) {
1415        tcg_out_insn(s, 3202, B_C, c, offset);
1416    } else if (c == TCG_COND_EQ) {
1417        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1418    } else {
1419        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1420    }
1421}
1422
1423static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1424                               TCGReg rd, TCGReg rn)
1425{
1426    /* REV, REV16, REV32 */
1427    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1428}
1429
1430static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1431                               TCGReg rd, TCGReg rn)
1432{
1433    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1434    int bits = (8 << s_bits) - 1;
1435    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1436}
1437
1438static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1439{
1440    tcg_out_sxt(s, type, MO_8, rd, rn);
1441}
1442
1443static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1444{
1445    tcg_out_sxt(s, type, MO_16, rd, rn);
1446}
1447
1448static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1449{
1450    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1451}
1452
1453static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1454{
1455    tcg_out_ext32s(s, rd, rn);
1456}
1457
1458static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1459                               TCGReg rd, TCGReg rn)
1460{
1461    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1462    int bits = (8 << s_bits) - 1;
1463    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1464}
1465
1466static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1467{
1468    tcg_out_uxt(s, MO_8, rd, rn);
1469}
1470
1471static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1472{
1473    tcg_out_uxt(s, MO_16, rd, rn);
1474}
1475
1476static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1477{
1478    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1479}
1480
1481static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1482{
1483    tcg_out_ext32u(s, rd, rn);
1484}
1485
1486static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1487{
1488    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1489}
1490
1491static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1492                            TCGReg rn, int64_t aimm)
1493{
1494    if (aimm >= 0) {
1495        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1496    } else {
1497        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1498    }
1499}
1500
1501static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1502                            TCGReg rh, TCGReg al, TCGReg ah,
1503                            tcg_target_long bl, tcg_target_long bh,
1504                            bool const_bl, bool const_bh, bool sub)
1505{
1506    TCGReg orig_rl = rl;
1507    AArch64Insn insn;
1508
1509    if (rl == ah || (!const_bh && rl == bh)) {
1510        rl = TCG_REG_TMP0;
1511    }
1512
1513    if (const_bl) {
1514        if (bl < 0) {
1515            bl = -bl;
1516            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1517        } else {
1518            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1519        }
1520
1521        if (unlikely(al == TCG_REG_XZR)) {
1522            /* ??? We want to allow al to be zero for the benefit of
1523               negation via subtraction.  However, that leaves open the
1524               possibility of adding 0+const in the low part, and the
1525               immediate add instructions encode XSP not XZR.  Don't try
1526               anything more elaborate here than loading another zero.  */
1527            al = TCG_REG_TMP0;
1528            tcg_out_movi(s, ext, al, 0);
1529        }
1530        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1531    } else {
1532        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1533    }
1534
1535    insn = I3503_ADC;
1536    if (const_bh) {
1537        /* Note that the only two constants we support are 0 and -1, and
1538           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1539        if ((bh != 0) ^ sub) {
1540            insn = I3503_SBC;
1541        }
1542        bh = TCG_REG_XZR;
1543    } else if (sub) {
1544        insn = I3503_SBC;
1545    }
1546    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1547
1548    tcg_out_mov(s, ext, orig_rl, rl);
1549}
1550
1551static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1552{
1553    static const uint32_t sync[] = {
1554        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1555        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1556        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1557        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1558        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1559    };
1560    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1561}
1562
1563static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1564                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1565{
1566    TCGReg a1 = a0;
1567    if (is_ctz) {
1568        a1 = TCG_REG_TMP0;
1569        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1570    }
1571    if (const_b && b == (ext ? 64 : 32)) {
1572        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1573    } else {
1574        AArch64Insn sel = I3506_CSEL;
1575
1576        tcg_out_cmp(s, ext, a0, 0, 1);
1577        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1578
1579        if (const_b) {
1580            if (b == -1) {
1581                b = TCG_REG_XZR;
1582                sel = I3506_CSINV;
1583            } else if (b == 0) {
1584                b = TCG_REG_XZR;
1585            } else {
1586                tcg_out_movi(s, ext, d, b);
1587                b = d;
1588            }
1589        }
1590        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1591    }
1592}
1593
1594typedef struct {
1595    TCGReg base;
1596    TCGReg index;
1597    TCGType index_ext;
1598    TCGAtomAlign aa;
1599} HostAddress;
1600
1601bool tcg_target_has_memory_bswap(MemOp memop)
1602{
1603    return false;
1604}
1605
1606static const TCGLdstHelperParam ldst_helper_param = {
1607    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1608};
1609
1610static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1611{
1612    MemOp opc = get_memop(lb->oi);
1613
1614    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1615        return false;
1616    }
1617
1618    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1619    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1620    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1621    tcg_out_goto(s, lb->raddr);
1622    return true;
1623}
1624
1625static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1626{
1627    MemOp opc = get_memop(lb->oi);
1628
1629    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1630        return false;
1631    }
1632
1633    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1634    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1635    tcg_out_goto(s, lb->raddr);
1636    return true;
1637}
1638
1639/* We expect to use a 7-bit scaled negative offset from ENV.  */
1640#define MIN_TLB_MASK_TABLE_OFS  -512
1641
1642/*
1643 * For softmmu, perform the TLB load and compare.
1644 * For useronly, perform any required alignment tests.
1645 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1646 * is required and fill in @h with the host address for the fast path.
1647 */
1648static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1649                                           TCGReg addr_reg, MemOpIdx oi,
1650                                           bool is_ld)
1651{
1652    TCGType addr_type = s->addr_type;
1653    TCGLabelQemuLdst *ldst = NULL;
1654    MemOp opc = get_memop(oi);
1655    MemOp s_bits = opc & MO_SIZE;
1656    unsigned a_mask;
1657
1658    h->aa = atom_and_align_for_opc(s, opc,
1659                                   have_lse2 ? MO_ATOM_WITHIN16
1660                                             : MO_ATOM_IFALIGN,
1661                                   s_bits == MO_128);
1662    a_mask = (1 << h->aa.align) - 1;
1663
1664#ifdef CONFIG_SOFTMMU
1665    unsigned s_mask = (1u << s_bits) - 1;
1666    unsigned mem_index = get_mmuidx(oi);
1667    TCGReg addr_adj;
1668    TCGType mask_type;
1669    uint64_t compare_mask;
1670
1671    ldst = new_ldst_label(s);
1672    ldst->is_ld = is_ld;
1673    ldst->oi = oi;
1674    ldst->addrlo_reg = addr_reg;
1675
1676    mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1677                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1678
1679    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1680    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1681    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1682    tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1683                 tlb_mask_table_ofs(s, mem_index), 1, 0);
1684
1685    /* Extract the TLB index from the address into X0.  */
1686    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1687                 TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1688                 s->page_bits - CPU_TLB_ENTRY_BITS);
1689
1690    /* Add the tlb_table pointer, forming the CPUTLBEntry address in TMP1. */
1691    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1692
1693    /* Load the tlb comparator into TMP0, and the fast path addend into TMP1. */
1694    QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1695    tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1696               is_ld ? offsetof(CPUTLBEntry, addr_read)
1697                     : offsetof(CPUTLBEntry, addr_write));
1698    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1699               offsetof(CPUTLBEntry, addend));
1700
1701    /*
1702     * For aligned accesses, we check the first byte and include the alignment
1703     * bits within the address.  For unaligned access, we check that we don't
1704     * cross pages using the address of the last byte of the access.
1705     */
1706    if (a_mask >= s_mask) {
1707        addr_adj = addr_reg;
1708    } else {
1709        addr_adj = TCG_REG_TMP2;
1710        tcg_out_insn(s, 3401, ADDI, addr_type,
1711                     addr_adj, addr_reg, s_mask - a_mask);
1712    }
1713    compare_mask = (uint64_t)s->page_mask | a_mask;
1714
1715    /* Store the page mask part of the address into TMP2.  */
1716    tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1717                     addr_adj, compare_mask);
1718
1719    /* Perform the address comparison. */
1720    tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1721
1722    /* If not equal, we jump to the slow path. */
1723    ldst->label_ptr[0] = s->code_ptr;
1724    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1725
1726    h->base = TCG_REG_TMP1;
1727    h->index = addr_reg;
1728    h->index_ext = addr_type;
1729#else
1730    if (a_mask) {
1731        ldst = new_ldst_label(s);
1732
1733        ldst->is_ld = is_ld;
1734        ldst->oi = oi;
1735        ldst->addrlo_reg = addr_reg;
1736
1737        /* tst addr, #mask */
1738        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1739
1740        /* b.ne slow_path */
1741        ldst->label_ptr[0] = s->code_ptr;
1742        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1743    }
1744
1745    if (guest_base || addr_type == TCG_TYPE_I32) {
1746        h->base = TCG_REG_GUEST_BASE;
1747        h->index = addr_reg;
1748        h->index_ext = addr_type;
1749    } else {
1750        h->base = addr_reg;
1751        h->index = TCG_REG_XZR;
1752        h->index_ext = TCG_TYPE_I64;
1753    }
1754#endif
1755
1756    return ldst;
1757}
1758
1759static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1760                                   TCGReg data_r, HostAddress h)
1761{
1762    switch (memop & MO_SSIZE) {
1763    case MO_UB:
1764        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1765        break;
1766    case MO_SB:
1767        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1768                       data_r, h.base, h.index_ext, h.index);
1769        break;
1770    case MO_UW:
1771        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_SW:
1774        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1775                       data_r, h.base, h.index_ext, h.index);
1776        break;
1777    case MO_UL:
1778        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1779        break;
1780    case MO_SL:
1781        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1782        break;
1783    case MO_UQ:
1784        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1785        break;
1786    default:
1787        g_assert_not_reached();
1788    }
1789}
1790
1791static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1792                                   TCGReg data_r, HostAddress h)
1793{
1794    switch (memop & MO_SIZE) {
1795    case MO_8:
1796        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1797        break;
1798    case MO_16:
1799        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1800        break;
1801    case MO_32:
1802        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1803        break;
1804    case MO_64:
1805        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1806        break;
1807    default:
1808        g_assert_not_reached();
1809    }
1810}
1811
1812static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1813                            MemOpIdx oi, TCGType data_type)
1814{
1815    TCGLabelQemuLdst *ldst;
1816    HostAddress h;
1817
1818    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1819    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1820
1821    if (ldst) {
1822        ldst->type = data_type;
1823        ldst->datalo_reg = data_reg;
1824        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1825    }
1826}
1827
1828static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1829                            MemOpIdx oi, TCGType data_type)
1830{
1831    TCGLabelQemuLdst *ldst;
1832    HostAddress h;
1833
1834    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1835    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1836
1837    if (ldst) {
1838        ldst->type = data_type;
1839        ldst->datalo_reg = data_reg;
1840        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1841    }
1842}
1843
1844static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1845                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1846{
1847    TCGLabelQemuLdst *ldst;
1848    HostAddress h;
1849    TCGReg base;
1850    bool use_pair;
1851
1852    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1853
1854    /* Compose the final address, as LDP/STP have no indexing. */
1855    if (h.index == TCG_REG_XZR) {
1856        base = h.base;
1857    } else {
1858        base = TCG_REG_TMP2;
1859        if (h.index_ext == TCG_TYPE_I32) {
1860            /* add base, base, index, uxtw */
1861            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1862                         h.base, h.index, MO_32, 0);
1863        } else {
1864            /* add base, base, index */
1865            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1866        }
1867    }
1868
1869    use_pair = h.aa.atom < MO_128 || have_lse2;
1870
1871    if (!use_pair) {
1872        tcg_insn_unit *branch = NULL;
1873        TCGReg ll, lh, sl, sh;
1874
1875        /*
1876         * If we have already checked for 16-byte alignment, that's all
1877         * we need. Otherwise we have determined that misaligned atomicity
1878         * may be handled with two 8-byte loads.
1879         */
1880        if (h.aa.align < MO_128) {
1881            /*
1882             * TODO: align should be MO_64, so we only need test bit 3,
1883             * which means we could use TBNZ instead of ANDS+B_C.
1884             */
1885            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1886            branch = s->code_ptr;
1887            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1888            use_pair = true;
1889        }
1890
1891        if (is_ld) {
1892            /*
1893             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1894             *    ldxp lo, hi, [base]
1895             *    stxp t0, lo, hi, [base]
1896             *    cbnz t0, .-8
1897             * Require no overlap between data{lo,hi} and base.
1898             */
1899            if (datalo == base || datahi == base) {
1900                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1901                base = TCG_REG_TMP2;
1902            }
1903            ll = sl = datalo;
1904            lh = sh = datahi;
1905        } else {
1906            /*
1907             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1908             * 1: ldxp t0, t1, [base]
1909             *    stxp t0, lo, hi, [base]
1910             *    cbnz t0, 1b
1911             */
1912            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1913            ll = TCG_REG_TMP0;
1914            lh = TCG_REG_TMP1;
1915            sl = datalo;
1916            sh = datahi;
1917        }
1918
1919        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1920        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1921        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1922
1923        if (use_pair) {
1924            /* "b .+8", branching across the one insn of use_pair. */
1925            tcg_out_insn(s, 3206, B, 2);
1926            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1927        }
1928    }
1929
1930    if (use_pair) {
1931        if (is_ld) {
1932            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1933        } else {
1934            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1935        }
1936    }
1937
1938    if (ldst) {
1939        ldst->type = TCG_TYPE_I128;
1940        ldst->datalo_reg = datalo;
1941        ldst->datahi_reg = datahi;
1942        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1943    }
1944}
1945
1946static const tcg_insn_unit *tb_ret_addr;
1947
1948static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1949{
1950    /* Reuse the zeroing that exists for goto_ptr.  */
1951    if (a0 == 0) {
1952        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1953    } else {
1954        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1955        tcg_out_goto_long(s, tb_ret_addr);
1956    }
1957}
1958
1959static void tcg_out_goto_tb(TCGContext *s, int which)
1960{
1961    /*
1962     * Direct branch, or indirect address load, will be patched
1963     * by tb_target_set_jmp_target.  Assert indirect load offset
1964     * in range early, regardless of direct branch distance.
1965     */
1966    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1967    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1968
1969    set_jmp_insn_offset(s, which);
1970    tcg_out32(s, I3206_B);
1971    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1972    set_jmp_reset_offset(s, which);
1973}
1974
1975void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1976                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1977{
1978    uintptr_t d_addr = tb->jmp_target_addr[n];
1979    ptrdiff_t d_offset = d_addr - jmp_rx;
1980    tcg_insn_unit insn;
1981
1982    /* Either directly branch, or indirect branch load. */
1983    if (d_offset == sextract64(d_offset, 0, 28)) {
1984        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1985    } else {
1986        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1987        ptrdiff_t i_offset = i_addr - jmp_rx;
1988
1989        /* Note that we asserted this in range in tcg_out_goto_tb. */
1990        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
1991    }
1992    qatomic_set((uint32_t *)jmp_rw, insn);
1993    flush_idcache_range(jmp_rx, jmp_rw, 4);
1994}
1995
1996static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1997                       const TCGArg args[TCG_MAX_OP_ARGS],
1998                       const int const_args[TCG_MAX_OP_ARGS])
1999{
2000    /* 99% of the time, we can signal the use of extension registers
2001       by looking to see if the opcode handles 64-bit data.  */
2002    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2003
2004    /* Hoist the loads of the most common arguments.  */
2005    TCGArg a0 = args[0];
2006    TCGArg a1 = args[1];
2007    TCGArg a2 = args[2];
2008    int c2 = const_args[2];
2009
2010    /* Some operands are defined with "rZ" constraint, a register or
2011       the zero register.  These need not actually test args[I] == 0.  */
2012#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2013
2014    switch (opc) {
2015    case INDEX_op_goto_ptr:
2016        tcg_out_insn(s, 3207, BR, a0);
2017        break;
2018
2019    case INDEX_op_br:
2020        tcg_out_goto_label(s, arg_label(a0));
2021        break;
2022
2023    case INDEX_op_ld8u_i32:
2024    case INDEX_op_ld8u_i64:
2025        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2026        break;
2027    case INDEX_op_ld8s_i32:
2028        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2029        break;
2030    case INDEX_op_ld8s_i64:
2031        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2032        break;
2033    case INDEX_op_ld16u_i32:
2034    case INDEX_op_ld16u_i64:
2035        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2036        break;
2037    case INDEX_op_ld16s_i32:
2038        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2039        break;
2040    case INDEX_op_ld16s_i64:
2041        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2042        break;
2043    case INDEX_op_ld_i32:
2044    case INDEX_op_ld32u_i64:
2045        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2046        break;
2047    case INDEX_op_ld32s_i64:
2048        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2049        break;
2050    case INDEX_op_ld_i64:
2051        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2052        break;
2053
2054    case INDEX_op_st8_i32:
2055    case INDEX_op_st8_i64:
2056        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2057        break;
2058    case INDEX_op_st16_i32:
2059    case INDEX_op_st16_i64:
2060        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2061        break;
2062    case INDEX_op_st_i32:
2063    case INDEX_op_st32_i64:
2064        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2065        break;
2066    case INDEX_op_st_i64:
2067        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2068        break;
2069
2070    case INDEX_op_add_i32:
2071        a2 = (int32_t)a2;
2072        /* FALLTHRU */
2073    case INDEX_op_add_i64:
2074        if (c2) {
2075            tcg_out_addsubi(s, ext, a0, a1, a2);
2076        } else {
2077            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2078        }
2079        break;
2080
2081    case INDEX_op_sub_i32:
2082        a2 = (int32_t)a2;
2083        /* FALLTHRU */
2084    case INDEX_op_sub_i64:
2085        if (c2) {
2086            tcg_out_addsubi(s, ext, a0, a1, -a2);
2087        } else {
2088            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2089        }
2090        break;
2091
2092    case INDEX_op_neg_i64:
2093    case INDEX_op_neg_i32:
2094        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2095        break;
2096
2097    case INDEX_op_and_i32:
2098        a2 = (int32_t)a2;
2099        /* FALLTHRU */
2100    case INDEX_op_and_i64:
2101        if (c2) {
2102            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2103        } else {
2104            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2105        }
2106        break;
2107
2108    case INDEX_op_andc_i32:
2109        a2 = (int32_t)a2;
2110        /* FALLTHRU */
2111    case INDEX_op_andc_i64:
2112        if (c2) {
2113            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2114        } else {
2115            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2116        }
2117        break;
2118
2119    case INDEX_op_or_i32:
2120        a2 = (int32_t)a2;
2121        /* FALLTHRU */
2122    case INDEX_op_or_i64:
2123        if (c2) {
2124            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2125        } else {
2126            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2127        }
2128        break;
2129
2130    case INDEX_op_orc_i32:
2131        a2 = (int32_t)a2;
2132        /* FALLTHRU */
2133    case INDEX_op_orc_i64:
2134        if (c2) {
2135            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2136        } else {
2137            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2138        }
2139        break;
2140
2141    case INDEX_op_xor_i32:
2142        a2 = (int32_t)a2;
2143        /* FALLTHRU */
2144    case INDEX_op_xor_i64:
2145        if (c2) {
2146            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2147        } else {
2148            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2149        }
2150        break;
2151
2152    case INDEX_op_eqv_i32:
2153        a2 = (int32_t)a2;
2154        /* FALLTHRU */
2155    case INDEX_op_eqv_i64:
2156        if (c2) {
2157            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2158        } else {
2159            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2160        }
2161        break;
2162
2163    case INDEX_op_not_i64:
2164    case INDEX_op_not_i32:
2165        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2166        break;
2167
2168    case INDEX_op_mul_i64:
2169    case INDEX_op_mul_i32:
2170        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2171        break;
2172
2173    case INDEX_op_div_i64:
2174    case INDEX_op_div_i32:
2175        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2176        break;
2177    case INDEX_op_divu_i64:
2178    case INDEX_op_divu_i32:
2179        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2180        break;
2181
2182    case INDEX_op_rem_i64:
2183    case INDEX_op_rem_i32:
2184        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2185        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2186        break;
2187    case INDEX_op_remu_i64:
2188    case INDEX_op_remu_i32:
2189        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2190        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2191        break;
2192
2193    case INDEX_op_shl_i64:
2194    case INDEX_op_shl_i32:
2195        if (c2) {
2196            tcg_out_shl(s, ext, a0, a1, a2);
2197        } else {
2198            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2199        }
2200        break;
2201
2202    case INDEX_op_shr_i64:
2203    case INDEX_op_shr_i32:
2204        if (c2) {
2205            tcg_out_shr(s, ext, a0, a1, a2);
2206        } else {
2207            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2208        }
2209        break;
2210
2211    case INDEX_op_sar_i64:
2212    case INDEX_op_sar_i32:
2213        if (c2) {
2214            tcg_out_sar(s, ext, a0, a1, a2);
2215        } else {
2216            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2217        }
2218        break;
2219
2220    case INDEX_op_rotr_i64:
2221    case INDEX_op_rotr_i32:
2222        if (c2) {
2223            tcg_out_rotr(s, ext, a0, a1, a2);
2224        } else {
2225            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2226        }
2227        break;
2228
2229    case INDEX_op_rotl_i64:
2230    case INDEX_op_rotl_i32:
2231        if (c2) {
2232            tcg_out_rotl(s, ext, a0, a1, a2);
2233        } else {
2234            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2235            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2236        }
2237        break;
2238
2239    case INDEX_op_clz_i64:
2240    case INDEX_op_clz_i32:
2241        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2242        break;
2243    case INDEX_op_ctz_i64:
2244    case INDEX_op_ctz_i32:
2245        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2246        break;
2247
2248    case INDEX_op_brcond_i32:
2249        a1 = (int32_t)a1;
2250        /* FALLTHRU */
2251    case INDEX_op_brcond_i64:
2252        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2253        break;
2254
2255    case INDEX_op_setcond_i32:
2256        a2 = (int32_t)a2;
2257        /* FALLTHRU */
2258    case INDEX_op_setcond_i64:
2259        tcg_out_cmp(s, ext, a1, a2, c2);
2260        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2261        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2262                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2263        break;
2264
2265    case INDEX_op_movcond_i32:
2266        a2 = (int32_t)a2;
2267        /* FALLTHRU */
2268    case INDEX_op_movcond_i64:
2269        tcg_out_cmp(s, ext, a1, a2, c2);
2270        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2271        break;
2272
2273    case INDEX_op_qemu_ld_a32_i32:
2274    case INDEX_op_qemu_ld_a64_i32:
2275    case INDEX_op_qemu_ld_a32_i64:
2276    case INDEX_op_qemu_ld_a64_i64:
2277        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2278        break;
2279    case INDEX_op_qemu_st_a32_i32:
2280    case INDEX_op_qemu_st_a64_i32:
2281    case INDEX_op_qemu_st_a32_i64:
2282    case INDEX_op_qemu_st_a64_i64:
2283        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2284        break;
2285    case INDEX_op_qemu_ld_a32_i128:
2286    case INDEX_op_qemu_ld_a64_i128:
2287        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2288        break;
2289    case INDEX_op_qemu_st_a32_i128:
2290    case INDEX_op_qemu_st_a64_i128:
2291        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2292        break;
2293
2294    case INDEX_op_bswap64_i64:
2295        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2296        break;
2297    case INDEX_op_bswap32_i64:
2298        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2299        if (a2 & TCG_BSWAP_OS) {
2300            tcg_out_ext32s(s, a0, a0);
2301        }
2302        break;
2303    case INDEX_op_bswap32_i32:
2304        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2305        break;
2306    case INDEX_op_bswap16_i64:
2307    case INDEX_op_bswap16_i32:
2308        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2309        if (a2 & TCG_BSWAP_OS) {
2310            /* Output must be sign-extended. */
2311            tcg_out_ext16s(s, ext, a0, a0);
2312        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2313            /* Output must be zero-extended, but input isn't. */
2314            tcg_out_ext16u(s, a0, a0);
2315        }
2316        break;
2317
2318    case INDEX_op_deposit_i64:
2319    case INDEX_op_deposit_i32:
2320        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2321        break;
2322
2323    case INDEX_op_extract_i64:
2324    case INDEX_op_extract_i32:
2325        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2326        break;
2327
2328    case INDEX_op_sextract_i64:
2329    case INDEX_op_sextract_i32:
2330        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2331        break;
2332
2333    case INDEX_op_extract2_i64:
2334    case INDEX_op_extract2_i32:
2335        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2336        break;
2337
2338    case INDEX_op_add2_i32:
2339        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2340                        (int32_t)args[4], args[5], const_args[4],
2341                        const_args[5], false);
2342        break;
2343    case INDEX_op_add2_i64:
2344        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2345                        args[5], const_args[4], const_args[5], false);
2346        break;
2347    case INDEX_op_sub2_i32:
2348        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2349                        (int32_t)args[4], args[5], const_args[4],
2350                        const_args[5], true);
2351        break;
2352    case INDEX_op_sub2_i64:
2353        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2354                        args[5], const_args[4], const_args[5], true);
2355        break;
2356
2357    case INDEX_op_muluh_i64:
2358        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2359        break;
2360    case INDEX_op_mulsh_i64:
2361        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2362        break;
2363
2364    case INDEX_op_mb:
2365        tcg_out_mb(s, a0);
2366        break;
2367
2368    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2369    case INDEX_op_mov_i64:
2370    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2371    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2372    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2373    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2374    case INDEX_op_ext8s_i64:
2375    case INDEX_op_ext8u_i32:
2376    case INDEX_op_ext8u_i64:
2377    case INDEX_op_ext16s_i64:
2378    case INDEX_op_ext16s_i32:
2379    case INDEX_op_ext16u_i64:
2380    case INDEX_op_ext16u_i32:
2381    case INDEX_op_ext32s_i64:
2382    case INDEX_op_ext32u_i64:
2383    case INDEX_op_ext_i32_i64:
2384    case INDEX_op_extu_i32_i64:
2385    case INDEX_op_extrl_i64_i32:
2386    default:
2387        g_assert_not_reached();
2388    }
2389
2390#undef REG0
2391}
2392
2393static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2394                           unsigned vecl, unsigned vece,
2395                           const TCGArg args[TCG_MAX_OP_ARGS],
2396                           const int const_args[TCG_MAX_OP_ARGS])
2397{
2398    static const AArch64Insn cmp_vec_insn[16] = {
2399        [TCG_COND_EQ] = I3616_CMEQ,
2400        [TCG_COND_GT] = I3616_CMGT,
2401        [TCG_COND_GE] = I3616_CMGE,
2402        [TCG_COND_GTU] = I3616_CMHI,
2403        [TCG_COND_GEU] = I3616_CMHS,
2404    };
2405    static const AArch64Insn cmp_scalar_insn[16] = {
2406        [TCG_COND_EQ] = I3611_CMEQ,
2407        [TCG_COND_GT] = I3611_CMGT,
2408        [TCG_COND_GE] = I3611_CMGE,
2409        [TCG_COND_GTU] = I3611_CMHI,
2410        [TCG_COND_GEU] = I3611_CMHS,
2411    };
2412    static const AArch64Insn cmp0_vec_insn[16] = {
2413        [TCG_COND_EQ] = I3617_CMEQ0,
2414        [TCG_COND_GT] = I3617_CMGT0,
2415        [TCG_COND_GE] = I3617_CMGE0,
2416        [TCG_COND_LT] = I3617_CMLT0,
2417        [TCG_COND_LE] = I3617_CMLE0,
2418    };
2419    static const AArch64Insn cmp0_scalar_insn[16] = {
2420        [TCG_COND_EQ] = I3612_CMEQ0,
2421        [TCG_COND_GT] = I3612_CMGT0,
2422        [TCG_COND_GE] = I3612_CMGE0,
2423        [TCG_COND_LT] = I3612_CMLT0,
2424        [TCG_COND_LE] = I3612_CMLE0,
2425    };
2426
2427    TCGType type = vecl + TCG_TYPE_V64;
2428    unsigned is_q = vecl;
2429    bool is_scalar = !is_q && vece == MO_64;
2430    TCGArg a0, a1, a2, a3;
2431    int cmode, imm8;
2432
2433    a0 = args[0];
2434    a1 = args[1];
2435    a2 = args[2];
2436
2437    switch (opc) {
2438    case INDEX_op_ld_vec:
2439        tcg_out_ld(s, type, a0, a1, a2);
2440        break;
2441    case INDEX_op_st_vec:
2442        tcg_out_st(s, type, a0, a1, a2);
2443        break;
2444    case INDEX_op_dupm_vec:
2445        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2446        break;
2447    case INDEX_op_add_vec:
2448        if (is_scalar) {
2449            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2450        } else {
2451            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2452        }
2453        break;
2454    case INDEX_op_sub_vec:
2455        if (is_scalar) {
2456            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2457        } else {
2458            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2459        }
2460        break;
2461    case INDEX_op_mul_vec:
2462        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2463        break;
2464    case INDEX_op_neg_vec:
2465        if (is_scalar) {
2466            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2467        } else {
2468            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2469        }
2470        break;
2471    case INDEX_op_abs_vec:
2472        if (is_scalar) {
2473            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2474        } else {
2475            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2476        }
2477        break;
2478    case INDEX_op_and_vec:
2479        if (const_args[2]) {
2480            is_shimm1632(~a2, &cmode, &imm8);
2481            if (a0 == a1) {
2482                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2483                return;
2484            }
2485            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2486            a2 = a0;
2487        }
2488        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2489        break;
2490    case INDEX_op_or_vec:
2491        if (const_args[2]) {
2492            is_shimm1632(a2, &cmode, &imm8);
2493            if (a0 == a1) {
2494                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2495                return;
2496            }
2497            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2498            a2 = a0;
2499        }
2500        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2501        break;
2502    case INDEX_op_andc_vec:
2503        if (const_args[2]) {
2504            is_shimm1632(a2, &cmode, &imm8);
2505            if (a0 == a1) {
2506                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2507                return;
2508            }
2509            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2510            a2 = a0;
2511        }
2512        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2513        break;
2514    case INDEX_op_orc_vec:
2515        if (const_args[2]) {
2516            is_shimm1632(~a2, &cmode, &imm8);
2517            if (a0 == a1) {
2518                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2519                return;
2520            }
2521            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2522            a2 = a0;
2523        }
2524        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2525        break;
2526    case INDEX_op_xor_vec:
2527        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2528        break;
2529    case INDEX_op_ssadd_vec:
2530        if (is_scalar) {
2531            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2532        } else {
2533            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2534        }
2535        break;
2536    case INDEX_op_sssub_vec:
2537        if (is_scalar) {
2538            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2539        } else {
2540            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2541        }
2542        break;
2543    case INDEX_op_usadd_vec:
2544        if (is_scalar) {
2545            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2546        } else {
2547            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2548        }
2549        break;
2550    case INDEX_op_ussub_vec:
2551        if (is_scalar) {
2552            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2553        } else {
2554            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2555        }
2556        break;
2557    case INDEX_op_smax_vec:
2558        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2559        break;
2560    case INDEX_op_smin_vec:
2561        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2562        break;
2563    case INDEX_op_umax_vec:
2564        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2565        break;
2566    case INDEX_op_umin_vec:
2567        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2568        break;
2569    case INDEX_op_not_vec:
2570        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2571        break;
2572    case INDEX_op_shli_vec:
2573        if (is_scalar) {
2574            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2575        } else {
2576            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2577        }
2578        break;
2579    case INDEX_op_shri_vec:
2580        if (is_scalar) {
2581            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2582        } else {
2583            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2584        }
2585        break;
2586    case INDEX_op_sari_vec:
2587        if (is_scalar) {
2588            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2589        } else {
2590            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2591        }
2592        break;
2593    case INDEX_op_aa64_sli_vec:
2594        if (is_scalar) {
2595            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2596        } else {
2597            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2598        }
2599        break;
2600    case INDEX_op_shlv_vec:
2601        if (is_scalar) {
2602            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2603        } else {
2604            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2605        }
2606        break;
2607    case INDEX_op_aa64_sshl_vec:
2608        if (is_scalar) {
2609            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2610        } else {
2611            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2612        }
2613        break;
2614    case INDEX_op_cmp_vec:
2615        {
2616            TCGCond cond = args[3];
2617            AArch64Insn insn;
2618
2619            if (cond == TCG_COND_NE) {
2620                if (const_args[2]) {
2621                    if (is_scalar) {
2622                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2623                    } else {
2624                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2625                    }
2626                } else {
2627                    if (is_scalar) {
2628                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2629                    } else {
2630                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2631                    }
2632                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2633                }
2634            } else {
2635                if (const_args[2]) {
2636                    if (is_scalar) {
2637                        insn = cmp0_scalar_insn[cond];
2638                        if (insn) {
2639                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2640                            break;
2641                        }
2642                    } else {
2643                        insn = cmp0_vec_insn[cond];
2644                        if (insn) {
2645                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2646                            break;
2647                        }
2648                    }
2649                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2650                    a2 = TCG_VEC_TMP0;
2651                }
2652                if (is_scalar) {
2653                    insn = cmp_scalar_insn[cond];
2654                    if (insn == 0) {
2655                        TCGArg t;
2656                        t = a1, a1 = a2, a2 = t;
2657                        cond = tcg_swap_cond(cond);
2658                        insn = cmp_scalar_insn[cond];
2659                        tcg_debug_assert(insn != 0);
2660                    }
2661                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2662                } else {
2663                    insn = cmp_vec_insn[cond];
2664                    if (insn == 0) {
2665                        TCGArg t;
2666                        t = a1, a1 = a2, a2 = t;
2667                        cond = tcg_swap_cond(cond);
2668                        insn = cmp_vec_insn[cond];
2669                        tcg_debug_assert(insn != 0);
2670                    }
2671                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2672                }
2673            }
2674        }
2675        break;
2676
2677    case INDEX_op_bitsel_vec:
2678        a3 = args[3];
2679        if (a0 == a3) {
2680            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2681        } else if (a0 == a2) {
2682            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2683        } else {
2684            if (a0 != a1) {
2685                tcg_out_mov(s, type, a0, a1);
2686            }
2687            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2688        }
2689        break;
2690
2691    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2692    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2693    default:
2694        g_assert_not_reached();
2695    }
2696}
2697
2698int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2699{
2700    switch (opc) {
2701    case INDEX_op_add_vec:
2702    case INDEX_op_sub_vec:
2703    case INDEX_op_and_vec:
2704    case INDEX_op_or_vec:
2705    case INDEX_op_xor_vec:
2706    case INDEX_op_andc_vec:
2707    case INDEX_op_orc_vec:
2708    case INDEX_op_neg_vec:
2709    case INDEX_op_abs_vec:
2710    case INDEX_op_not_vec:
2711    case INDEX_op_cmp_vec:
2712    case INDEX_op_shli_vec:
2713    case INDEX_op_shri_vec:
2714    case INDEX_op_sari_vec:
2715    case INDEX_op_ssadd_vec:
2716    case INDEX_op_sssub_vec:
2717    case INDEX_op_usadd_vec:
2718    case INDEX_op_ussub_vec:
2719    case INDEX_op_shlv_vec:
2720    case INDEX_op_bitsel_vec:
2721        return 1;
2722    case INDEX_op_rotli_vec:
2723    case INDEX_op_shrv_vec:
2724    case INDEX_op_sarv_vec:
2725    case INDEX_op_rotlv_vec:
2726    case INDEX_op_rotrv_vec:
2727        return -1;
2728    case INDEX_op_mul_vec:
2729    case INDEX_op_smax_vec:
2730    case INDEX_op_smin_vec:
2731    case INDEX_op_umax_vec:
2732    case INDEX_op_umin_vec:
2733        return vece < MO_64;
2734
2735    default:
2736        return 0;
2737    }
2738}
2739
2740void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2741                       TCGArg a0, ...)
2742{
2743    va_list va;
2744    TCGv_vec v0, v1, v2, t1, t2, c1;
2745    TCGArg a2;
2746
2747    va_start(va, a0);
2748    v0 = temp_tcgv_vec(arg_temp(a0));
2749    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2750    a2 = va_arg(va, TCGArg);
2751    va_end(va);
2752
2753    switch (opc) {
2754    case INDEX_op_rotli_vec:
2755        t1 = tcg_temp_new_vec(type);
2756        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2757        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2758                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2759        tcg_temp_free_vec(t1);
2760        break;
2761
2762    case INDEX_op_shrv_vec:
2763    case INDEX_op_sarv_vec:
2764        /* Right shifts are negative left shifts for AArch64.  */
2765        v2 = temp_tcgv_vec(arg_temp(a2));
2766        t1 = tcg_temp_new_vec(type);
2767        tcg_gen_neg_vec(vece, t1, v2);
2768        opc = (opc == INDEX_op_shrv_vec
2769               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2770        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2771                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2772        tcg_temp_free_vec(t1);
2773        break;
2774
2775    case INDEX_op_rotlv_vec:
2776        v2 = temp_tcgv_vec(arg_temp(a2));
2777        t1 = tcg_temp_new_vec(type);
2778        c1 = tcg_constant_vec(type, vece, 8 << vece);
2779        tcg_gen_sub_vec(vece, t1, v2, c1);
2780        /* Right shifts are negative left shifts for AArch64.  */
2781        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2782                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2783        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2784                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2785        tcg_gen_or_vec(vece, v0, v0, t1);
2786        tcg_temp_free_vec(t1);
2787        break;
2788
2789    case INDEX_op_rotrv_vec:
2790        v2 = temp_tcgv_vec(arg_temp(a2));
2791        t1 = tcg_temp_new_vec(type);
2792        t2 = tcg_temp_new_vec(type);
2793        c1 = tcg_constant_vec(type, vece, 8 << vece);
2794        tcg_gen_neg_vec(vece, t1, v2);
2795        tcg_gen_sub_vec(vece, t2, c1, v2);
2796        /* Right shifts are negative left shifts for AArch64.  */
2797        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2798                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2799        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2800                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2801        tcg_gen_or_vec(vece, v0, t1, t2);
2802        tcg_temp_free_vec(t1);
2803        tcg_temp_free_vec(t2);
2804        break;
2805
2806    default:
2807        g_assert_not_reached();
2808    }
2809}
2810
2811static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2812{
2813    switch (op) {
2814    case INDEX_op_goto_ptr:
2815        return C_O0_I1(r);
2816
2817    case INDEX_op_ld8u_i32:
2818    case INDEX_op_ld8s_i32:
2819    case INDEX_op_ld16u_i32:
2820    case INDEX_op_ld16s_i32:
2821    case INDEX_op_ld_i32:
2822    case INDEX_op_ld8u_i64:
2823    case INDEX_op_ld8s_i64:
2824    case INDEX_op_ld16u_i64:
2825    case INDEX_op_ld16s_i64:
2826    case INDEX_op_ld32u_i64:
2827    case INDEX_op_ld32s_i64:
2828    case INDEX_op_ld_i64:
2829    case INDEX_op_neg_i32:
2830    case INDEX_op_neg_i64:
2831    case INDEX_op_not_i32:
2832    case INDEX_op_not_i64:
2833    case INDEX_op_bswap16_i32:
2834    case INDEX_op_bswap32_i32:
2835    case INDEX_op_bswap16_i64:
2836    case INDEX_op_bswap32_i64:
2837    case INDEX_op_bswap64_i64:
2838    case INDEX_op_ext8s_i32:
2839    case INDEX_op_ext16s_i32:
2840    case INDEX_op_ext8u_i32:
2841    case INDEX_op_ext16u_i32:
2842    case INDEX_op_ext8s_i64:
2843    case INDEX_op_ext16s_i64:
2844    case INDEX_op_ext32s_i64:
2845    case INDEX_op_ext8u_i64:
2846    case INDEX_op_ext16u_i64:
2847    case INDEX_op_ext32u_i64:
2848    case INDEX_op_ext_i32_i64:
2849    case INDEX_op_extu_i32_i64:
2850    case INDEX_op_extract_i32:
2851    case INDEX_op_extract_i64:
2852    case INDEX_op_sextract_i32:
2853    case INDEX_op_sextract_i64:
2854        return C_O1_I1(r, r);
2855
2856    case INDEX_op_st8_i32:
2857    case INDEX_op_st16_i32:
2858    case INDEX_op_st_i32:
2859    case INDEX_op_st8_i64:
2860    case INDEX_op_st16_i64:
2861    case INDEX_op_st32_i64:
2862    case INDEX_op_st_i64:
2863        return C_O0_I2(rZ, r);
2864
2865    case INDEX_op_add_i32:
2866    case INDEX_op_add_i64:
2867    case INDEX_op_sub_i32:
2868    case INDEX_op_sub_i64:
2869    case INDEX_op_setcond_i32:
2870    case INDEX_op_setcond_i64:
2871        return C_O1_I2(r, r, rA);
2872
2873    case INDEX_op_mul_i32:
2874    case INDEX_op_mul_i64:
2875    case INDEX_op_div_i32:
2876    case INDEX_op_div_i64:
2877    case INDEX_op_divu_i32:
2878    case INDEX_op_divu_i64:
2879    case INDEX_op_rem_i32:
2880    case INDEX_op_rem_i64:
2881    case INDEX_op_remu_i32:
2882    case INDEX_op_remu_i64:
2883    case INDEX_op_muluh_i64:
2884    case INDEX_op_mulsh_i64:
2885        return C_O1_I2(r, r, r);
2886
2887    case INDEX_op_and_i32:
2888    case INDEX_op_and_i64:
2889    case INDEX_op_or_i32:
2890    case INDEX_op_or_i64:
2891    case INDEX_op_xor_i32:
2892    case INDEX_op_xor_i64:
2893    case INDEX_op_andc_i32:
2894    case INDEX_op_andc_i64:
2895    case INDEX_op_orc_i32:
2896    case INDEX_op_orc_i64:
2897    case INDEX_op_eqv_i32:
2898    case INDEX_op_eqv_i64:
2899        return C_O1_I2(r, r, rL);
2900
2901    case INDEX_op_shl_i32:
2902    case INDEX_op_shr_i32:
2903    case INDEX_op_sar_i32:
2904    case INDEX_op_rotl_i32:
2905    case INDEX_op_rotr_i32:
2906    case INDEX_op_shl_i64:
2907    case INDEX_op_shr_i64:
2908    case INDEX_op_sar_i64:
2909    case INDEX_op_rotl_i64:
2910    case INDEX_op_rotr_i64:
2911        return C_O1_I2(r, r, ri);
2912
2913    case INDEX_op_clz_i32:
2914    case INDEX_op_ctz_i32:
2915    case INDEX_op_clz_i64:
2916    case INDEX_op_ctz_i64:
2917        return C_O1_I2(r, r, rAL);
2918
2919    case INDEX_op_brcond_i32:
2920    case INDEX_op_brcond_i64:
2921        return C_O0_I2(r, rA);
2922
2923    case INDEX_op_movcond_i32:
2924    case INDEX_op_movcond_i64:
2925        return C_O1_I4(r, r, rA, rZ, rZ);
2926
2927    case INDEX_op_qemu_ld_a32_i32:
2928    case INDEX_op_qemu_ld_a64_i32:
2929    case INDEX_op_qemu_ld_a32_i64:
2930    case INDEX_op_qemu_ld_a64_i64:
2931        return C_O1_I1(r, r);
2932    case INDEX_op_qemu_ld_a32_i128:
2933    case INDEX_op_qemu_ld_a64_i128:
2934        return C_O2_I1(r, r, r);
2935    case INDEX_op_qemu_st_a32_i32:
2936    case INDEX_op_qemu_st_a64_i32:
2937    case INDEX_op_qemu_st_a32_i64:
2938    case INDEX_op_qemu_st_a64_i64:
2939        return C_O0_I2(rZ, r);
2940    case INDEX_op_qemu_st_a32_i128:
2941    case INDEX_op_qemu_st_a64_i128:
2942        return C_O0_I3(rZ, rZ, r);
2943
2944    case INDEX_op_deposit_i32:
2945    case INDEX_op_deposit_i64:
2946        return C_O1_I2(r, 0, rZ);
2947
2948    case INDEX_op_extract2_i32:
2949    case INDEX_op_extract2_i64:
2950        return C_O1_I2(r, rZ, rZ);
2951
2952    case INDEX_op_add2_i32:
2953    case INDEX_op_add2_i64:
2954    case INDEX_op_sub2_i32:
2955    case INDEX_op_sub2_i64:
2956        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2957
2958    case INDEX_op_add_vec:
2959    case INDEX_op_sub_vec:
2960    case INDEX_op_mul_vec:
2961    case INDEX_op_xor_vec:
2962    case INDEX_op_ssadd_vec:
2963    case INDEX_op_sssub_vec:
2964    case INDEX_op_usadd_vec:
2965    case INDEX_op_ussub_vec:
2966    case INDEX_op_smax_vec:
2967    case INDEX_op_smin_vec:
2968    case INDEX_op_umax_vec:
2969    case INDEX_op_umin_vec:
2970    case INDEX_op_shlv_vec:
2971    case INDEX_op_shrv_vec:
2972    case INDEX_op_sarv_vec:
2973    case INDEX_op_aa64_sshl_vec:
2974        return C_O1_I2(w, w, w);
2975    case INDEX_op_not_vec:
2976    case INDEX_op_neg_vec:
2977    case INDEX_op_abs_vec:
2978    case INDEX_op_shli_vec:
2979    case INDEX_op_shri_vec:
2980    case INDEX_op_sari_vec:
2981        return C_O1_I1(w, w);
2982    case INDEX_op_ld_vec:
2983    case INDEX_op_dupm_vec:
2984        return C_O1_I1(w, r);
2985    case INDEX_op_st_vec:
2986        return C_O0_I2(w, r);
2987    case INDEX_op_dup_vec:
2988        return C_O1_I1(w, wr);
2989    case INDEX_op_or_vec:
2990    case INDEX_op_andc_vec:
2991        return C_O1_I2(w, w, wO);
2992    case INDEX_op_and_vec:
2993    case INDEX_op_orc_vec:
2994        return C_O1_I2(w, w, wN);
2995    case INDEX_op_cmp_vec:
2996        return C_O1_I2(w, w, wZ);
2997    case INDEX_op_bitsel_vec:
2998        return C_O1_I3(w, w, w, w);
2999    case INDEX_op_aa64_sli_vec:
3000        return C_O1_I2(w, 0, w);
3001
3002    default:
3003        g_assert_not_reached();
3004    }
3005}
3006
3007static void tcg_target_init(TCGContext *s)
3008{
3009    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3010    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3011    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3012    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3013
3014    tcg_target_call_clobber_regs = -1ull;
3015    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3016    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3017    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3018    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3019    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3020    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3021    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3022    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3023    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3024    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3025    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3026    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3027    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3028    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3029    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3030    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3031    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3032    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3033    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3034
3035    s->reserved_regs = 0;
3036    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3037    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3038    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3039    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3040    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3041    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3042    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3043}
3044
3045/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3046#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3047
3048#define FRAME_SIZE \
3049    ((PUSH_SIZE \
3050      + TCG_STATIC_CALL_ARGS_SIZE \
3051      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3052      + TCG_TARGET_STACK_ALIGN - 1) \
3053     & ~(TCG_TARGET_STACK_ALIGN - 1))
3054
3055/* We're expecting a 2 byte uleb128 encoded value.  */
3056QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3057
3058/* We're expecting to use a single ADDI insn.  */
3059QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
3060
3061static void tcg_target_qemu_prologue(TCGContext *s)
3062{
3063    TCGReg r;
3064
3065    /* Push (FP, LR) and allocate space for all saved registers.  */
3066    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3067                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3068
3069    /* Set up frame pointer for canonical unwinding.  */
3070    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3071
3072    /* Store callee-preserved regs x19..x28.  */
3073    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3074        int ofs = (r - TCG_REG_X19 + 2) * 8;
3075        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3076    }
3077
3078    /* Make stack space for TCG locals.  */
3079    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3080                 FRAME_SIZE - PUSH_SIZE);
3081
3082    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3083    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3084                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3085
3086#if !defined(CONFIG_SOFTMMU)
3087    /*
3088     * Note that XZR cannot be encoded in the address base register slot,
3089     * as that actaully encodes SP.  Depending on the guest, we may need
3090     * to zero-extend the guest address via the address index register slot,
3091     * therefore we need to load even a zero guest base into a register.
3092     */
3093    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3094    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3095#endif
3096
3097    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3098    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3099
3100    /*
3101     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3102     * and fall through to the rest of the epilogue.
3103     */
3104    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3105    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3106
3107    /* TB epilogue */
3108    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3109
3110    /* Remove TCG locals stack space.  */
3111    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3112                 FRAME_SIZE - PUSH_SIZE);
3113
3114    /* Restore registers x19..x28.  */
3115    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3116        int ofs = (r - TCG_REG_X19 + 2) * 8;
3117        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3118    }
3119
3120    /* Pop (FP, LR), restore SP to previous frame.  */
3121    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3122                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3123    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3124}
3125
3126static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3127{
3128    int i;
3129    for (i = 0; i < count; ++i) {
3130        p[i] = NOP;
3131    }
3132}
3133
3134typedef struct {
3135    DebugFrameHeader h;
3136    uint8_t fde_def_cfa[4];
3137    uint8_t fde_reg_ofs[24];
3138} DebugFrame;
3139
3140#define ELF_HOST_MACHINE EM_AARCH64
3141
3142static const DebugFrame debug_frame = {
3143    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3144    .h.cie.id = -1,
3145    .h.cie.version = 1,
3146    .h.cie.code_align = 1,
3147    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3148    .h.cie.return_column = TCG_REG_LR,
3149
3150    /* Total FDE size does not include the "len" member.  */
3151    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3152
3153    .fde_def_cfa = {
3154        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3155        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3156        (FRAME_SIZE >> 7)
3157    },
3158    .fde_reg_ofs = {
3159        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3160        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3161        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3162        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3163        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3164        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3165        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3166        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3167        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3168        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3169        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3170        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3171    }
3172};
3173
3174void tcg_register_jit(const void *buf, size_t buf_size)
3175{
3176    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3177}
3178