xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 12fde9bcdb52118495d10c32ed375679f23e323c)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16#ifdef __linux__
17#include <asm/hwcap.h>
18#endif
19#ifdef CONFIG_DARWIN
20#include <sys/sysctl.h>
21#endif
22
23/* We're going to reuse TCGType when setting the SF bit, which controls
24   the size of the operation performed.  If we know the values match, it
25   makes things much cleaner.  */
26QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
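/* Throughout this file the TCGType value is ORed directly into bit 31 as SF. */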
27
28#ifdef CONFIG_DEBUG_TCG
29static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
30    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
31    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
32    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
33    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
34
35    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
36    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
37    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
38    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
39};
40#endif /* CONFIG_DEBUG_TCG */
41
42static const int tcg_target_reg_alloc_order[] = {
43    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
44    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
45    TCG_REG_X28, /* we will reserve this for guest_base if configured */
46
47    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
48    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
49    TCG_REG_X16, TCG_REG_X17,
50
51    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
52    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
53
54    /* X18 reserved by system */
55    /* X19 reserved for AREG0 */
56    /* X29 reserved as fp */
57    /* X30 reserved as temporary */
58
59    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
60    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
61    /* V8 - V15 are call-saved, and skipped.  */
62    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
63    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
64    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
65    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
66};
67
68static const int tcg_target_call_iarg_regs[8] = {
69    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
70    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
71};
72
73static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
74{
75    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
76    tcg_debug_assert(slot >= 0 && slot <= 1);
77    return TCG_REG_X0 + slot;
78}
79
80bool have_lse;
81bool have_lse2;
82
83#define TCG_REG_TMP TCG_REG_X30
84#define TCG_VEC_TMP TCG_REG_V31
85
86#ifndef CONFIG_SOFTMMU
87/* Note that XZR cannot be encoded in the address base register slot,
88   as that actually encodes SP.  So if we need to zero-extend the guest
89   address via the address index register slot, we need to load even
90   a zero guest base into a register.  */
91#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
92#define TCG_REG_GUEST_BASE TCG_REG_X28
93#endif
94
95static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
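    /* The offset is counted in 32-bit instruction units, so the signed
       26-bit field reaches +/-128MB from the branch.  */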
100    if (offset == sextract64(offset, 0, 26)) {
101        /* read instruction, mask away previous PC_REL26 parameter contents,
102           set the proper offset, then write back the instruction. */
103        *src_rw = deposit32(*src_rw, 0, 26, offset);
104        return true;
105    }
106    return false;
107}
108
109static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
110{
111    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
112    ptrdiff_t offset = target - src_rx;
113
114    if (offset == sextract64(offset, 0, 19)) {
115        *src_rw = deposit32(*src_rw, 5, 19, offset);
116        return true;
117    }
118    return false;
119}
120
121static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
122                        intptr_t value, intptr_t addend)
123{
124    tcg_debug_assert(addend == 0);
125    switch (type) {
126    case R_AARCH64_JUMP26:
127    case R_AARCH64_CALL26:
128        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
129    case R_AARCH64_CONDBR19:
130        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
131    default:
132        g_assert_not_reached();
133    }
134}
135
136#define TCG_CT_CONST_AIMM 0x100
137#define TCG_CT_CONST_LIMM 0x200
138#define TCG_CT_CONST_ZERO 0x400
139#define TCG_CT_CONST_MONE 0x800
140#define TCG_CT_CONST_ORRI 0x1000
141#define TCG_CT_CONST_ANDI 0x2000
142
143#define ALL_GENERAL_REGS  0xffffffffu
144#define ALL_VECTOR_REGS   0xffffffff00000000ull
145
146#ifdef CONFIG_SOFTMMU
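/* x0-x3 are clobbered by the softmmu TLB lookup below and used to pass the
   slow-path helper arguments, so exclude them from qemu_ld/st operands.  */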
147#define ALL_QLDST_REGS \
148    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
149                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
150#else
151#define ALL_QLDST_REGS   ALL_GENERAL_REGS
152#endif
153
154/* Match a constant valid for addition (12-bit, optionally shifted).  */
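/* For example, 0x123 and 0x123000 are encodable; 0x123001 is not. */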
155static inline bool is_aimm(uint64_t val)
156{
157    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
158}
159
160/* Match a constant valid for logical operations.  */
161static inline bool is_limm(uint64_t val)
162{
163    /* Taking a simplified view of the logical immediates for now, ignoring
164       the replication that can happen across the field.  Match bit patterns
165       of the forms
166           0....01....1
167           0..01..10..0
168       and their inverses.  */
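    /* For example, 0x0ff0 matches: adding its lowest set bit (0x10) gives
       0x1000, a power of two.  0x0f0f0 does not, since the sum 0x0f100
       still has more than one bit set.  */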
169
170    /* Make things easier below, by testing the form with msb clear. */
171    if ((int64_t)val < 0) {
172        val = ~val;
173    }
174    if (val == 0) {
175        return false;
176    }
177    val += val & -val;
178    return (val & (val - 1)) == 0;
179}
180
181/* Return true if v16 is a valid 16-bit shifted immediate.  */
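/* For example, 0x1200 yields cmode 0xa, imm8 0x12 (one byte shifted left 8). */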
182static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
183{
184    if (v16 == (v16 & 0xff)) {
185        *cmode = 0x8;
186        *imm8 = v16 & 0xff;
187        return true;
188    } else if (v16 == (v16 & 0xff00)) {
189        *cmode = 0xa;
190        *imm8 = v16 >> 8;
191        return true;
192    }
193    return false;
194}
195
196/* Return true if v32 is a valid 32-bit shifted immediate.  */
197static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
198{
199    if (v32 == (v32 & 0xff)) {
200        *cmode = 0x0;
201        *imm8 = v32 & 0xff;
202        return true;
203    } else if (v32 == (v32 & 0xff00)) {
204        *cmode = 0x2;
205        *imm8 = (v32 >> 8) & 0xff;
206        return true;
207    } else if (v32 == (v32 & 0xff0000)) {
208        *cmode = 0x4;
209        *imm8 = (v32 >> 16) & 0xff;
210        return true;
211    } else if (v32 == (v32 & 0xff000000)) {
212        *cmode = 0x6;
213        *imm8 = v32 >> 24;
214        return true;
215    }
216    return false;
217}
218
219/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
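/* For example, 0x12ff (low byte all ones) yields cmode 0xc, imm8 0x12. */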
220static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
221{
222    if ((v32 & 0xffff00ff) == 0xff) {
223        *cmode = 0xc;
224        *imm8 = (v32 >> 8) & 0xff;
225        return true;
226    } else if ((v32 & 0xff00ffff) == 0xffff) {
227        *cmode = 0xd;
228        *imm8 = (v32 >> 16) & 0xff;
229        return true;
230    }
231    return false;
232}
233
234/* Return true if v32 is a valid float32 immediate.  */
235static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
236{
237    if (extract32(v32, 0, 19) == 0
238        && (extract32(v32, 25, 6) == 0x20
239            || extract32(v32, 25, 6) == 0x1f)) {
240        *cmode = 0xf;
241        *imm8 = (extract32(v32, 31, 1) << 7)
242              | (extract32(v32, 25, 1) << 6)
243              | extract32(v32, 19, 6);
244        return true;
245    }
246    return false;
247}
248
249/* Return true if v64 is a valid float64 immediate.  */
250static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
251{
252    if (extract64(v64, 0, 48) == 0
253        && (extract64(v64, 54, 9) == 0x100
254            || extract64(v64, 54, 9) == 0x0ff)) {
255        *cmode = 0xf;
256        *imm8 = (extract64(v64, 63, 1) << 7)
257              | (extract64(v64, 54, 1) << 6)
258              | extract64(v64, 48, 6);
259        return true;
260    }
261    return false;
262}
263
264/*
265 * Return non-zero if v32 can be formed by MOVI+ORR.
266 * Place the parameters for MOVI in (cmode, imm8).
267 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
268 */
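/* For example, v32 = 0x00ff00aa: the MOVI part is 0xaa (cmode 0x0) and the
   return value 4 selects an ORR of 0xff shifted left 16.  */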
269static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
270{
271    int i;
272
273    for (i = 6; i > 0; i -= 2) {
274        /* Mask out one byte we can add with ORR.  */
275        uint32_t tmp = v32 & ~(0xffu << (i * 4));
276        if (is_shimm32(tmp, cmode, imm8) ||
277            is_soimm32(tmp, cmode, imm8)) {
278            break;
279        }
280    }
281    return i;
282}
283
284/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
285static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
286{
287    if (v32 == deposit32(v32, 16, 16, v32)) {
288        return is_shimm16(v32, cmode, imm8);
289    } else {
290        return is_shimm32(v32, cmode, imm8);
291    }
292}
293
294static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
295{
296    if (ct & TCG_CT_CONST) {
297        return 1;
298    }
299    if (type == TCG_TYPE_I32) {
300        val = (int32_t)val;
301    }
302    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
303        return 1;
304    }
305    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
306        return 1;
307    }
308    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
309        return 1;
310    }
311    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
312        return 1;
313    }
314
315    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
316    case 0:
317        break;
318    case TCG_CT_CONST_ANDI:
319        val = ~val;
320        /* fallthru */
321    case TCG_CT_CONST_ORRI:
322        if (val == deposit64(val, 32, 32, val)) {
323            int cmode, imm8;
324            return is_shimm1632(val, &cmode, &imm8);
325        }
326        break;
327    default:
328        /* Both bits should not be set for the same insn.  */
329        g_assert_not_reached();
330    }
331
332    return 0;
333}
334
335enum aarch64_cond_code {
336    COND_EQ = 0x0,
337    COND_NE = 0x1,
338    COND_CS = 0x2,     /* Unsigned greater or equal */
339    COND_HS = COND_CS, /* ALIAS greater or equal */
340    COND_CC = 0x3,     /* Unsigned less than */
341    COND_LO = COND_CC, /* ALIAS Lower */
342    COND_MI = 0x4,     /* Negative */
343    COND_PL = 0x5,     /* Zero or greater */
344    COND_VS = 0x6,     /* Overflow */
345    COND_VC = 0x7,     /* No overflow */
346    COND_HI = 0x8,     /* Unsigned greater than */
347    COND_LS = 0x9,     /* Unsigned less or equal */
348    COND_GE = 0xa,
349    COND_LT = 0xb,
350    COND_GT = 0xc,
351    COND_LE = 0xd,
352    COND_AL = 0xe,
353    COND_NV = 0xf, /* behaves like COND_AL here */
354};
355
356static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
357    [TCG_COND_EQ] = COND_EQ,
358    [TCG_COND_NE] = COND_NE,
359    [TCG_COND_LT] = COND_LT,
360    [TCG_COND_GE] = COND_GE,
361    [TCG_COND_LE] = COND_LE,
362    [TCG_COND_GT] = COND_GT,
363    /* unsigned */
364    [TCG_COND_LTU] = COND_LO,
365    [TCG_COND_GTU] = COND_HI,
366    [TCG_COND_GEU] = COND_HS,
367    [TCG_COND_LEU] = COND_LS,
368};
369
370typedef enum {
371    LDST_ST = 0,    /* store */
372    LDST_LD = 1,    /* load */
373    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
374    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
375} AArch64LdstType;
376
377/* We encode the format of the insn into the beginning of the name, so that
378   we can have the preprocessor help "typecheck" the insn vs the output
379   function.  Arm didn't provide us with nice names for the formats, so we
380   use the section number of the architecture reference manual in which the
381   instruction group is described.  */
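/* For example, I3401_ADDI names the Add/subtract (immediate) group from
   section 3.4.1 of the manual.  */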
382typedef enum {
383    /* Compare and branch (immediate).  */
384    I3201_CBZ       = 0x34000000,
385    I3201_CBNZ      = 0x35000000,
386
387    /* Conditional branch (immediate).  */
388    I3202_B_C       = 0x54000000,
389
390    /* Unconditional branch (immediate).  */
391    I3206_B         = 0x14000000,
392    I3206_BL        = 0x94000000,
393
394    /* Unconditional branch (register).  */
395    I3207_BR        = 0xd61f0000,
396    I3207_BLR       = 0xd63f0000,
397    I3207_RET       = 0xd65f0000,
398
399    /* AdvSIMD load/store single structure.  */
400    I3303_LD1R      = 0x0d40c000,
401
402    /* Load register (literal), for loading a value at a pc-relative offset. */
403    I3305_LDR       = 0x58000000,
404    I3305_LDR_v64   = 0x5c000000,
405    I3305_LDR_v128  = 0x9c000000,
406
407    /* Load/store register.  Described here as 3.3.12, but the helper
408       that emits them can transform to 3.3.10 or 3.3.13.  */
409    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
410    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
411    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
412    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
413
414    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
415    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
416    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
417    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
418
419    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
420    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
421
422    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
423    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
424    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
425
426    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
427    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
428
429    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
430    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
431
432    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
433    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
434
435    I3312_TO_I3310  = 0x00200800,
436    I3312_TO_I3313  = 0x01000000,
437
438    /* Load/store register pair instructions.  */
439    I3314_LDP       = 0x28400000,
440    I3314_STP       = 0x28000000,
441
442    /* Add/subtract immediate instructions.  */
443    I3401_ADDI      = 0x11000000,
444    I3401_ADDSI     = 0x31000000,
445    I3401_SUBI      = 0x51000000,
446    I3401_SUBSI     = 0x71000000,
447
448    /* Bitfield instructions.  */
449    I3402_BFM       = 0x33000000,
450    I3402_SBFM      = 0x13000000,
451    I3402_UBFM      = 0x53000000,
452
453    /* Extract instruction.  */
454    I3403_EXTR      = 0x13800000,
455
456    /* Logical immediate instructions.  */
457    I3404_ANDI      = 0x12000000,
458    I3404_ORRI      = 0x32000000,
459    I3404_EORI      = 0x52000000,
460    I3404_ANDSI     = 0x72000000,
461
462    /* Move wide immediate instructions.  */
463    I3405_MOVN      = 0x12800000,
464    I3405_MOVZ      = 0x52800000,
465    I3405_MOVK      = 0x72800000,
466
467    /* PC relative addressing instructions.  */
468    I3406_ADR       = 0x10000000,
469    I3406_ADRP      = 0x90000000,
470
471    /* Add/subtract shifted register instructions (without a shift).  */
472    I3502_ADD       = 0x0b000000,
473    I3502_ADDS      = 0x2b000000,
474    I3502_SUB       = 0x4b000000,
475    I3502_SUBS      = 0x6b000000,
476
477    /* Add/subtract shifted register instructions (with a shift).  */
478    I3502S_ADD_LSL  = I3502_ADD,
479
480    /* Add/subtract with carry instructions.  */
481    I3503_ADC       = 0x1a000000,
482    I3503_SBC       = 0x5a000000,
483
484    /* Conditional select instructions.  */
485    I3506_CSEL      = 0x1a800000,
486    I3506_CSINC     = 0x1a800400,
487    I3506_CSINV     = 0x5a800000,
488    I3506_CSNEG     = 0x5a800400,
489
490    /* Data-processing (1 source) instructions.  */
491    I3507_CLZ       = 0x5ac01000,
492    I3507_RBIT      = 0x5ac00000,
493    I3507_REV       = 0x5ac00000, /* + size << 10 */
494
495    /* Data-processing (2 source) instructions.  */
496    I3508_LSLV      = 0x1ac02000,
497    I3508_LSRV      = 0x1ac02400,
498    I3508_ASRV      = 0x1ac02800,
499    I3508_RORV      = 0x1ac02c00,
500    I3508_SMULH     = 0x9b407c00,
501    I3508_UMULH     = 0x9bc07c00,
502    I3508_UDIV      = 0x1ac00800,
503    I3508_SDIV      = 0x1ac00c00,
504
505    /* Data-processing (3 source) instructions.  */
506    I3509_MADD      = 0x1b000000,
507    I3509_MSUB      = 0x1b008000,
508
509    /* Logical shifted register instructions (without a shift).  */
510    I3510_AND       = 0x0a000000,
511    I3510_BIC       = 0x0a200000,
512    I3510_ORR       = 0x2a000000,
513    I3510_ORN       = 0x2a200000,
514    I3510_EOR       = 0x4a000000,
515    I3510_EON       = 0x4a200000,
516    I3510_ANDS      = 0x6a000000,
517
518    /* Logical shifted register instructions (with a shift).  */
519    I3502S_AND_LSR  = I3510_AND | (1 << 22),
520
521    /* AdvSIMD copy */
522    I3605_DUP      = 0x0e000400,
523    I3605_INS      = 0x4e001c00,
524    I3605_UMOV     = 0x0e003c00,
525
526    /* AdvSIMD modified immediate */
527    I3606_MOVI      = 0x0f000400,
528    I3606_MVNI      = 0x2f000400,
529    I3606_BIC       = 0x2f001400,
530    I3606_ORR       = 0x0f001400,
531
532    /* AdvSIMD scalar shift by immediate */
533    I3609_SSHR      = 0x5f000400,
534    I3609_SSRA      = 0x5f001400,
535    I3609_SHL       = 0x5f005400,
536    I3609_USHR      = 0x7f000400,
537    I3609_USRA      = 0x7f001400,
538    I3609_SLI       = 0x7f005400,
539
540    /* AdvSIMD scalar three same */
541    I3611_SQADD     = 0x5e200c00,
542    I3611_SQSUB     = 0x5e202c00,
543    I3611_CMGT      = 0x5e203400,
544    I3611_CMGE      = 0x5e203c00,
545    I3611_SSHL      = 0x5e204400,
546    I3611_ADD       = 0x5e208400,
547    I3611_CMTST     = 0x5e208c00,
548    I3611_UQADD     = 0x7e200c00,
549    I3611_UQSUB     = 0x7e202c00,
550    I3611_CMHI      = 0x7e203400,
551    I3611_CMHS      = 0x7e203c00,
552    I3611_USHL      = 0x7e204400,
553    I3611_SUB       = 0x7e208400,
554    I3611_CMEQ      = 0x7e208c00,
555
556    /* AdvSIMD scalar two-reg misc */
557    I3612_CMGT0     = 0x5e208800,
558    I3612_CMEQ0     = 0x5e209800,
559    I3612_CMLT0     = 0x5e20a800,
560    I3612_ABS       = 0x5e20b800,
561    I3612_CMGE0     = 0x7e208800,
562    I3612_CMLE0     = 0x7e209800,
563    I3612_NEG       = 0x7e20b800,
564
565    /* AdvSIMD shift by immediate */
566    I3614_SSHR      = 0x0f000400,
567    I3614_SSRA      = 0x0f001400,
568    I3614_SHL       = 0x0f005400,
569    I3614_SLI       = 0x2f005400,
570    I3614_USHR      = 0x2f000400,
571    I3614_USRA      = 0x2f001400,
572
573    /* AdvSIMD three same.  */
574    I3616_ADD       = 0x0e208400,
575    I3616_AND       = 0x0e201c00,
576    I3616_BIC       = 0x0e601c00,
577    I3616_BIF       = 0x2ee01c00,
578    I3616_BIT       = 0x2ea01c00,
579    I3616_BSL       = 0x2e601c00,
580    I3616_EOR       = 0x2e201c00,
581    I3616_MUL       = 0x0e209c00,
582    I3616_ORR       = 0x0ea01c00,
583    I3616_ORN       = 0x0ee01c00,
584    I3616_SUB       = 0x2e208400,
585    I3616_CMGT      = 0x0e203400,
586    I3616_CMGE      = 0x0e203c00,
587    I3616_CMTST     = 0x0e208c00,
588    I3616_CMHI      = 0x2e203400,
589    I3616_CMHS      = 0x2e203c00,
590    I3616_CMEQ      = 0x2e208c00,
591    I3616_SMAX      = 0x0e206400,
592    I3616_SMIN      = 0x0e206c00,
593    I3616_SSHL      = 0x0e204400,
594    I3616_SQADD     = 0x0e200c00,
595    I3616_SQSUB     = 0x0e202c00,
596    I3616_UMAX      = 0x2e206400,
597    I3616_UMIN      = 0x2e206c00,
598    I3616_UQADD     = 0x2e200c00,
599    I3616_UQSUB     = 0x2e202c00,
600    I3616_USHL      = 0x2e204400,
601
602    /* AdvSIMD two-reg misc.  */
603    I3617_CMGT0     = 0x0e208800,
604    I3617_CMEQ0     = 0x0e209800,
605    I3617_CMLT0     = 0x0e20a800,
606    I3617_CMGE0     = 0x2e208800,
607    I3617_CMLE0     = 0x2e209800,
608    I3617_NOT       = 0x2e205800,
609    I3617_ABS       = 0x0e20b800,
610    I3617_NEG       = 0x2e20b800,
611
612    /* System instructions.  */
613    NOP             = 0xd503201f,
614    DMB_ISH         = 0xd50338bf,
615    DMB_LD          = 0x00000100,
616    DMB_ST          = 0x00000200,
617} AArch64Insn;
618
619static inline uint32_t tcg_in32(TCGContext *s)
620{
621    uint32_t v = *(uint32_t *)s->code_ptr;
622    return v;
623}
624
625/* Emit an opcode with "type-checking" of the format.  */
626#define tcg_out_insn(S, FMT, OP, ...) \
627    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
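/* For example, tcg_out_insn(s, 3401, ADDI, ...) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ...).  */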
628
629static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
630                              TCGReg rt, TCGReg rn, unsigned size)
631{
632    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
633}
634
635static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
636                              int imm19, TCGReg rt)
637{
638    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
639}
640
641static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
642                              TCGReg rt, int imm19)
643{
644    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
645}
646
647static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
648                              TCGCond c, int imm19)
649{
650    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
651}
652
653static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
654{
655    tcg_out32(s, insn | (imm26 & 0x03ffffff));
656}
657
658static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
659{
660    tcg_out32(s, insn | rn << 5);
661}
662
663static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
664                              TCGReg r1, TCGReg r2, TCGReg rn,
665                              tcg_target_long ofs, bool pre, bool w)
666{
667    insn |= 1u << 31; /* ext */
668    insn |= pre << 24;
669    insn |= w << 23;
670
671    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
672    insn |= (ofs & (0x7f << 3)) << (15 - 3);
673
674    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
675}
676
677static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
678                              TCGReg rd, TCGReg rn, uint64_t aimm)
679{
680    if (aimm > 0xfff) {
681        tcg_debug_assert((aimm & 0xfff) == 0);
682        aimm >>= 12;
683        tcg_debug_assert(aimm <= 0xfff);
684        aimm |= 1 << 12;  /* apply LSL 12 */
685    }
686    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
687}
688
689/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
690   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
691   that feed the DecodeBitMasks pseudo function.  */
692static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
693                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
694{
695    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
696              | rn << 5 | rd);
697}
698
699#define tcg_out_insn_3404  tcg_out_insn_3402
700
701static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
702                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
703{
704    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
705              | rn << 5 | rd);
706}
707
708/* This function is used for the Move (wide immediate) instruction group.
709   Note that SHIFT is a full shift count, not the 2 bit HW field. */
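/* For example, shift = 48 lands in the hw field as 3: 0x30 << (21 - 4)
   sets bits 22:21 of the instruction.  */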
710static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
711                              TCGReg rd, uint16_t half, unsigned shift)
712{
713    tcg_debug_assert((shift & ~0x30) == 0);
714    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
715}
716
717static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
718                              TCGReg rd, int64_t disp)
719{
720    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
721}
722
723/* This function is for 3.5.2 (Add/subtract shifted register), for the
724   rare occasion when we actually want to supply a shift amount.  */
725static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
726                                      TCGType ext, TCGReg rd, TCGReg rn,
727                                      TCGReg rm, int imm6)
728{
729    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
730}
731
732/* This function is for 3.5.2 (Add/subtract shifted register),
733   and 3.5.10 (Logical shifted register), for the vast majority of cases
734   when we don't want to apply a shift.  Thus it can also be used for
735   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
736static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
737                              TCGReg rd, TCGReg rn, TCGReg rm)
738{
739    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
740}
741
742#define tcg_out_insn_3503  tcg_out_insn_3502
743#define tcg_out_insn_3508  tcg_out_insn_3502
744#define tcg_out_insn_3510  tcg_out_insn_3502
745
746static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
747                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
748{
749    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
750              | tcg_cond_to_aarch64[c] << 12);
751}
752
753static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
754                              TCGReg rd, TCGReg rn)
755{
756    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
757}
758
759static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
760                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
761{
762    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
763}
764
765static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
766                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
767{
768    /* Note that bit 11 set means general register input.  Therefore
769       we can handle both register sets with one function.  */
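    /* Bit 5 of the TCG register number distinguishes vector registers
       (>= 32), so (~rn & 0x20) << 6 clears bit 11 for vector inputs and
       sets it for general-register inputs.  */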
770    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
771              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
772}
773
774static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
775                              TCGReg rd, bool op, int cmode, uint8_t imm8)
776{
777    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
778              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
779}
780
781static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
782                              TCGReg rd, TCGReg rn, unsigned immhb)
783{
784    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
785}
786
787static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
788                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
789{
790    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
791              | (rn & 0x1f) << 5 | (rd & 0x1f));
792}
793
794static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
795                              unsigned size, TCGReg rd, TCGReg rn)
796{
797    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
798}
799
800static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
801                              TCGReg rd, TCGReg rn, unsigned immhb)
802{
803    tcg_out32(s, insn | q << 30 | immhb << 16
804              | (rn & 0x1f) << 5 | (rd & 0x1f));
805}
806
807static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
808                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
809{
810    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
811              | (rn & 0x1f) << 5 | (rd & 0x1f));
812}
813
814static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
815                              unsigned size, TCGReg rd, TCGReg rn)
816{
817    tcg_out32(s, insn | q << 30 | (size << 22)
818              | (rn & 0x1f) << 5 | (rd & 0x1f));
819}
820
821static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
822                              TCGReg rd, TCGReg base, TCGType ext,
823                              TCGReg regoff)
824{
825    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
826    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
827              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
828}
829
830static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
831                              TCGReg rd, TCGReg rn, intptr_t offset)
832{
833    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
834}
835
836static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
837                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
838{
839    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
840    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
841              | rn << 5 | (rd & 0x1f));
842}
843
844/* Register to register move using ORR (shifted register with no shift). */
845static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
846{
847    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
848}
849
850/* Register to register move using ADDI (move to/from SP).  */
851static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
852{
853    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
854}
855
856/* This function is used for the Logical (immediate) instruction group.
857   The value of LIMM must satisfy IS_LIMM.  See the comment above about
858   only supporting simplified logical immediates.  */
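/* For example, limm = 0x0ff0 gives l = 4, h = 52, hence r = 60 and c = 7:
   eight consecutive ones rotated right by 60, i.e. ROR64(0xff, 60).  */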
859static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
860                             TCGReg rd, TCGReg rn, uint64_t limm)
861{
862    unsigned h, l, r, c;
863
864    tcg_debug_assert(is_limm(limm));
865
866    h = clz64(limm);
867    l = ctz64(limm);
868    if (l == 0) {
869        r = 0;                  /* form 0....01....1 */
870        c = ctz64(~limm) - 1;
871        if (h == 0) {
872            r = clz64(~limm);   /* form 1..10..01..1 */
873            c += r;
874        }
875    } else {
876        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
877        c = r - h - 1;
878    }
879    if (ext == TCG_TYPE_I32) {
880        r &= 31;
881        c &= 31;
882    }
883
884    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
885}
886
887static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
888                             TCGReg rd, int64_t v64)
889{
890    bool q = type == TCG_TYPE_V128;
891    int cmode, imm8, i;
892
893    /* Test all bytes equal first.  */
894    if (vece == MO_8) {
895        imm8 = (uint8_t)v64;
896        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
897        return;
898    }
899
900    /*
901     * Test all bytes 0x00 or 0xff second.  This can match cases that
902     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
903     */
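    /* With op = 1 and cmode 0xe, each bit of imm8 expands to a full byte
       of ones in the 64-bit immediate.  */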
904    for (i = imm8 = 0; i < 8; i++) {
905        uint8_t byte = v64 >> (i * 8);
906        if (byte == 0xff) {
907            imm8 |= 1 << i;
908        } else if (byte != 0) {
909            goto fail_bytes;
910        }
911    }
912    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
913    return;
914 fail_bytes:
915
916    /*
917     * Tests for various replications.  For each element width, if we
918     * cannot find an expansion there's no point checking a larger
919     * width because we already know by replication it cannot match.
920     */
921    if (vece == MO_16) {
922        uint16_t v16 = v64;
923
924        if (is_shimm16(v16, &cmode, &imm8)) {
925            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
926            return;
927        }
928        if (is_shimm16(~v16, &cmode, &imm8)) {
929            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
930            return;
931        }
932
933        /*
934         * Otherwise, all remaining constants can be loaded in two insns:
935         * rd = v16 & 0xff, rd |= v16 & 0xff00.
936         */
937        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
938        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
939        return;
940    } else if (vece == MO_32) {
941        uint32_t v32 = v64;
942        uint32_t n32 = ~v32;
943
944        if (is_shimm32(v32, &cmode, &imm8) ||
945            is_soimm32(v32, &cmode, &imm8) ||
946            is_fimm32(v32, &cmode, &imm8)) {
947            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
948            return;
949        }
950        if (is_shimm32(n32, &cmode, &imm8) ||
951            is_soimm32(n32, &cmode, &imm8)) {
952            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
953            return;
954        }
955
956        /*
957         * Restrict the set of constants to those we can load with
958         * two instructions.  Others we load from the pool.
959         */
960        i = is_shimm32_pair(v32, &cmode, &imm8);
961        if (i) {
962            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
963            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
964            return;
965        }
966        i = is_shimm32_pair(n32, &cmode, &imm8);
967        if (i) {
968            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
969            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
970            return;
971        }
972    } else if (is_fimm64(v64, &cmode, &imm8)) {
973        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
974        return;
975    }
976
977    /*
978     * As a last resort, load from the constant pool.  Sadly there
979     * is no LD1R (literal), so store the full 16-byte vector.
980     */
981    if (type == TCG_TYPE_V128) {
982        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
983        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
984    } else {
985        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
986        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
987    }
988}
989
990static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
991                            TCGReg rd, TCGReg rs)
992{
993    int is_q = type - TCG_TYPE_V64;
994    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
995    return true;
996}
997
998static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
999                             TCGReg r, TCGReg base, intptr_t offset)
1000{
1001    TCGReg temp = TCG_REG_TMP;
1002
1003    if (offset < -0xffffff || offset > 0xffffff) {
1004        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1005        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1006        base = temp;
1007    } else {
1008        AArch64Insn add_insn = I3401_ADDI;
1009
1010        if (offset < 0) {
1011            add_insn = I3401_SUBI;
1012            offset = -offset;
1013        }
1014        if (offset & 0xfff000) {
1015            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1016            base = temp;
1017        }
1018        if (offset & 0xfff) {
1019            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1020            base = temp;
1021        }
1022    }
1023    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1024    return true;
1025}
1026
1027static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1028                         tcg_target_long value)
1029{
1030    tcg_target_long svalue = value;
1031    tcg_target_long ivalue = ~value;
1032    tcg_target_long t0, t1, t2;
1033    int s0, s1;
1034    AArch64Insn opc;
1035
1036    switch (type) {
1037    case TCG_TYPE_I32:
1038    case TCG_TYPE_I64:
1039        tcg_debug_assert(rd < 32);
1040        break;
1041    default:
1042        g_assert_not_reached();
1043    }
1044
1045    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1046       values within [2**31, 2**32-1], we can create smaller sequences by
1047       interpreting this as a negative 32-bit number, while ensuring that
1048       the high 32 bits are cleared by setting SF=0.  */
1049    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1050        svalue = (int32_t)value;
1051        value = (uint32_t)value;
1052        ivalue = (uint32_t)ivalue;
1053        type = TCG_TYPE_I32;
1054    }
1055
1056    /* Speed things up by handling the common case of small positive
1057       and negative values specially.  */
1058    if ((value & ~0xffffull) == 0) {
1059        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1060        return;
1061    } else if ((ivalue & ~0xffffull) == 0) {
1062        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1063        return;
1064    }
1065
1066    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1067       use the sign-extended value.  That lets us match rotated values such
1068       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1069    if (is_limm(svalue)) {
1070        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1071        return;
1072    }
1073
1074    /* Look for host pointer values within 4G of the PC.  This happens
1075       often when loading pointers to QEMU's own data structures.  */
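    /* ADR covers +/-1MB directly; ADRP plus an ADD of the low 12 bits
       covers +/-4GB in 4KB pages.  */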
1076    if (type == TCG_TYPE_I64) {
1077        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1078        tcg_target_long disp = value - src_rx;
1079        if (disp == sextract64(disp, 0, 21)) {
1080            tcg_out_insn(s, 3406, ADR, rd, disp);
1081            return;
1082        }
1083        disp = (value >> 12) - (src_rx >> 12);
1084        if (disp == sextract64(disp, 0, 21)) {
1085            tcg_out_insn(s, 3406, ADRP, rd, disp);
1086            if (value & 0xfff) {
1087                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1088            }
1089            return;
1090        }
1091    }
1092
1093    /* Would it take fewer insns to begin with MOVN?  */
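    /* Heuristic: with at least half the bits set, the inverted value tends
       to have fewer non-zero 16-bit chunks, so start from MOVN.  */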
1094    if (ctpop64(value) >= 32) {
1095        t0 = ivalue;
1096        opc = I3405_MOVN;
1097    } else {
1098        t0 = value;
1099        opc = I3405_MOVZ;
1100    }
1101    s0 = ctz64(t0) & (63 & -16);
1102    t1 = t0 & ~(0xffffull << s0);
1103    s1 = ctz64(t1) & (63 & -16);
1104    t2 = t1 & ~(0xffffull << s1);
1105    if (t2 == 0) {
1106        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1107        if (t1 != 0) {
1108            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1109        }
1110        return;
1111    }
1112
1113    /* For more than 2 insns, dump it into the constant pool.  */
1114    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1115    tcg_out_insn(s, 3305, LDR, 0, rd);
1116}
1117
1118static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1119{
1120    return false;
1121}
1122
1123static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1124                             tcg_target_long imm)
1125{
1126    /* This function is only used for passing structs by reference. */
1127    g_assert_not_reached();
1128}
1129
1130/* Define something more legible for general use.  */
1131#define tcg_out_ldst_r  tcg_out_insn_3310
1132
1133static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1134                         TCGReg rn, intptr_t offset, int lgsize)
1135{
1136    /* If the offset is naturally aligned and in range, then we can
1137       use the scaled uimm12 encoding.  */
1138    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1139        uintptr_t scaled_uimm = offset >> lgsize;
1140        if (scaled_uimm <= 0xfff) {
1141            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1142            return;
1143        }
1144    }
1145
1146    /* Small signed offsets can use the unscaled encoding.  */
1147    if (offset >= -256 && offset < 256) {
1148        tcg_out_insn_3312(s, insn, rd, rn, offset);
1149        return;
1150    }
1151
1152    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1153    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1154    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1155}
1156
1157static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1158{
1159    if (ret == arg) {
1160        return true;
1161    }
1162    switch (type) {
1163    case TCG_TYPE_I32:
1164    case TCG_TYPE_I64:
1165        if (ret < 32 && arg < 32) {
1166            tcg_out_movr(s, type, ret, arg);
1167            break;
1168        } else if (ret < 32) {
1169            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1170            break;
1171        } else if (arg < 32) {
1172            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1173            break;
1174        }
1175        /* FALLTHRU */
1176
1177    case TCG_TYPE_V64:
1178        tcg_debug_assert(ret >= 32 && arg >= 32);
1179        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1180        break;
1181    case TCG_TYPE_V128:
1182        tcg_debug_assert(ret >= 32 && arg >= 32);
1183        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1184        break;
1185
1186    default:
1187        g_assert_not_reached();
1188    }
1189    return true;
1190}
1191
1192static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1193                       TCGReg base, intptr_t ofs)
1194{
1195    AArch64Insn insn;
1196    int lgsz;
1197
1198    switch (type) {
1199    case TCG_TYPE_I32:
1200        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1201        lgsz = 2;
1202        break;
1203    case TCG_TYPE_I64:
1204        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1205        lgsz = 3;
1206        break;
1207    case TCG_TYPE_V64:
1208        insn = I3312_LDRVD;
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V128:
1212        insn = I3312_LDRVQ;
1213        lgsz = 4;
1214        break;
1215    default:
1216        g_assert_not_reached();
1217    }
1218    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1219}
1220
1221static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1222                       TCGReg base, intptr_t ofs)
1223{
1224    AArch64Insn insn;
1225    int lgsz;
1226
1227    switch (type) {
1228    case TCG_TYPE_I32:
1229        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1230        lgsz = 2;
1231        break;
1232    case TCG_TYPE_I64:
1233        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1234        lgsz = 3;
1235        break;
1236    case TCG_TYPE_V64:
1237        insn = I3312_STRVD;
1238        lgsz = 3;
1239        break;
1240    case TCG_TYPE_V128:
1241        insn = I3312_STRVQ;
1242        lgsz = 4;
1243        break;
1244    default:
1245        g_assert_not_reached();
1246    }
1247    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1248}
1249
1250static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1251                               TCGReg base, intptr_t ofs)
1252{
1253    if (type <= TCG_TYPE_I64 && val == 0) {
1254        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1255        return true;
1256    }
1257    return false;
1258}
1259
1260static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1261                               TCGReg rn, unsigned int a, unsigned int b)
1262{
1263    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1264}
1265
1266static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1267                                TCGReg rn, unsigned int a, unsigned int b)
1268{
1269    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1270}
1271
1272static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1273                                TCGReg rn, unsigned int a, unsigned int b)
1274{
1275    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1276}
1277
1278static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1279                                TCGReg rn, TCGReg rm, unsigned int a)
1280{
1281    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1282}
1283
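/* LSL #m is an alias of UBFM Rd, Rn, #(-m & (bits - 1)), #(bits - 1 - m). */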
1284static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1285                               TCGReg rd, TCGReg rn, unsigned int m)
1286{
1287    int bits = ext ? 64 : 32;
1288    int max = bits - 1;
1289    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1290}
1291
1292static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1293                               TCGReg rd, TCGReg rn, unsigned int m)
1294{
1295    int max = ext ? 63 : 31;
1296    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1297}
1298
1299static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1300                               TCGReg rd, TCGReg rn, unsigned int m)
1301{
1302    int max = ext ? 63 : 31;
1303    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1304}
1305
1306static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1307                                TCGReg rd, TCGReg rn, unsigned int m)
1308{
1309    int max = ext ? 63 : 31;
1310    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1311}
1312
1313static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1314                                TCGReg rd, TCGReg rn, unsigned int m)
1315{
1316    int max = ext ? 63 : 31;
1317    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1318}
1319
1320static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1321                               TCGReg rn, unsigned lsb, unsigned width)
1322{
1323    unsigned size = ext ? 64 : 32;
1324    unsigned a = (size - lsb) & (size - 1);
1325    unsigned b = width - 1;
1326    tcg_out_bfm(s, ext, rd, rn, a, b);
1327}
1328
1329static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1330                        tcg_target_long b, bool const_b)
1331{
1332    if (const_b) {
1333        /* Using CMP or CMN aliases.  */
1334        if (b >= 0) {
1335            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1336        } else {
1337            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1338        }
1339    } else {
1340        /* Using CMP alias SUBS wzr, Wn, Wm */
1341        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1342    }
1343}
1344
1345static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1346{
1347    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1348    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1349    tcg_out_insn(s, 3206, B, offset);
1350}
1351
1352static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1353{
1354    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1355    if (offset == sextract64(offset, 0, 26)) {
1356        tcg_out_insn(s, 3206, B, offset);
1357    } else {
1358        /* Choose X9 as a call-clobbered non-LR temporary. */
1359        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X9, (intptr_t)target);
1360        tcg_out_insn(s, 3207, BR, TCG_REG_X9);
1361    }
1362}
1363
1364static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1365{
1366    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1367    if (offset == sextract64(offset, 0, 26)) {
1368        tcg_out_insn(s, 3206, BL, offset);
1369    } else {
1370        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1371        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
1372    }
1373}
1374
1375static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1376                         const TCGHelperInfo *info)
1377{
1378    tcg_out_call_int(s, target);
1379}
1380
1381static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1382{
1383    if (!l->has_value) {
1384        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1385        tcg_out_insn(s, 3206, B, 0);
1386    } else {
1387        tcg_out_goto(s, l->u.value_ptr);
1388    }
1389}
1390
1391static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1392                           TCGArg b, bool b_const, TCGLabel *l)
1393{
1394    intptr_t offset;
1395    bool need_cmp;
1396
1397    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1398        need_cmp = false;
1399    } else {
1400        need_cmp = true;
1401        tcg_out_cmp(s, ext, a, b, b_const);
1402    }
1403
1404    if (!l->has_value) {
1405        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1406        offset = tcg_in32(s) >> 5;
1407    } else {
1408        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1409        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1410    }
1411
1412    if (need_cmp) {
1413        tcg_out_insn(s, 3202, B_C, c, offset);
1414    } else if (c == TCG_COND_EQ) {
1415        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1416    } else {
1417        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1418    }
1419}
1420
1421static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1422                               TCGReg rd, TCGReg rn)
1423{
1424    /* REV, REV16, REV32 */
1425    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1426}
1427
1428static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1429                               TCGReg rd, TCGReg rn)
1430{
1431    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1432    int bits = (8 << s_bits) - 1;
1433    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1434}
1435
1436static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1437{
1438    tcg_out_sxt(s, type, MO_8, rd, rn);
1439}
1440
1441static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1442{
1443    tcg_out_sxt(s, type, MO_16, rd, rn);
1444}
1445
1446static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1447{
1448    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1449}
1450
1451static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1452{
1453    tcg_out_ext32s(s, rd, rn);
1454}
1455
1456static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1457                               TCGReg rd, TCGReg rn)
1458{
1459    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1460    int bits = (8 << s_bits) - 1;
1461    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1462}
1463
1464static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1465{
1466    tcg_out_uxt(s, MO_8, rd, rn);
1467}
1468
1469static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1470{
1471    tcg_out_uxt(s, MO_16, rd, rn);
1472}
1473
1474static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1475{
1476    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1477}
1478
1479static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1480{
1481    tcg_out_ext32u(s, rd, rn);
1482}
1483
1484static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1485{
1486    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1487}
1488
1489static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1490                            TCGReg rn, int64_t aimm)
1491{
1492    if (aimm >= 0) {
1493        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1494    } else {
1495        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1496    }
1497}
1498
1499static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1500                            TCGReg rh, TCGReg al, TCGReg ah,
1501                            tcg_target_long bl, tcg_target_long bh,
1502                            bool const_bl, bool const_bh, bool sub)
1503{
1504    TCGReg orig_rl = rl;
1505    AArch64Insn insn;
1506
1507    if (rl == ah || (!const_bh && rl == bh)) {
1508        rl = TCG_REG_TMP;
1509    }
1510
1511    if (const_bl) {
1512        if (bl < 0) {
1513            bl = -bl;
1514            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1515        } else {
1516            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1517        }
1518
1519        if (unlikely(al == TCG_REG_XZR)) {
1520            /* ??? We want to allow al to be zero for the benefit of
1521               negation via subtraction.  However, that leaves open the
1522               possibility of adding 0+const in the low part, and the
1523               immediate add instructions encode XSP not XZR.  Don't try
1524               anything more elaborate here than loading another zero.  */
1525            al = TCG_REG_TMP;
1526            tcg_out_movi(s, ext, al, 0);
1527        }
1528        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1529    } else {
1530        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1531    }
1532
1533    insn = I3503_ADC;
1534    if (const_bh) {
1535        /* Note that the only two constants we support are 0 and -1, and
1536           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1537        if ((bh != 0) ^ sub) {
1538            insn = I3503_SBC;
1539        }
1540        bh = TCG_REG_XZR;
1541    } else if (sub) {
1542        insn = I3503_SBC;
1543    }
1544    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1545
1546    tcg_out_mov(s, ext, orig_rl, rl);
1547}
1548
1549static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1550{
1551    static const uint32_t sync[] = {
1552        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1553        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1554        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1555        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1556        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1557    };
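    /* DMB ISHLD orders earlier loads against later loads and stores, so
       the load-only barrier also covers TCG_MO_LD_ST.  */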
1558    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1559}
1560
1561static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1562                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1563{
1564    TCGReg a1 = a0;
1565    if (is_ctz) {
1566        a1 = TCG_REG_TMP;
1567        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1568    }
1569    if (const_b && b == (ext ? 64 : 32)) {
1570        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1571    } else {
1572        AArch64Insn sel = I3506_CSEL;
1573
1574        tcg_out_cmp(s, ext, a0, 0, 1);
1575        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1576
1577        if (const_b) {
1578            if (b == -1) {
1579                b = TCG_REG_XZR;
1580                sel = I3506_CSINV;
1581            } else if (b == 0) {
1582                b = TCG_REG_XZR;
1583            } else {
1584                tcg_out_movi(s, ext, d, b);
1585                b = d;
1586            }
1587        }
1588        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1589    }
1590}
1591
1592typedef struct {
1593    TCGReg base;
1594    TCGReg index;
1595    TCGType index_ext;
1596} HostAddress;
1597
1598bool tcg_target_has_memory_bswap(MemOp memop)
1599{
1600    return false;
1601}
1602
1603static const TCGLdstHelperParam ldst_helper_param = {
1604    .ntmp = 1, .tmp = { TCG_REG_TMP }
1605};
1606
1607static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1608{
1609    MemOp opc = get_memop(lb->oi);
1610
1611    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1612        return false;
1613    }
1614
1615    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1616    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1617    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1618    tcg_out_goto(s, lb->raddr);
1619    return true;
1620}
1621
1622static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1623{
1624    MemOp opc = get_memop(lb->oi);
1625
1626    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1627        return false;
1628    }
1629
1630    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1631    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1632    tcg_out_goto(s, lb->raddr);
1633    return true;
1634}
1635
1636/*
1637 * For softmmu, perform the TLB load and compare.
1638 * For useronly, perform any required alignment tests.
1639 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1640 * is required and fill in @h with the host address for the fast path.
1641 */
1642static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1643                                           TCGReg addr_reg, MemOpIdx oi,
1644                                           bool is_ld)
1645{
1646    TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1647    TCGLabelQemuLdst *ldst = NULL;
1648    MemOp opc = get_memop(oi);
1649    unsigned a_bits = get_alignment_bits(opc);
1650    unsigned a_mask = (1u << a_bits) - 1;
1651
1652#ifdef CONFIG_SOFTMMU
1653    unsigned s_bits = opc & MO_SIZE;
1654    unsigned s_mask = (1u << s_bits) - 1;
1655    unsigned mem_index = get_mmuidx(oi);
1656    TCGReg x3;
1657    TCGType mask_type;
1658    uint64_t compare_mask;
1659
1660    ldst = new_ldst_label(s);
1661    ldst->is_ld = is_ld;
1662    ldst->oi = oi;
1663    ldst->addrlo_reg = addr_reg;
1664
1665    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1666                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1667
1668    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1669    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1670    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1671    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1672    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1673    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1674                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1675
1676    /* Extract the TLB index from the address into X0.  */
1677    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1678                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1679                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
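    /*
     * A single AND with a shifted register computes
     *   X0 = mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)),
     * which, assuming the usual fast-TLB layout where mask is
     * (n_entries - 1) << CPU_TLB_ENTRY_BITS, is already the byte offset
     * of the matching CPUTLBEntry within the table.
     */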
1680
1681    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1682    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1683
1684    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1685    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
1686               is_ld ? offsetof(CPUTLBEntry, addr_read)
1687                     : offsetof(CPUTLBEntry, addr_write));
1688    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1689               offsetof(CPUTLBEntry, addend));
1690
1691    /*
1692     * For aligned accesses, we check the first byte and include the alignment
1693     * bits within the address.  For unaligned access, we check that we don't
1694     * cross pages using the address of the last byte of the access.
1695     */
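    /*
     * For example, an 8-byte access that only requires 4-byte alignment
     * adds s_mask - a_mask = 7 - 3 = 4 below: if the access would cross a
     * page, the page-number bits of X3 no longer match the TLB comparator;
     * if the address violates the alignment, the a_mask bits kept by
     * compare_mask are non-zero and the comparison fails as well.
     */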
1696    if (a_bits >= s_bits) {
1697        x3 = addr_reg;
1698    } else {
1699        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1700                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1701        x3 = TCG_REG_X3;
1702    }
1703    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1704
1705    /* Store the page mask part of the address into X3.  */
1706    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1707                     TCG_REG_X3, x3, compare_mask);
1708
1709    /* Perform the address comparison. */
1710    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1711
1712    /* If not equal, we jump to the slow path. */
1713    ldst->label_ptr[0] = s->code_ptr;
1714    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1715
1716    *h = (HostAddress){
1717        .base = TCG_REG_X1,
1718        .index = addr_reg,
1719        .index_ext = addr_type
1720    };
1721#else
1722    if (a_mask) {
1723        ldst = new_ldst_label(s);
1724
1725        ldst->is_ld = is_ld;
1726        ldst->oi = oi;
1727        ldst->addrlo_reg = addr_reg;
1728
1729        /* tst addr, #mask */
1730        tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1731
1732        /* b.ne slow_path */
1733        ldst->label_ptr[0] = s->code_ptr;
1734        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1735    }
1736
1737    if (USE_GUEST_BASE) {
1738        *h = (HostAddress){
1739            .base = TCG_REG_GUEST_BASE,
1740            .index = addr_reg,
1741            .index_ext = addr_type
1742        };
1743    } else {
1744        *h = (HostAddress){
1745            .base = addr_reg,
1746            .index = TCG_REG_XZR,
1747            .index_ext = TCG_TYPE_I64
1748        };
1749    }
1750#endif
1751
1752    return ldst;
1753}
1754
1755static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1756                                   TCGReg data_r, HostAddress h)
1757{
1758    switch (memop & MO_SSIZE) {
1759    case MO_UB:
1760        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1761        break;
1762    case MO_SB:
1763        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1764                       data_r, h.base, h.index_ext, h.index);
1765        break;
1766    case MO_UW:
1767        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1768        break;
1769    case MO_SW:
1770        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1771                       data_r, h.base, h.index_ext, h.index);
1772        break;
1773    case MO_UL:
1774        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1775        break;
1776    case MO_SL:
1777        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1778        break;
1779    case MO_UQ:
1780        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1781        break;
1782    default:
1783        g_assert_not_reached();
1784    }
1785}
1786
1787static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1788                                   TCGReg data_r, HostAddress h)
1789{
1790    switch (memop & MO_SIZE) {
1791    case MO_8:
1792        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1793        break;
1794    case MO_16:
1795        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1796        break;
1797    case MO_32:
1798        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1799        break;
1800    case MO_64:
1801        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1802        break;
1803    default:
1804        g_assert_not_reached();
1805    }
1806}
1807
1808static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1809                            MemOpIdx oi, TCGType data_type)
1810{
1811    TCGLabelQemuLdst *ldst;
1812    HostAddress h;
1813
1814    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1815    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1816
1817    if (ldst) {
1818        ldst->type = data_type;
1819        ldst->datalo_reg = data_reg;
1820        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1821    }
1822}
1823
1824static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1825                            MemOpIdx oi, TCGType data_type)
1826{
1827    TCGLabelQemuLdst *ldst;
1828    HostAddress h;
1829
1830    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1831    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1832
1833    if (ldst) {
1834        ldst->type = data_type;
1835        ldst->datalo_reg = data_reg;
1836        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1837    }
1838}
1839
1840static const tcg_insn_unit *tb_ret_addr;
1841
1842static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1843{
1844    /* Reuse the zeroing that exists for goto_ptr.  */
1845    if (a0 == 0) {
1846        tcg_out_goto_long(s, tcg_code_gen_epilogue);
1847    } else {
1848        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1849        tcg_out_goto_long(s, tb_ret_addr);
1850    }
1851}
1852
1853static void tcg_out_goto_tb(TCGContext *s, int which)
1854{
1855    /*
1856     * Direct branch, or indirect address load, will be patched
1857     * by tb_target_set_jmp_target.  Assert indirect load offset
1858     * in range early, regardless of direct branch distance.
1859     */
1860    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1861    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1862
1863    set_jmp_insn_offset(s, which);
1864    tcg_out32(s, I3206_B);
1865    tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1866    set_jmp_reset_offset(s, which);
1867}
1868
1869void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1870                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1871{
1872    uintptr_t d_addr = tb->jmp_target_addr[n];
1873    ptrdiff_t d_offset = d_addr - jmp_rx;
1874    tcg_insn_unit insn;
1875
1876    /* Either directly branch, or indirect branch load. */
1877    if (d_offset == sextract64(d_offset, 0, 28)) {
1878        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
1879    } else {
1880        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
1881        ptrdiff_t i_offset = i_addr - jmp_rx;
1882
1883        /* Note that we asserted this in range in tcg_out_goto_tb. */
1884        insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
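        /*
         * I3305_LDR is the PC-relative LDR (literal) form: the patched insn
         * loads the 64-bit destination from jmp_target_addr into TCG_REG_TMP,
         * and the BR TCG_REG_TMP emitted by tcg_out_goto_tb completes the
         * indirect jump.
         */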
1885    }
1886    qatomic_set((uint32_t *)jmp_rw, insn);
1887    flush_idcache_range(jmp_rx, jmp_rw, 4);
1888}
1889
1890static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1891                       const TCGArg args[TCG_MAX_OP_ARGS],
1892                       const int const_args[TCG_MAX_OP_ARGS])
1893{
1894    /* 99% of the time, we can signal the use of extension registers
1895       by looking to see if the opcode handles 64-bit data.  */
1896    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1897
1898    /* Hoist the loads of the most common arguments.  */
1899    TCGArg a0 = args[0];
1900    TCGArg a1 = args[1];
1901    TCGArg a2 = args[2];
1902    int c2 = const_args[2];
1903
1904    /* Some operands are defined with "rZ" constraint, a register or
1905       the zero register.  These need not actually test args[I] == 0.  */
1906#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1907
1908    switch (opc) {
1909    case INDEX_op_goto_ptr:
1910        tcg_out_insn(s, 3207, BR, a0);
1911        break;
1912
1913    case INDEX_op_br:
1914        tcg_out_goto_label(s, arg_label(a0));
1915        break;
1916
1917    case INDEX_op_ld8u_i32:
1918    case INDEX_op_ld8u_i64:
1919        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1920        break;
1921    case INDEX_op_ld8s_i32:
1922        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1923        break;
1924    case INDEX_op_ld8s_i64:
1925        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1926        break;
1927    case INDEX_op_ld16u_i32:
1928    case INDEX_op_ld16u_i64:
1929        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1930        break;
1931    case INDEX_op_ld16s_i32:
1932        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1933        break;
1934    case INDEX_op_ld16s_i64:
1935        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1936        break;
1937    case INDEX_op_ld_i32:
1938    case INDEX_op_ld32u_i64:
1939        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1940        break;
1941    case INDEX_op_ld32s_i64:
1942        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1943        break;
1944    case INDEX_op_ld_i64:
1945        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1946        break;
1947
1948    case INDEX_op_st8_i32:
1949    case INDEX_op_st8_i64:
1950        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1951        break;
1952    case INDEX_op_st16_i32:
1953    case INDEX_op_st16_i64:
1954        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1955        break;
1956    case INDEX_op_st_i32:
1957    case INDEX_op_st32_i64:
1958        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1959        break;
1960    case INDEX_op_st_i64:
1961        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1962        break;
1963
1964    case INDEX_op_add_i32:
1965        a2 = (int32_t)a2;
1966        /* FALLTHRU */
1967    case INDEX_op_add_i64:
1968        if (c2) {
1969            tcg_out_addsubi(s, ext, a0, a1, a2);
1970        } else {
1971            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1972        }
1973        break;
1974
1975    case INDEX_op_sub_i32:
1976        a2 = (int32_t)a2;
1977        /* FALLTHRU */
1978    case INDEX_op_sub_i64:
1979        if (c2) {
1980            tcg_out_addsubi(s, ext, a0, a1, -a2);
1981        } else {
1982            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1983        }
1984        break;
1985
1986    case INDEX_op_neg_i64:
1987    case INDEX_op_neg_i32:
1988        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1989        break;
1990
1991    case INDEX_op_and_i32:
1992        a2 = (int32_t)a2;
1993        /* FALLTHRU */
1994    case INDEX_op_and_i64:
1995        if (c2) {
1996            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1997        } else {
1998            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1999        }
2000        break;
2001
2002    case INDEX_op_andc_i32:
2003        a2 = (int32_t)a2;
2004        /* FALLTHRU */
2005    case INDEX_op_andc_i64:
2006        if (c2) {
2007            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2008        } else {
2009            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2010        }
2011        break;
2012
2013    case INDEX_op_or_i32:
2014        a2 = (int32_t)a2;
2015        /* FALLTHRU */
2016    case INDEX_op_or_i64:
2017        if (c2) {
2018            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2019        } else {
2020            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2021        }
2022        break;
2023
2024    case INDEX_op_orc_i32:
2025        a2 = (int32_t)a2;
2026        /* FALLTHRU */
2027    case INDEX_op_orc_i64:
2028        if (c2) {
2029            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2030        } else {
2031            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2032        }
2033        break;
2034
2035    case INDEX_op_xor_i32:
2036        a2 = (int32_t)a2;
2037        /* FALLTHRU */
2038    case INDEX_op_xor_i64:
2039        if (c2) {
2040            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2041        } else {
2042            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2043        }
2044        break;
2045
2046    case INDEX_op_eqv_i32:
2047        a2 = (int32_t)a2;
2048        /* FALLTHRU */
2049    case INDEX_op_eqv_i64:
2050        if (c2) {
2051            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2052        } else {
2053            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2054        }
2055        break;
2056
2057    case INDEX_op_not_i64:
2058    case INDEX_op_not_i32:
2059        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2060        break;
2061
2062    case INDEX_op_mul_i64:
2063    case INDEX_op_mul_i32:
2064        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2065        break;
2066
2067    case INDEX_op_div_i64:
2068    case INDEX_op_div_i32:
2069        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2070        break;
2071    case INDEX_op_divu_i64:
2072    case INDEX_op_divu_i32:
2073        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2074        break;
2075
2076    case INDEX_op_rem_i64:
2077    case INDEX_op_rem_i32:
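        /* There is no remainder instruction: divide into TMP, then
           MSUB(TMP, a2, a1) yields a0 = a1 - (a1 / a2) * a2.  */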
2078        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2079        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2080        break;
2081    case INDEX_op_remu_i64:
2082    case INDEX_op_remu_i32:
2083        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2084        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2085        break;
2086
2087    case INDEX_op_shl_i64:
2088    case INDEX_op_shl_i32:
2089        if (c2) {
2090            tcg_out_shl(s, ext, a0, a1, a2);
2091        } else {
2092            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2093        }
2094        break;
2095
2096    case INDEX_op_shr_i64:
2097    case INDEX_op_shr_i32:
2098        if (c2) {
2099            tcg_out_shr(s, ext, a0, a1, a2);
2100        } else {
2101            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2102        }
2103        break;
2104
2105    case INDEX_op_sar_i64:
2106    case INDEX_op_sar_i32:
2107        if (c2) {
2108            tcg_out_sar(s, ext, a0, a1, a2);
2109        } else {
2110            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2111        }
2112        break;
2113
2114    case INDEX_op_rotr_i64:
2115    case INDEX_op_rotr_i32:
2116        if (c2) {
2117            tcg_out_rotr(s, ext, a0, a1, a2);
2118        } else {
2119            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2120        }
2121        break;
2122
2123    case INDEX_op_rotl_i64:
2124    case INDEX_op_rotl_i32:
2125        if (c2) {
2126            tcg_out_rotl(s, ext, a0, a1, a2);
2127        } else {
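            /* There is no rotate-left instruction: negate the count (RORV
               reduces it modulo the operand width) and rotate right.  */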
2128            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2129            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2130        }
2131        break;
2132
2133    case INDEX_op_clz_i64:
2134    case INDEX_op_clz_i32:
2135        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2136        break;
2137    case INDEX_op_ctz_i64:
2138    case INDEX_op_ctz_i32:
2139        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2140        break;
2141
2142    case INDEX_op_brcond_i32:
2143        a1 = (int32_t)a1;
2144        /* FALLTHRU */
2145    case INDEX_op_brcond_i64:
2146        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2147        break;
2148
2149    case INDEX_op_setcond_i32:
2150        a2 = (int32_t)a2;
2151        /* FALLTHRU */
2152    case INDEX_op_setcond_i64:
2153        tcg_out_cmp(s, ext, a1, a2, c2);
2154        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2155        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2156                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2157        break;
2158
2159    case INDEX_op_movcond_i32:
2160        a2 = (int32_t)a2;
2161        /* FALLTHRU */
2162    case INDEX_op_movcond_i64:
2163        tcg_out_cmp(s, ext, a1, a2, c2);
2164        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2165        break;
2166
2167    case INDEX_op_qemu_ld_i32:
2168    case INDEX_op_qemu_ld_i64:
2169        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2170        break;
2171    case INDEX_op_qemu_st_i32:
2172    case INDEX_op_qemu_st_i64:
2173        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2174        break;
2175
2176    case INDEX_op_bswap64_i64:
2177        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2178        break;
2179    case INDEX_op_bswap32_i64:
2180        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2181        if (a2 & TCG_BSWAP_OS) {
2182            tcg_out_ext32s(s, a0, a0);
2183        }
2184        break;
2185    case INDEX_op_bswap32_i32:
2186        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2187        break;
2188    case INDEX_op_bswap16_i64:
2189    case INDEX_op_bswap16_i32:
2190        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2191        if (a2 & TCG_BSWAP_OS) {
2192            /* Output must be sign-extended. */
2193            tcg_out_ext16s(s, ext, a0, a0);
2194        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2195            /* Output must be zero-extended, but input isn't. */
2196            tcg_out_ext16u(s, a0, a0);
2197        }
2198        break;
2199
2200    case INDEX_op_deposit_i64:
2201    case INDEX_op_deposit_i32:
2202        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2203        break;
2204
2205    case INDEX_op_extract_i64:
2206    case INDEX_op_extract_i32:
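        /* UBFM with immr = ofs and imms = ofs + len - 1, as passed here,
           copies bits [ofs, ofs + len) of a1 down to bit 0 of a0 with zero
           extension; SBFM below does the same with sign extension.  */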
2207        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2208        break;
2209
2210    case INDEX_op_sextract_i64:
2211    case INDEX_op_sextract_i32:
2212        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2213        break;
2214
2215    case INDEX_op_extract2_i64:
2216    case INDEX_op_extract2_i32:
2217        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2218        break;
2219
2220    case INDEX_op_add2_i32:
2221        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2222                        (int32_t)args[4], args[5], const_args[4],
2223                        const_args[5], false);
2224        break;
2225    case INDEX_op_add2_i64:
2226        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2227                        args[5], const_args[4], const_args[5], false);
2228        break;
2229    case INDEX_op_sub2_i32:
2230        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2231                        (int32_t)args[4], args[5], const_args[4],
2232                        const_args[5], true);
2233        break;
2234    case INDEX_op_sub2_i64:
2235        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2236                        args[5], const_args[4], const_args[5], true);
2237        break;
2238
2239    case INDEX_op_muluh_i64:
2240        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2241        break;
2242    case INDEX_op_mulsh_i64:
2243        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2244        break;
2245
2246    case INDEX_op_mb:
2247        tcg_out_mb(s, a0);
2248        break;
2249
2250    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2251    case INDEX_op_mov_i64:
2252    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2253    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2254    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2255    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2256    case INDEX_op_ext8s_i64:
2257    case INDEX_op_ext8u_i32:
2258    case INDEX_op_ext8u_i64:
2259    case INDEX_op_ext16s_i64:
2260    case INDEX_op_ext16s_i32:
2261    case INDEX_op_ext16u_i64:
2262    case INDEX_op_ext16u_i32:
2263    case INDEX_op_ext32s_i64:
2264    case INDEX_op_ext32u_i64:
2265    case INDEX_op_ext_i32_i64:
2266    case INDEX_op_extu_i32_i64:
2267    case INDEX_op_extrl_i64_i32:
2268    default:
2269        g_assert_not_reached();
2270    }
2271
2272#undef REG0
2273}
2274
2275static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2276                           unsigned vecl, unsigned vece,
2277                           const TCGArg args[TCG_MAX_OP_ARGS],
2278                           const int const_args[TCG_MAX_OP_ARGS])
2279{
2280    static const AArch64Insn cmp_vec_insn[16] = {
2281        [TCG_COND_EQ] = I3616_CMEQ,
2282        [TCG_COND_GT] = I3616_CMGT,
2283        [TCG_COND_GE] = I3616_CMGE,
2284        [TCG_COND_GTU] = I3616_CMHI,
2285        [TCG_COND_GEU] = I3616_CMHS,
2286    };
2287    static const AArch64Insn cmp_scalar_insn[16] = {
2288        [TCG_COND_EQ] = I3611_CMEQ,
2289        [TCG_COND_GT] = I3611_CMGT,
2290        [TCG_COND_GE] = I3611_CMGE,
2291        [TCG_COND_GTU] = I3611_CMHI,
2292        [TCG_COND_GEU] = I3611_CMHS,
2293    };
2294    static const AArch64Insn cmp0_vec_insn[16] = {
2295        [TCG_COND_EQ] = I3617_CMEQ0,
2296        [TCG_COND_GT] = I3617_CMGT0,
2297        [TCG_COND_GE] = I3617_CMGE0,
2298        [TCG_COND_LT] = I3617_CMLT0,
2299        [TCG_COND_LE] = I3617_CMLE0,
2300    };
2301    static const AArch64Insn cmp0_scalar_insn[16] = {
2302        [TCG_COND_EQ] = I3612_CMEQ0,
2303        [TCG_COND_GT] = I3612_CMGT0,
2304        [TCG_COND_GE] = I3612_CMGE0,
2305        [TCG_COND_LT] = I3612_CMLT0,
2306        [TCG_COND_LE] = I3612_CMLE0,
2307    };
2308
2309    TCGType type = vecl + TCG_TYPE_V64;
2310    unsigned is_q = vecl;
2311    bool is_scalar = !is_q && vece == MO_64;
2312    TCGArg a0, a1, a2, a3;
2313    int cmode, imm8;
2314
2315    a0 = args[0];
2316    a1 = args[1];
2317    a2 = args[2];
2318
2319    switch (opc) {
2320    case INDEX_op_ld_vec:
2321        tcg_out_ld(s, type, a0, a1, a2);
2322        break;
2323    case INDEX_op_st_vec:
2324        tcg_out_st(s, type, a0, a1, a2);
2325        break;
2326    case INDEX_op_dupm_vec:
2327        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2328        break;
2329    case INDEX_op_add_vec:
2330        if (is_scalar) {
2331            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2332        } else {
2333            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2334        }
2335        break;
2336    case INDEX_op_sub_vec:
2337        if (is_scalar) {
2338            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2339        } else {
2340            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2341        }
2342        break;
2343    case INDEX_op_mul_vec:
2344        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2345        break;
2346    case INDEX_op_neg_vec:
2347        if (is_scalar) {
2348            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2349        } else {
2350            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2351        }
2352        break;
2353    case INDEX_op_abs_vec:
2354        if (is_scalar) {
2355            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2356        } else {
2357            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2358        }
2359        break;
2360    case INDEX_op_and_vec:
2361        if (const_args[2]) {
2362            is_shimm1632(~a2, &cmode, &imm8);
2363            if (a0 == a1) {
2364                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2365                return;
2366            }
2367            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2368            a2 = a0;
2369        }
2370        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2371        break;
2372    case INDEX_op_or_vec:
2373        if (const_args[2]) {
2374            is_shimm1632(a2, &cmode, &imm8);
2375            if (a0 == a1) {
2376                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2377                return;
2378            }
2379            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2380            a2 = a0;
2381        }
2382        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2383        break;
2384    case INDEX_op_andc_vec:
2385        if (const_args[2]) {
2386            is_shimm1632(a2, &cmode, &imm8);
2387            if (a0 == a1) {
2388                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2389                return;
2390            }
2391            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2392            a2 = a0;
2393        }
2394        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2395        break;
2396    case INDEX_op_orc_vec:
2397        if (const_args[2]) {
2398            is_shimm1632(~a2, &cmode, &imm8);
2399            if (a0 == a1) {
2400                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2401                return;
2402            }
2403            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2404            a2 = a0;
2405        }
2406        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2407        break;
2408    case INDEX_op_xor_vec:
2409        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2410        break;
2411    case INDEX_op_ssadd_vec:
2412        if (is_scalar) {
2413            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2414        } else {
2415            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2416        }
2417        break;
2418    case INDEX_op_sssub_vec:
2419        if (is_scalar) {
2420            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2421        } else {
2422            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2423        }
2424        break;
2425    case INDEX_op_usadd_vec:
2426        if (is_scalar) {
2427            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2428        } else {
2429            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2430        }
2431        break;
2432    case INDEX_op_ussub_vec:
2433        if (is_scalar) {
2434            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2435        } else {
2436            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2437        }
2438        break;
2439    case INDEX_op_smax_vec:
2440        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2441        break;
2442    case INDEX_op_smin_vec:
2443        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2444        break;
2445    case INDEX_op_umax_vec:
2446        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2447        break;
2448    case INDEX_op_umin_vec:
2449        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2450        break;
2451    case INDEX_op_not_vec:
2452        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2453        break;
2454    case INDEX_op_shli_vec:
2455        if (is_scalar) {
2456            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2457        } else {
2458            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2459        }
2460        break;
2461    case INDEX_op_shri_vec:
2462        if (is_scalar) {
2463            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2464        } else {
2465            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2466        }
2467        break;
2468    case INDEX_op_sari_vec:
2469        if (is_scalar) {
2470            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2471        } else {
2472            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2473        }
2474        break;
2475    case INDEX_op_aa64_sli_vec:
2476        if (is_scalar) {
2477            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2478        } else {
2479            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2480        }
2481        break;
2482    case INDEX_op_shlv_vec:
2483        if (is_scalar) {
2484            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2485        } else {
2486            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2487        }
2488        break;
2489    case INDEX_op_aa64_sshl_vec:
2490        if (is_scalar) {
2491            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2492        } else {
2493            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2494        }
2495        break;
2496    case INDEX_op_cmp_vec:
2497        {
2498            TCGCond cond = args[3];
2499            AArch64Insn insn;
2500
2501            if (cond == TCG_COND_NE) {
2502                if (const_args[2]) {
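                    /* NE against constant 0: CMTST Vd, Vn, Vn sets a lane to
                       all-ones iff (Vn & Vn) != 0, i.e. the lane is non-zero.  */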
2503                    if (is_scalar) {
2504                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2505                    } else {
2506                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2507                    }
2508                } else {
2509                    if (is_scalar) {
2510                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2511                    } else {
2512                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2513                    }
2514                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2515                }
2516            } else {
2517                if (const_args[2]) {
2518                    if (is_scalar) {
2519                        insn = cmp0_scalar_insn[cond];
2520                        if (insn) {
2521                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2522                            break;
2523                        }
2524                    } else {
2525                        insn = cmp0_vec_insn[cond];
2526                        if (insn) {
2527                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2528                            break;
2529                        }
2530                    }
2531                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2532                    a2 = TCG_VEC_TMP;
2533                }
2534                if (is_scalar) {
2535                    insn = cmp_scalar_insn[cond];
2536                    if (insn == 0) {
2537                        TCGArg t;
2538                        t = a1, a1 = a2, a2 = t;
2539                        cond = tcg_swap_cond(cond);
2540                        insn = cmp_scalar_insn[cond];
2541                        tcg_debug_assert(insn != 0);
2542                    }
2543                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2544                } else {
2545                    insn = cmp_vec_insn[cond];
2546                    if (insn == 0) {
2547                        TCGArg t;
2548                        t = a1, a1 = a2, a2 = t;
2549                        cond = tcg_swap_cond(cond);
2550                        insn = cmp_vec_insn[cond];
2551                        tcg_debug_assert(insn != 0);
2552                    }
2553                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2554                }
2555            }
2556        }
2557        break;
2558
2559    case INDEX_op_bitsel_vec:
2560        a3 = args[3];
2561        if (a0 == a3) {
2562            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2563        } else if (a0 == a2) {
2564            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2565        } else {
2566            if (a0 != a1) {
2567                tcg_out_mov(s, type, a0, a1);
2568            }
2569            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2570        }
2571        break;
2572
2573    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2574    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2575    default:
2576        g_assert_not_reached();
2577    }
2578}
2579
2580int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2581{
2582    switch (opc) {
2583    case INDEX_op_add_vec:
2584    case INDEX_op_sub_vec:
2585    case INDEX_op_and_vec:
2586    case INDEX_op_or_vec:
2587    case INDEX_op_xor_vec:
2588    case INDEX_op_andc_vec:
2589    case INDEX_op_orc_vec:
2590    case INDEX_op_neg_vec:
2591    case INDEX_op_abs_vec:
2592    case INDEX_op_not_vec:
2593    case INDEX_op_cmp_vec:
2594    case INDEX_op_shli_vec:
2595    case INDEX_op_shri_vec:
2596    case INDEX_op_sari_vec:
2597    case INDEX_op_ssadd_vec:
2598    case INDEX_op_sssub_vec:
2599    case INDEX_op_usadd_vec:
2600    case INDEX_op_ussub_vec:
2601    case INDEX_op_shlv_vec:
2602    case INDEX_op_bitsel_vec:
2603        return 1;
2604    case INDEX_op_rotli_vec:
2605    case INDEX_op_shrv_vec:
2606    case INDEX_op_sarv_vec:
2607    case INDEX_op_rotlv_vec:
2608    case INDEX_op_rotrv_vec:
2609        return -1;
2610    case INDEX_op_mul_vec:
2611    case INDEX_op_smax_vec:
2612    case INDEX_op_smin_vec:
2613    case INDEX_op_umax_vec:
2614    case INDEX_op_umin_vec:
2615        return vece < MO_64;
2616
2617    default:
2618        return 0;
2619    }
2620}
2621
2622void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2623                       TCGArg a0, ...)
2624{
2625    va_list va;
2626    TCGv_vec v0, v1, v2, t1, t2, c1;
2627    TCGArg a2;
2628
2629    va_start(va, a0);
2630    v0 = temp_tcgv_vec(arg_temp(a0));
2631    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2632    a2 = va_arg(va, TCGArg);
2633    va_end(va);
2634
2635    switch (opc) {
2636    case INDEX_op_rotli_vec:
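        /*
         * Rotate left by immediate: shift the wrapped-around bits right into
         * a temp, then SLI (shift left and insert) the original value above
         * them; the aa64_sli_vec constraint ties the output to that temp.
         */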
2637        t1 = tcg_temp_new_vec(type);
2638        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2639        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2640                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2641        tcg_temp_free_vec(t1);
2642        break;
2643
2644    case INDEX_op_shrv_vec:
2645    case INDEX_op_sarv_vec:
2646        /* Right shifts are negative left shifts for AArch64.  */
2647        v2 = temp_tcgv_vec(arg_temp(a2));
2648        t1 = tcg_temp_new_vec(type);
2649        tcg_gen_neg_vec(vece, t1, v2);
2650        opc = (opc == INDEX_op_shrv_vec
2651               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2652        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2653                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2654        tcg_temp_free_vec(t1);
2655        break;
2656
2657    case INDEX_op_rotlv_vec:
2658        v2 = temp_tcgv_vec(arg_temp(a2));
2659        t1 = tcg_temp_new_vec(type);
2660        c1 = tcg_constant_vec(type, vece, 8 << vece);
2661        tcg_gen_sub_vec(vece, t1, v2, c1);
2662        /* Right shifts are negative left shifts for AArch64.  */
2663        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2664                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2665        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2666                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2667        tcg_gen_or_vec(vece, v0, v0, t1);
2668        tcg_temp_free_vec(t1);
2669        break;
2670
2671    case INDEX_op_rotrv_vec:
2672        v2 = temp_tcgv_vec(arg_temp(a2));
2673        t1 = tcg_temp_new_vec(type);
2674        t2 = tcg_temp_new_vec(type);
2675        c1 = tcg_constant_vec(type, vece, 8 << vece);
2676        tcg_gen_neg_vec(vece, t1, v2);
2677        tcg_gen_sub_vec(vece, t2, c1, v2);
2678        /* Right shifts are negative left shifts for AArch64.  */
2679        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2680                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2681        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2682                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2683        tcg_gen_or_vec(vece, v0, t1, t2);
2684        tcg_temp_free_vec(t1);
2685        tcg_temp_free_vec(t2);
2686        break;
2687
2688    default:
2689        g_assert_not_reached();
2690    }
2691}
2692
2693static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2694{
2695    switch (op) {
2696    case INDEX_op_goto_ptr:
2697        return C_O0_I1(r);
2698
2699    case INDEX_op_ld8u_i32:
2700    case INDEX_op_ld8s_i32:
2701    case INDEX_op_ld16u_i32:
2702    case INDEX_op_ld16s_i32:
2703    case INDEX_op_ld_i32:
2704    case INDEX_op_ld8u_i64:
2705    case INDEX_op_ld8s_i64:
2706    case INDEX_op_ld16u_i64:
2707    case INDEX_op_ld16s_i64:
2708    case INDEX_op_ld32u_i64:
2709    case INDEX_op_ld32s_i64:
2710    case INDEX_op_ld_i64:
2711    case INDEX_op_neg_i32:
2712    case INDEX_op_neg_i64:
2713    case INDEX_op_not_i32:
2714    case INDEX_op_not_i64:
2715    case INDEX_op_bswap16_i32:
2716    case INDEX_op_bswap32_i32:
2717    case INDEX_op_bswap16_i64:
2718    case INDEX_op_bswap32_i64:
2719    case INDEX_op_bswap64_i64:
2720    case INDEX_op_ext8s_i32:
2721    case INDEX_op_ext16s_i32:
2722    case INDEX_op_ext8u_i32:
2723    case INDEX_op_ext16u_i32:
2724    case INDEX_op_ext8s_i64:
2725    case INDEX_op_ext16s_i64:
2726    case INDEX_op_ext32s_i64:
2727    case INDEX_op_ext8u_i64:
2728    case INDEX_op_ext16u_i64:
2729    case INDEX_op_ext32u_i64:
2730    case INDEX_op_ext_i32_i64:
2731    case INDEX_op_extu_i32_i64:
2732    case INDEX_op_extract_i32:
2733    case INDEX_op_extract_i64:
2734    case INDEX_op_sextract_i32:
2735    case INDEX_op_sextract_i64:
2736        return C_O1_I1(r, r);
2737
2738    case INDEX_op_st8_i32:
2739    case INDEX_op_st16_i32:
2740    case INDEX_op_st_i32:
2741    case INDEX_op_st8_i64:
2742    case INDEX_op_st16_i64:
2743    case INDEX_op_st32_i64:
2744    case INDEX_op_st_i64:
2745        return C_O0_I2(rZ, r);
2746
2747    case INDEX_op_add_i32:
2748    case INDEX_op_add_i64:
2749    case INDEX_op_sub_i32:
2750    case INDEX_op_sub_i64:
2751    case INDEX_op_setcond_i32:
2752    case INDEX_op_setcond_i64:
2753        return C_O1_I2(r, r, rA);
2754
2755    case INDEX_op_mul_i32:
2756    case INDEX_op_mul_i64:
2757    case INDEX_op_div_i32:
2758    case INDEX_op_div_i64:
2759    case INDEX_op_divu_i32:
2760    case INDEX_op_divu_i64:
2761    case INDEX_op_rem_i32:
2762    case INDEX_op_rem_i64:
2763    case INDEX_op_remu_i32:
2764    case INDEX_op_remu_i64:
2765    case INDEX_op_muluh_i64:
2766    case INDEX_op_mulsh_i64:
2767        return C_O1_I2(r, r, r);
2768
2769    case INDEX_op_and_i32:
2770    case INDEX_op_and_i64:
2771    case INDEX_op_or_i32:
2772    case INDEX_op_or_i64:
2773    case INDEX_op_xor_i32:
2774    case INDEX_op_xor_i64:
2775    case INDEX_op_andc_i32:
2776    case INDEX_op_andc_i64:
2777    case INDEX_op_orc_i32:
2778    case INDEX_op_orc_i64:
2779    case INDEX_op_eqv_i32:
2780    case INDEX_op_eqv_i64:
2781        return C_O1_I2(r, r, rL);
2782
2783    case INDEX_op_shl_i32:
2784    case INDEX_op_shr_i32:
2785    case INDEX_op_sar_i32:
2786    case INDEX_op_rotl_i32:
2787    case INDEX_op_rotr_i32:
2788    case INDEX_op_shl_i64:
2789    case INDEX_op_shr_i64:
2790    case INDEX_op_sar_i64:
2791    case INDEX_op_rotl_i64:
2792    case INDEX_op_rotr_i64:
2793        return C_O1_I2(r, r, ri);
2794
2795    case INDEX_op_clz_i32:
2796    case INDEX_op_ctz_i32:
2797    case INDEX_op_clz_i64:
2798    case INDEX_op_ctz_i64:
2799        return C_O1_I2(r, r, rAL);
2800
2801    case INDEX_op_brcond_i32:
2802    case INDEX_op_brcond_i64:
2803        return C_O0_I2(r, rA);
2804
2805    case INDEX_op_movcond_i32:
2806    case INDEX_op_movcond_i64:
2807        return C_O1_I4(r, r, rA, rZ, rZ);
2808
2809    case INDEX_op_qemu_ld_i32:
2810    case INDEX_op_qemu_ld_i64:
2811        return C_O1_I1(r, l);
2812    case INDEX_op_qemu_st_i32:
2813    case INDEX_op_qemu_st_i64:
2814        return C_O0_I2(lZ, l);
2815
2816    case INDEX_op_deposit_i32:
2817    case INDEX_op_deposit_i64:
2818        return C_O1_I2(r, 0, rZ);
2819
2820    case INDEX_op_extract2_i32:
2821    case INDEX_op_extract2_i64:
2822        return C_O1_I2(r, rZ, rZ);
2823
2824    case INDEX_op_add2_i32:
2825    case INDEX_op_add2_i64:
2826    case INDEX_op_sub2_i32:
2827    case INDEX_op_sub2_i64:
2828        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2829
2830    case INDEX_op_add_vec:
2831    case INDEX_op_sub_vec:
2832    case INDEX_op_mul_vec:
2833    case INDEX_op_xor_vec:
2834    case INDEX_op_ssadd_vec:
2835    case INDEX_op_sssub_vec:
2836    case INDEX_op_usadd_vec:
2837    case INDEX_op_ussub_vec:
2838    case INDEX_op_smax_vec:
2839    case INDEX_op_smin_vec:
2840    case INDEX_op_umax_vec:
2841    case INDEX_op_umin_vec:
2842    case INDEX_op_shlv_vec:
2843    case INDEX_op_shrv_vec:
2844    case INDEX_op_sarv_vec:
2845    case INDEX_op_aa64_sshl_vec:
2846        return C_O1_I2(w, w, w);
2847    case INDEX_op_not_vec:
2848    case INDEX_op_neg_vec:
2849    case INDEX_op_abs_vec:
2850    case INDEX_op_shli_vec:
2851    case INDEX_op_shri_vec:
2852    case INDEX_op_sari_vec:
2853        return C_O1_I1(w, w);
2854    case INDEX_op_ld_vec:
2855    case INDEX_op_dupm_vec:
2856        return C_O1_I1(w, r);
2857    case INDEX_op_st_vec:
2858        return C_O0_I2(w, r);
2859    case INDEX_op_dup_vec:
2860        return C_O1_I1(w, wr);
2861    case INDEX_op_or_vec:
2862    case INDEX_op_andc_vec:
2863        return C_O1_I2(w, w, wO);
2864    case INDEX_op_and_vec:
2865    case INDEX_op_orc_vec:
2866        return C_O1_I2(w, w, wN);
2867    case INDEX_op_cmp_vec:
2868        return C_O1_I2(w, w, wZ);
2869    case INDEX_op_bitsel_vec:
2870        return C_O1_I3(w, w, w, w);
2871    case INDEX_op_aa64_sli_vec:
2872        return C_O1_I2(w, 0, w);
2873
2874    default:
2875        g_assert_not_reached();
2876    }
2877}
2878
2879#ifdef CONFIG_DARWIN
2880static bool sysctl_for_bool(const char *name)
2881{
2882    int val = 0;
2883    size_t len = sizeof(val);
2884
2885    if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
2886        return val != 0;
2887    }
2888
2889    /*
2890     * We might in the future ask for properties not present in older kernels,
2891     * but we're only asking about static properties, all of which should be
2892     * 'int'.  So we shouldn't see ENOMEM (val too small), or any of the other
2893     * more exotic errors.
2894     */
2895    assert(errno == ENOENT);
2896    return false;
2897}
2898#endif
2899
2900static void tcg_target_init(TCGContext *s)
2901{
2902#ifdef __linux__
2903    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2904    have_lse = hwcap & HWCAP_ATOMICS;
2905    have_lse2 = hwcap & HWCAP_USCAT;
2906#endif
2907#ifdef CONFIG_DARWIN
2908    have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
2909    have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
2910#endif
2911
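    /* General registers occupy bits 0..31 of a regset, vector registers
       bits 32..63, hence the two halves below.  */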
2912    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2913    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2914    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2915    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2916
2917    tcg_target_call_clobber_regs = -1ull;
2918    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2919    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2920    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2921    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2922    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2923    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2924    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2925    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2926    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2927    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2928    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2929    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2930    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2931    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2932    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2933    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2934    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2935    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2936    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2937
2938    s->reserved_regs = 0;
2939    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2940    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2941    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2942    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2943    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2944}
2945
2946/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2947#define PUSH_SIZE  ((30 - 19 + 1) * 8)
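/* That is 12 registers * 8 bytes = 96 bytes, all saved as STP pairs.  */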
2948
2949#define FRAME_SIZE \
2950    ((PUSH_SIZE \
2951      + TCG_STATIC_CALL_ARGS_SIZE \
2952      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2953      + TCG_TARGET_STACK_ALIGN - 1) \
2954     & ~(TCG_TARGET_STACK_ALIGN - 1))
2955
2956/* We're expecting a 2 byte uleb128 encoded value.  */
2957QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2958
2959/* We're expecting to use a single ADDI insn.  */
2960QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2961
2962static void tcg_target_qemu_prologue(TCGContext *s)
2963{
2964    TCGReg r;
2965
2966    /* Push (FP, LR) and allocate space for all saved registers.  */
2967    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2968                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2969
2970    /* Set up frame pointer for canonical unwinding.  */
2971    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2972
2973    /* Store callee-preserved regs x19..x28.  */
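    /* Offsets start at 16 because slots 0 and 1 of the frame already hold
       FP and LR, stored by the STP above.  */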
2974    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2975        int ofs = (r - TCG_REG_X19 + 2) * 8;
2976        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2977    }
2978
2979    /* Make stack space for TCG locals.  */
2980    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2981                 FRAME_SIZE - PUSH_SIZE);
2982
2983    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2984    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2985                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2986
2987#if !defined(CONFIG_SOFTMMU)
2988    if (USE_GUEST_BASE) {
2989        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2990        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2991    }
2992#endif
2993
2994    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2995    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2996
2997    /*
2998     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2999     * and fall through to the rest of the epilogue.
3000     */
3001    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3002    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3003
3004    /* TB epilogue */
3005    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3006
3007    /* Remove TCG locals stack space.  */
3008    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3009                 FRAME_SIZE - PUSH_SIZE);
3010
3011    /* Restore registers x19..x28.  */
3012    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3013        int ofs = (r - TCG_REG_X19 + 2) * 8;
3014        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3015    }
3016
3017    /* Pop (FP, LR), restore SP to previous frame.  */
3018    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3019                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3020    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3021}
3022
3023static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3024{
3025    int i;
3026    for (i = 0; i < count; ++i) {
3027        p[i] = NOP;
3028    }
3029}
3030
3031typedef struct {
3032    DebugFrameHeader h;
3033    uint8_t fde_def_cfa[4];
3034    uint8_t fde_reg_ofs[24];
3035} DebugFrame;
3036
3037#define ELF_HOST_MACHINE EM_AARCH64
3038
3039static const DebugFrame debug_frame = {
3040    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3041    .h.cie.id = -1,
3042    .h.cie.version = 1,
3043    .h.cie.code_align = 1,
3044    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3045    .h.cie.return_column = TCG_REG_LR,
3046
3047    /* Total FDE size does not include the "len" member.  */
3048    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3049
3050    .fde_def_cfa = {
3051        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3052        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3053        (FRAME_SIZE >> 7)
3054    },
3055    .fde_reg_ofs = {
3056        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3057        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3058        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3059        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3060        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3061        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3062        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3063        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3064        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3065        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3066        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3067        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3068    }
3069};
3070
3071void tcg_register_jit(const void *buf, size_t buf_size)
3072{
3073    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3074}
3075