xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 719f0f60)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21#ifdef CONFIG_DEBUG_TCG
22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
32};
33#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
69#define TCG_REG_TMP TCG_REG_X30
70#define TCG_VEC_TMP TCG_REG_V31
71
72#ifndef CONFIG_SOFTMMU
73/* Note that XZR cannot be encoded in the address base register slot,
74   as that actually encodes SP.  So if we need to zero-extend the guest
75   address, via the address index register slot, we need to load even
76   a zero guest base into a register.  */
77#define USE_GUEST_BASE     (guest_base != 0 || TARGET_LONG_BITS == 32)
78#define TCG_REG_GUEST_BASE TCG_REG_X28
79#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
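
/* Worked example: both relocations operate in units of whole insns,
   because the subtraction above is between tcg_insn_unit (uint32_t)
   pointers.  A branch at host address 0x1000 targeting 0x2000 yields
   offset = (0x2000 - 0x1000) / 4 = 0x400, which fits in 19 (and 26)
   signed bits and is deposited at bit 0 (B/BL) or bit 5 (B.cond/CBZ).  */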
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
122#define TCG_CT_CONST_AIMM 0x100
123#define TCG_CT_CONST_LIMM 0x200
124#define TCG_CT_CONST_ZERO 0x400
125#define TCG_CT_CONST_MONE 0x800
126#define TCG_CT_CONST_ORRI 0x1000
127#define TCG_CT_CONST_ANDI 0x2000
128
129#define ALL_GENERAL_REGS  0xffffffffu
130#define ALL_VECTOR_REGS   0xffffffff00000000ull
131
132#ifdef CONFIG_SOFTMMU
133#define ALL_QLDST_REGS \
134    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
135                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
136#else
137#define ALL_QLDST_REGS   ALL_GENERAL_REGS
138#endif
139
140/* Match a constant valid for addition (12-bit, optionally shifted).  */
141static inline bool is_aimm(uint64_t val)
142{
143    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
144}
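
/* A minimal sketch (kept under "#if 0", never compiled) of the kind of
   constants is_aimm() accepts; the helper name is illustrative only.  */
#if 0
static void aimm_examples(void)
{
    g_assert(is_aimm(0xfff));       /* 12-bit immediate, no shift */
    g_assert(is_aimm(0xabc000));    /* 12-bit immediate, LSL #12 */
    g_assert(!is_aimm(0x1001));     /* bits in both halves: rejected */
}
#endif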
145
146/* Match a constant valid for logical operations.  */
147static inline bool is_limm(uint64_t val)
148{
149    /* Taking a simplified view of the logical immediates for now, ignoring
150       the replication that can happen across the field.  Match bit patterns
151       of the forms
152           0....01....1
153           0..01..10..0
154       and their inverses.  */
155
156    /* Make things easier below, by testing the form with msb clear. */
157    if ((int64_t)val < 0) {
158        val = ~val;
159    }
160    if (val == 0) {
161        return false;
162    }
163    val += val & -val;
164    return (val & (val - 1)) == 0;
165}
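
/* Worked examples: 0x0ff0 is accepted (adding the lowest set bit turns
   0x0ff0 into 0x1000, a power of two), 0x0f0f is rejected (two runs of
   ones remain), and a pattern with the high bit set such as
   0xffffffffffffff00 is accepted via the initial inversion.  Note that
   a replicated immediate like 0x0101010101010101, although encodable
   by the architecture, is rejected by this simplified test.  */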
166
167/* Return true if v16 is a valid 16-bit shifted immediate.  */
168static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
169{
170    if (v16 == (v16 & 0xff)) {
171        *cmode = 0x8;
172        *imm8 = v16 & 0xff;
173        return true;
174    } else if (v16 == (v16 & 0xff00)) {
175        *cmode = 0xa;
176        *imm8 = v16 >> 8;
177        return true;
178    }
179    return false;
180}
181
182/* Return true if v32 is a valid 32-bit shifted immediate.  */
183static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
184{
185    if (v32 == (v32 & 0xff)) {
186        *cmode = 0x0;
187        *imm8 = v32 & 0xff;
188        return true;
189    } else if (v32 == (v32 & 0xff00)) {
190        *cmode = 0x2;
191        *imm8 = (v32 >> 8) & 0xff;
192        return true;
193    } else if (v32 == (v32 & 0xff0000)) {
194        *cmode = 0x4;
195        *imm8 = (v32 >> 16) & 0xff;
196        return true;
197    } else if (v32 == (v32 & 0xff000000)) {
198        *cmode = 0x6;
199        *imm8 = v32 >> 24;
200        return true;
201    }
202    return false;
203}
204
205/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
206static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
207{
208    if ((v32 & 0xffff00ff) == 0xff) {
209        *cmode = 0xc;
210        *imm8 = (v32 >> 8) & 0xff;
211        return true;
212    } else if ((v32 & 0xff00ffff) == 0xffff) {
213        *cmode = 0xd;
214        *imm8 = (v32 >> 16) & 0xff;
215        return true;
216    }
217    return false;
218}
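
/* Examples of the AdvSIMD modified-immediate encodings chosen above:
   a 16-bit value 0x1200 gives cmode 0xa, imm8 0x12 (imm8 << 8);
   a 32-bit value 0x00ff0000 gives cmode 0x4, imm8 0xff (imm8 << 16);
   a "shifting ones" value 0x0000abff gives cmode 0xc, imm8 0xab,
   i.e. (imm8 << 8) | 0xff.  */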
219
220/* Return true if v32 is a valid float32 immediate.  */
221static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
222{
223    if (extract32(v32, 0, 19) == 0
224        && (extract32(v32, 25, 6) == 0x20
225            || extract32(v32, 25, 6) == 0x1f)) {
226        *cmode = 0xf;
227        *imm8 = (extract32(v32, 31, 1) << 7)
228              | (extract32(v32, 25, 1) << 6)
229              | extract32(v32, 19, 6);
230        return true;
231    }
232    return false;
233}
234
235/* Return true if v64 is a valid float64 immediate.  */
236static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
237{
238    if (extract64(v64, 0, 48) == 0
239        && (extract64(v64, 54, 9) == 0x100
240            || extract64(v64, 54, 9) == 0x0ff)) {
241        *cmode = 0xf;
242        *imm8 = (extract64(v64, 63, 1) << 7)
243              | (extract64(v64, 54, 1) << 6)
244              | extract64(v64, 48, 6);
245        return true;
246    }
247    return false;
248}
249
250/*
251 * Return non-zero if v32 can be formed by MOVI+ORR.
252 * Place the parameters for MOVI in (cmode, imm8).
253 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
254 */
255static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
256{
257    int i;
258
259    for (i = 6; i > 0; i -= 2) {
260        /* Mask out one byte we can add with ORR.  */
261        uint32_t tmp = v32 & ~(0xffu << (i * 4));
262        if (is_shimm32(tmp, cmode, imm8) ||
263            is_soimm32(tmp, cmode, imm8)) {
264            break;
265        }
266    }
267    return i;
268}
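
/* Worked example: v32 = 0x00ff00ff is neither a shifted nor a
   shifting-ones immediate, but masking out the byte at bits [23:16]
   leaves 0x000000ff (cmode 0x0, imm8 0xff), so the function returns
   i = 4.  The caller then emits MOVI with that cmode/imm8 plus ORR
   with cmode 4 and imm8 = extract32(v32, 16, 8) = 0xff to rebuild
   the constant.  */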
269
270/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
271static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
272{
273    if (v32 == deposit32(v32, 16, 16, v32)) {
274        return is_shimm16(v32, cmode, imm8);
275    } else {
276        return is_shimm32(v32, cmode, imm8);
277    }
278}
279
280static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
281{
282    if (ct & TCG_CT_CONST) {
283        return 1;
284    }
285    if (type == TCG_TYPE_I32) {
286        val = (int32_t)val;
287    }
288    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
289        return 1;
290    }
291    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
298        return 1;
299    }
300
301    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
302    case 0:
303        break;
304    case TCG_CT_CONST_ANDI:
305        val = ~val;
306        /* fallthru */
307    case TCG_CT_CONST_ORRI:
308        if (val == deposit64(val, 32, 32, val)) {
309            int cmode, imm8;
310            return is_shimm1632(val, &cmode, &imm8);
311        }
312        break;
313    default:
314        /* Both bits should not be set for the same insn.  */
315        g_assert_not_reached();
316    }
317
318    return 0;
319}
320
321enum aarch64_cond_code {
322    COND_EQ = 0x0,
323    COND_NE = 0x1,
324    COND_CS = 0x2,     /* Unsigned greater or equal */
325    COND_HS = COND_CS, /* ALIAS greater or equal */
326    COND_CC = 0x3,     /* Unsigned less than */
327    COND_LO = COND_CC, /* ALIAS Lower */
328    COND_MI = 0x4,     /* Negative */
329    COND_PL = 0x5,     /* Zero or greater */
330    COND_VS = 0x6,     /* Overflow */
331    COND_VC = 0x7,     /* No overflow */
332    COND_HI = 0x8,     /* Unsigned greater than */
333    COND_LS = 0x9,     /* Unsigned less or equal */
334    COND_GE = 0xa,
335    COND_LT = 0xb,
336    COND_GT = 0xc,
337    COND_LE = 0xd,
338    COND_AL = 0xe,
339    COND_NV = 0xf, /* behaves like COND_AL here */
340};
341
342static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
343    [TCG_COND_EQ] = COND_EQ,
344    [TCG_COND_NE] = COND_NE,
345    [TCG_COND_LT] = COND_LT,
346    [TCG_COND_GE] = COND_GE,
347    [TCG_COND_LE] = COND_LE,
348    [TCG_COND_GT] = COND_GT,
349    /* unsigned */
350    [TCG_COND_LTU] = COND_LO,
351    [TCG_COND_GTU] = COND_HI,
352    [TCG_COND_GEU] = COND_HS,
353    [TCG_COND_LEU] = COND_LS,
354};
355
356typedef enum {
357    LDST_ST = 0,    /* store */
358    LDST_LD = 1,    /* load */
359    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
360    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
361} AArch64LdstType;
362
363/* We encode the format of the insn into the beginning of the name, so that
364   we can have the preprocessor help "typecheck" the insn vs the output
365   function.  Arm didn't provide us with nice names for the formats, so we
366   use the section number of the architecture reference manual in which the
367   instruction group is described.  */
368typedef enum {
369    /* Compare and branch (immediate).  */
370    I3201_CBZ       = 0x34000000,
371    I3201_CBNZ      = 0x35000000,
372
373    /* Conditional branch (immediate).  */
374    I3202_B_C       = 0x54000000,
375
376    /* Unconditional branch (immediate).  */
377    I3206_B         = 0x14000000,
378    I3206_BL        = 0x94000000,
379
380    /* Unconditional branch (register).  */
381    I3207_BR        = 0xd61f0000,
382    I3207_BLR       = 0xd63f0000,
383    I3207_RET       = 0xd65f0000,
384
385    /* AdvSIMD load/store single structure.  */
386    I3303_LD1R      = 0x0d40c000,
387
388    /* Load literal for loading the address at pc-relative offset */
389    I3305_LDR       = 0x58000000,
390    I3305_LDR_v64   = 0x5c000000,
391    I3305_LDR_v128  = 0x9c000000,
392
393    /* Load/store register.  Described here as 3.3.12, but the helper
394       that emits them can transform to 3.3.10 or 3.3.13.  */
395    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
396    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
397    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
398    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
399
400    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
401    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
402    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
403    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
404
405    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
406    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
407
408    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
409    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
410    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
411
412    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
413    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
414
415    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
416    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
417
418    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
419    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
420
421    I3312_TO_I3310  = 0x00200800,
422    I3312_TO_I3313  = 0x01000000,
423
424    /* Load/store register pair instructions.  */
425    I3314_LDP       = 0x28400000,
426    I3314_STP       = 0x28000000,
427
428    /* Add/subtract immediate instructions.  */
429    I3401_ADDI      = 0x11000000,
430    I3401_ADDSI     = 0x31000000,
431    I3401_SUBI      = 0x51000000,
432    I3401_SUBSI     = 0x71000000,
433
434    /* Bitfield instructions.  */
435    I3402_BFM       = 0x33000000,
436    I3402_SBFM      = 0x13000000,
437    I3402_UBFM      = 0x53000000,
438
439    /* Extract instruction.  */
440    I3403_EXTR      = 0x13800000,
441
442    /* Logical immediate instructions.  */
443    I3404_ANDI      = 0x12000000,
444    I3404_ORRI      = 0x32000000,
445    I3404_EORI      = 0x52000000,
446
447    /* Move wide immediate instructions.  */
448    I3405_MOVN      = 0x12800000,
449    I3405_MOVZ      = 0x52800000,
450    I3405_MOVK      = 0x72800000,
451
452    /* PC relative addressing instructions.  */
453    I3406_ADR       = 0x10000000,
454    I3406_ADRP      = 0x90000000,
455
456    /* Add/subtract shifted register instructions (without a shift).  */
457    I3502_ADD       = 0x0b000000,
458    I3502_ADDS      = 0x2b000000,
459    I3502_SUB       = 0x4b000000,
460    I3502_SUBS      = 0x6b000000,
461
462    /* Add/subtract shifted register instructions (with a shift).  */
463    I3502S_ADD_LSL  = I3502_ADD,
464
465    /* Add/subtract with carry instructions.  */
466    I3503_ADC       = 0x1a000000,
467    I3503_SBC       = 0x5a000000,
468
469    /* Conditional select instructions.  */
470    I3506_CSEL      = 0x1a800000,
471    I3506_CSINC     = 0x1a800400,
472    I3506_CSINV     = 0x5a800000,
473    I3506_CSNEG     = 0x5a800400,
474
475    /* Data-processing (1 source) instructions.  */
476    I3507_CLZ       = 0x5ac01000,
477    I3507_RBIT      = 0x5ac00000,
478    I3507_REV16     = 0x5ac00400,
479    I3507_REV32     = 0x5ac00800,
480    I3507_REV64     = 0x5ac00c00,
481
482    /* Data-processing (2 source) instructions.  */
483    I3508_LSLV      = 0x1ac02000,
484    I3508_LSRV      = 0x1ac02400,
485    I3508_ASRV      = 0x1ac02800,
486    I3508_RORV      = 0x1ac02c00,
487    I3508_SMULH     = 0x9b407c00,
488    I3508_UMULH     = 0x9bc07c00,
489    I3508_UDIV      = 0x1ac00800,
490    I3508_SDIV      = 0x1ac00c00,
491
492    /* Data-processing (3 source) instructions.  */
493    I3509_MADD      = 0x1b000000,
494    I3509_MSUB      = 0x1b008000,
495
496    /* Logical shifted register instructions (without a shift).  */
497    I3510_AND       = 0x0a000000,
498    I3510_BIC       = 0x0a200000,
499    I3510_ORR       = 0x2a000000,
500    I3510_ORN       = 0x2a200000,
501    I3510_EOR       = 0x4a000000,
502    I3510_EON       = 0x4a200000,
503    I3510_ANDS      = 0x6a000000,
504
505    /* Logical shifted register instructions (with a shift).  */
506    I3502S_AND_LSR  = I3510_AND | (1 << 22),
507
508    /* AdvSIMD copy */
509    I3605_DUP      = 0x0e000400,
510    I3605_INS      = 0x4e001c00,
511    I3605_UMOV     = 0x0e003c00,
512
513    /* AdvSIMD modified immediate */
514    I3606_MOVI      = 0x0f000400,
515    I3606_MVNI      = 0x2f000400,
516    I3606_BIC       = 0x2f001400,
517    I3606_ORR       = 0x0f001400,
518
519    /* AdvSIMD scalar shift by immediate */
520    I3609_SSHR      = 0x5f000400,
521    I3609_SSRA      = 0x5f001400,
522    I3609_SHL       = 0x5f005400,
523    I3609_USHR      = 0x7f000400,
524    I3609_USRA      = 0x7f001400,
525    I3609_SLI       = 0x7f005400,
526
527    /* AdvSIMD scalar three same */
528    I3611_SQADD     = 0x5e200c00,
529    I3611_SQSUB     = 0x5e202c00,
530    I3611_CMGT      = 0x5e203400,
531    I3611_CMGE      = 0x5e203c00,
532    I3611_SSHL      = 0x5e204400,
533    I3611_ADD       = 0x5e208400,
534    I3611_CMTST     = 0x5e208c00,
535    I3611_UQADD     = 0x7e200c00,
536    I3611_UQSUB     = 0x7e202c00,
537    I3611_CMHI      = 0x7e203400,
538    I3611_CMHS      = 0x7e203c00,
539    I3611_USHL      = 0x7e204400,
540    I3611_SUB       = 0x7e208400,
541    I3611_CMEQ      = 0x7e208c00,
542
543    /* AdvSIMD scalar two-reg misc */
544    I3612_CMGT0     = 0x5e208800,
545    I3612_CMEQ0     = 0x5e209800,
546    I3612_CMLT0     = 0x5e20a800,
547    I3612_ABS       = 0x5e20b800,
548    I3612_CMGE0     = 0x7e208800,
549    I3612_CMLE0     = 0x7e209800,
550    I3612_NEG       = 0x7e20b800,
551
552    /* AdvSIMD shift by immediate */
553    I3614_SSHR      = 0x0f000400,
554    I3614_SSRA      = 0x0f001400,
555    I3614_SHL       = 0x0f005400,
556    I3614_SLI       = 0x2f005400,
557    I3614_USHR      = 0x2f000400,
558    I3614_USRA      = 0x2f001400,
559
560    /* AdvSIMD three same.  */
561    I3616_ADD       = 0x0e208400,
562    I3616_AND       = 0x0e201c00,
563    I3616_BIC       = 0x0e601c00,
564    I3616_BIF       = 0x2ee01c00,
565    I3616_BIT       = 0x2ea01c00,
566    I3616_BSL       = 0x2e601c00,
567    I3616_EOR       = 0x2e201c00,
568    I3616_MUL       = 0x0e209c00,
569    I3616_ORR       = 0x0ea01c00,
570    I3616_ORN       = 0x0ee01c00,
571    I3616_SUB       = 0x2e208400,
572    I3616_CMGT      = 0x0e203400,
573    I3616_CMGE      = 0x0e203c00,
574    I3616_CMTST     = 0x0e208c00,
575    I3616_CMHI      = 0x2e203400,
576    I3616_CMHS      = 0x2e203c00,
577    I3616_CMEQ      = 0x2e208c00,
578    I3616_SMAX      = 0x0e206400,
579    I3616_SMIN      = 0x0e206c00,
580    I3616_SSHL      = 0x0e204400,
581    I3616_SQADD     = 0x0e200c00,
582    I3616_SQSUB     = 0x0e202c00,
583    I3616_UMAX      = 0x2e206400,
584    I3616_UMIN      = 0x2e206c00,
585    I3616_UQADD     = 0x2e200c00,
586    I3616_UQSUB     = 0x2e202c00,
587    I3616_USHL      = 0x2e204400,
588
589    /* AdvSIMD two-reg misc.  */
590    I3617_CMGT0     = 0x0e208800,
591    I3617_CMEQ0     = 0x0e209800,
592    I3617_CMLT0     = 0x0e20a800,
593    I3617_CMGE0     = 0x2e208800,
594    I3617_CMLE0     = 0x2e209800,
595    I3617_NOT       = 0x2e205800,
596    I3617_ABS       = 0x0e20b800,
597    I3617_NEG       = 0x2e20b800,
598
599    /* System instructions.  */
600    NOP             = 0xd503201f,
601    DMB_ISH         = 0xd50338bf,
602    DMB_LD          = 0x00000100,
603    DMB_ST          = 0x00000200,
604} AArch64Insn;
605
606static inline uint32_t tcg_in32(TCGContext *s)
607{
608    uint32_t v = *(uint32_t *)s->code_ptr;
609    return v;
610}
611
612/* Emit an opcode with "type-checking" of the format.  */
613#define tcg_out_insn(S, FMT, OP, ...) \
614    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
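
/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, 0); a mismatched pair
   such as tcg_out_insn(s, 3401, ORR, ...) fails to compile because the
   glued name I3401_ORR does not exist.  */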
615
616static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
617                              TCGReg rt, TCGReg rn, unsigned size)
618{
619    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
620}
621
622static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
623                              int imm19, TCGReg rt)
624{
625    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
626}
627
628static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
629                              TCGReg rt, int imm19)
630{
631    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
632}
633
634static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
635                              TCGCond c, int imm19)
636{
637    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
638}
639
640static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
641{
642    tcg_out32(s, insn | (imm26 & 0x03ffffff));
643}
644
645static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
646{
647    tcg_out32(s, insn | rn << 5);
648}
649
650static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
651                              TCGReg r1, TCGReg r2, TCGReg rn,
652                              tcg_target_long ofs, bool pre, bool w)
653{
654    insn |= 1u << 31; /* ext */
655    insn |= pre << 24;
656    insn |= w << 23;
657
658    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
659    insn |= (ofs & (0x7f << 3)) << (15 - 3);
660
661    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
662}
663
664static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
665                              TCGReg rd, TCGReg rn, uint64_t aimm)
666{
667    if (aimm > 0xfff) {
668        tcg_debug_assert((aimm & 0xfff) == 0);
669        aimm >>= 12;
670        tcg_debug_assert(aimm <= 0xfff);
671        aimm |= 1 << 12;  /* apply LSL 12 */
672    }
673    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
674}
675
676/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
677   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
678   that feed the DecodeBitMasks pseudo function.  */
679static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
680                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
681{
682    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
683              | rn << 5 | rd);
684}
685
686#define tcg_out_insn_3404  tcg_out_insn_3402
687
688static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
689                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
690{
691    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
692              | rn << 5 | rd);
693}
694
695/* This function is used for the Move (wide immediate) instruction group.
696   Note that SHIFT is a full shift count, not the 2 bit HW field. */
697static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
698                              TCGReg rd, uint16_t half, unsigned shift)
699{
700    tcg_debug_assert((shift & ~0x30) == 0);
701    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
702}
703
704static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
705                              TCGReg rd, int64_t disp)
706{
707    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
708}
709
710/* This function is for 3.5.2 (Add/Subtract shifted register), for
711   the rare occasion when we actually want to supply a shift amount.  */
712static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
713                                      TCGType ext, TCGReg rd, TCGReg rn,
714                                      TCGReg rm, int imm6)
715{
716    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
717}
718
719/* This function is for 3.5.2 (Add/subtract shifted register),
720   and 3.5.10 (Logical shifted register), for the vast majority of cases
721   when we don't want to apply a shift.  Thus it can also be used for
722   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
723static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
724                              TCGReg rd, TCGReg rn, TCGReg rm)
725{
726    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
727}
728
729#define tcg_out_insn_3503  tcg_out_insn_3502
730#define tcg_out_insn_3508  tcg_out_insn_3502
731#define tcg_out_insn_3510  tcg_out_insn_3502
732
733static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
734                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
735{
736    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
737              | tcg_cond_to_aarch64[c] << 12);
738}
739
740static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
741                              TCGReg rd, TCGReg rn)
742{
743    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
744}
745
746static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
747                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
748{
749    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
750}
751
752static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
753                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
754{
755    /* Note that bit 11 set means general register input.  Therefore
756       we can handle both register sets with one function.  */
757    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
758              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
759}
760
761static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
762                              TCGReg rd, bool op, int cmode, uint8_t imm8)
763{
764    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
765              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
766}
767
768static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
769                              TCGReg rd, TCGReg rn, unsigned immhb)
770{
771    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
772}
773
774static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
775                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
776{
777    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
778              | (rn & 0x1f) << 5 | (rd & 0x1f));
779}
780
781static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
782                              unsigned size, TCGReg rd, TCGReg rn)
783{
784    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
785}
786
787static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
788                              TCGReg rd, TCGReg rn, unsigned immhb)
789{
790    tcg_out32(s, insn | q << 30 | immhb << 16
791              | (rn & 0x1f) << 5 | (rd & 0x1f));
792}
793
794static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
795                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
796{
797    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
798              | (rn & 0x1f) << 5 | (rd & 0x1f));
799}
800
801static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
802                              unsigned size, TCGReg rd, TCGReg rn)
803{
804    tcg_out32(s, insn | q << 30 | (size << 22)
805              | (rn & 0x1f) << 5 | (rd & 0x1f));
806}
807
808static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
809                              TCGReg rd, TCGReg base, TCGType ext,
810                              TCGReg regoff)
811{
812    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
813    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
814              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
815}
816
817static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
818                              TCGReg rd, TCGReg rn, intptr_t offset)
819{
820    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
821}
822
823static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
824                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
825{
826    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
827    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
828              | rn << 5 | (rd & 0x1f));
829}
830
831/* Register to register move using ORR (shifted register with no shift). */
832static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
833{
834    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
835}
836
837/* Register to register move using ADDI (move to/from SP).  */
838static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
839{
840    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
841}
842
843/* This function is used for the Logical (immediate) instruction group.
844   The value of LIMM must satisfy IS_LIMM.  See the comment above about
845   only supporting simplified logical immediates.  */
846static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
847                             TCGReg rd, TCGReg rn, uint64_t limm)
848{
849    unsigned h, l, r, c;
850
851    tcg_debug_assert(is_limm(limm));
852
853    h = clz64(limm);
854    l = ctz64(limm);
855    if (l == 0) {
856        r = 0;                  /* form 0....01....1 */
857        c = ctz64(~limm) - 1;
858        if (h == 0) {
859            r = clz64(~limm);   /* form 1..10..01..1 */
860            c += r;
861        }
862    } else {
863        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
864        c = r - h - 1;
865    }
866    if (ext == TCG_TYPE_I32) {
867        r &= 31;
868        c &= 31;
869    }
870
871    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
872}
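
/* Worked example (64-bit): limm = 0x0ff0 gives h = clz64 = 52 and
   l = ctz64 = 4, so the second branch produces r = 64 - 4 = 60 and
   c = r - h - 1 = 7.  DecodeBitMasks then builds a run of c + 1 = 8
   ones rotated right by r = 60 (i.e. left by 4), which is 0x0ff0.  */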
873
874static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
875                             TCGReg rd, int64_t v64)
876{
877    bool q = type == TCG_TYPE_V128;
878    int cmode, imm8, i;
879
880    /* Test all bytes equal first.  */
881    if (vece == MO_8) {
882        imm8 = (uint8_t)v64;
883        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
884        return;
885    }
886
887    /*
888     * Test all bytes 0x00 or 0xff second.  This can match cases that
889     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
890     */
891    for (i = imm8 = 0; i < 8; i++) {
892        uint8_t byte = v64 >> (i * 8);
893        if (byte == 0xff) {
894            imm8 |= 1 << i;
895        } else if (byte != 0) {
896            goto fail_bytes;
897        }
898    }
899    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
900    return;
901 fail_bytes:
902
903    /*
904     * Tests for various replications.  For each element width, if we
905     * cannot find an expansion there's no point checking a larger
906     * width because we already know by replication it cannot match.
907     */
908    if (vece == MO_16) {
909        uint16_t v16 = v64;
910
911        if (is_shimm16(v16, &cmode, &imm8)) {
912            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
913            return;
914        }
915        if (is_shimm16(~v16, &cmode, &imm8)) {
916            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
917            return;
918        }
919
920        /*
921         * Otherwise, all remaining constants can be loaded in two insns:
922         * rd = v16 & 0xff, rd |= v16 & 0xff00.
923         */
924        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
925        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
926        return;
927    } else if (vece == MO_32) {
928        uint32_t v32 = v64;
929        uint32_t n32 = ~v32;
930
931        if (is_shimm32(v32, &cmode, &imm8) ||
932            is_soimm32(v32, &cmode, &imm8) ||
933            is_fimm32(v32, &cmode, &imm8)) {
934            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
935            return;
936        }
937        if (is_shimm32(n32, &cmode, &imm8) ||
938            is_soimm32(n32, &cmode, &imm8)) {
939            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
940            return;
941        }
942
943        /*
944         * Restrict the set of constants to those we can load with
945         * two instructions.  Others we load from the pool.
946         */
947        i = is_shimm32_pair(v32, &cmode, &imm8);
948        if (i) {
949            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
950            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
951            return;
952        }
953        i = is_shimm32_pair(n32, &cmode, &imm8);
954        if (i) {
955            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
956            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
957            return;
958        }
959    } else if (is_fimm64(v64, &cmode, &imm8)) {
960        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
961        return;
962    }
963
964    /*
965     * As a last resort, load from the constant pool.  Sadly there
966     * is no LD1R (literal), so store the full 16-byte vector.
967     */
968    if (type == TCG_TYPE_V128) {
969        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
970        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
971    } else {
972        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
973        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
974    }
975}
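
/* Worked example: v64 = 0x00ff00ff00ff00ff with vece > MO_8 is caught
   by the all-bytes-0x00-or-0xff test: bytes 0, 2, 4 and 6 are 0xff, so
   imm8 = 0x55 and a single MOVI (op = 1, cmode 0xe) builds the mask.  */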
976
977static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
978                            TCGReg rd, TCGReg rs)
979{
980    int is_q = type - TCG_TYPE_V64;
981    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
982    return true;
983}
984
985static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
986                             TCGReg r, TCGReg base, intptr_t offset)
987{
988    TCGReg temp = TCG_REG_TMP;
989
990    if (offset < -0xffffff || offset > 0xffffff) {
991        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
992        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
993        base = temp;
994    } else {
995        AArch64Insn add_insn = I3401_ADDI;
996
997        if (offset < 0) {
998            add_insn = I3401_SUBI;
999            offset = -offset;
1000        }
1001        if (offset & 0xfff000) {
1002            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1003            base = temp;
1004        }
1005        if (offset & 0xfff) {
1006            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1007            base = temp;
1008        }
1009    }
1010    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1011    return true;
1012}
1013
1014static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1015                         tcg_target_long value)
1016{
1017    tcg_target_long svalue = value;
1018    tcg_target_long ivalue = ~value;
1019    tcg_target_long t0, t1, t2;
1020    int s0, s1;
1021    AArch64Insn opc;
1022
1023    switch (type) {
1024    case TCG_TYPE_I32:
1025    case TCG_TYPE_I64:
1026        tcg_debug_assert(rd < 32);
1027        break;
1028    default:
1029        g_assert_not_reached();
1030    }
1031
1032    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1033       values within [2**31, 2**32-1], we can create smaller sequences by
1034       interpreting this as a negative 32-bit number, while ensuring that
1035       the high 32 bits are cleared by setting SF=0.  */
1036    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1037        svalue = (int32_t)value;
1038        value = (uint32_t)value;
1039        ivalue = (uint32_t)ivalue;
1040        type = TCG_TYPE_I32;
1041    }
1042
1043    /* Speed things up by handling the common case of small positive
1044       and negative values specially.  */
1045    if ((value & ~0xffffull) == 0) {
1046        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1047        return;
1048    } else if ((ivalue & ~0xffffull) == 0) {
1049        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1050        return;
1051    }
1052
1053    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1054       use the sign-extended value.  That lets us match rotated values such
1055       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1056    if (is_limm(svalue)) {
1057        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1058        return;
1059    }
1060
1061    /* Look for host pointer values within 4G of the PC.  This happens
1062       often when loading pointers to QEMU's own data structures.  */
1063    if (type == TCG_TYPE_I64) {
1064        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1065        tcg_target_long disp = value - src_rx;
1066        if (disp == sextract64(disp, 0, 21)) {
1067            tcg_out_insn(s, 3406, ADR, rd, disp);
1068            return;
1069        }
1070        disp = (value >> 12) - (src_rx >> 12);
1071        if (disp == sextract64(disp, 0, 21)) {
1072            tcg_out_insn(s, 3406, ADRP, rd, disp);
1073            if (value & 0xfff) {
1074                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1075            }
1076            return;
1077        }
1078    }
1079
1080    /* Would it take fewer insns to begin with MOVN?  */
1081    if (ctpop64(value) >= 32) {
1082        t0 = ivalue;
1083        opc = I3405_MOVN;
1084    } else {
1085        t0 = value;
1086        opc = I3405_MOVZ;
1087    }
1088    s0 = ctz64(t0) & (63 & -16);
1089    t1 = t0 & ~(0xffffUL << s0);
1090    s1 = ctz64(t1) & (63 & -16);
1091    t2 = t1 & ~(0xffffUL << s1);
1092    if (t2 == 0) {
1093        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1094        if (t1 != 0) {
1095            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1096        }
1097        return;
1098    }
1099
1100    /* For more than 2 insns, dump it into the constant pool.  */
1101    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1102    tcg_out_insn(s, 3305, LDR, 0, rd);
1103}
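
/* A minimal usage sketch, kept under "#if 0"; the register and the
   values are illustrative, and it assumes the constants are not within
   ADR/ADRP range of the generated code.  */
#if 0
static void movi_examples(TCGContext *s)
{
    /* 0xffff1234ffffffff has 53 set bits, so the MOVN path wins:
       a single MOVN x0, #0xedcb, lsl #32 materialises the value.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, 0xffff1234ffffffffull);

    /* 0x12340000abcd needs two insns:
       MOVZ x0, #0xabcd followed by MOVK x0, #0x1234, lsl #32.  */
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, 0x12340000abcdull);
}
#endif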
1104
1105/* Define something more legible for general use.  */
1106#define tcg_out_ldst_r  tcg_out_insn_3310
1107
1108static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1109                         TCGReg rn, intptr_t offset, int lgsize)
1110{
1111    /* If the offset is naturally aligned and in range, then we can
1112       use the scaled uimm12 encoding */
1113    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1114        uintptr_t scaled_uimm = offset >> lgsize;
1115        if (scaled_uimm <= 0xfff) {
1116            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1117            return;
1118        }
1119    }
1120
1121    /* Small signed offsets can use the unscaled encoding.  */
1122    if (offset >= -256 && offset < 256) {
1123        tcg_out_insn_3312(s, insn, rd, rn, offset);
1124        return;
1125    }
1126
1127    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1128    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1129    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1130}
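
/* Offset selection examples for an 8-byte access (lgsize = 3):
   offset 0x1008 is aligned and 0x1008 >> 3 = 0x201 <= 0xfff, so the
   scaled uimm12 form is used; offset -8 fits the unscaled signed
   9-bit form; offset 0x10000 is aligned but 0x10000 >> 3 = 0x2000
   exceeds 0xfff and is not a small signed offset, so it is built in
   TCG_REG_TMP and the register-offset form is used.  */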
1131
1132static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1133{
1134    if (ret == arg) {
1135        return true;
1136    }
1137    switch (type) {
1138    case TCG_TYPE_I32:
1139    case TCG_TYPE_I64:
1140        if (ret < 32 && arg < 32) {
1141            tcg_out_movr(s, type, ret, arg);
1142            break;
1143        } else if (ret < 32) {
1144            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
1145            break;
1146        } else if (arg < 32) {
1147            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1148            break;
1149        }
1150        /* FALLTHRU */
1151
1152    case TCG_TYPE_V64:
1153        tcg_debug_assert(ret >= 32 && arg >= 32);
1154        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1155        break;
1156    case TCG_TYPE_V128:
1157        tcg_debug_assert(ret >= 32 && arg >= 32);
1158        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1159        break;
1160
1161    default:
1162        g_assert_not_reached();
1163    }
1164    return true;
1165}
1166
1167static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1168                       TCGReg base, intptr_t ofs)
1169{
1170    AArch64Insn insn;
1171    int lgsz;
1172
1173    switch (type) {
1174    case TCG_TYPE_I32:
1175        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1176        lgsz = 2;
1177        break;
1178    case TCG_TYPE_I64:
1179        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1180        lgsz = 3;
1181        break;
1182    case TCG_TYPE_V64:
1183        insn = I3312_LDRVD;
1184        lgsz = 3;
1185        break;
1186    case TCG_TYPE_V128:
1187        insn = I3312_LDRVQ;
1188        lgsz = 4;
1189        break;
1190    default:
1191        g_assert_not_reached();
1192    }
1193    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1194}
1195
1196static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1197                       TCGReg base, intptr_t ofs)
1198{
1199    AArch64Insn insn;
1200    int lgsz;
1201
1202    switch (type) {
1203    case TCG_TYPE_I32:
1204        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1205        lgsz = 2;
1206        break;
1207    case TCG_TYPE_I64:
1208        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1209        lgsz = 3;
1210        break;
1211    case TCG_TYPE_V64:
1212        insn = I3312_STRVD;
1213        lgsz = 3;
1214        break;
1215    case TCG_TYPE_V128:
1216        insn = I3312_STRVQ;
1217        lgsz = 4;
1218        break;
1219    default:
1220        g_assert_not_reached();
1221    }
1222    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1223}
1224
1225static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1226                               TCGReg base, intptr_t ofs)
1227{
1228    if (type <= TCG_TYPE_I64 && val == 0) {
1229        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1230        return true;
1231    }
1232    return false;
1233}
1234
1235static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1236                               TCGReg rn, unsigned int a, unsigned int b)
1237{
1238    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1239}
1240
1241static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1242                                TCGReg rn, unsigned int a, unsigned int b)
1243{
1244    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1245}
1246
1247static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1248                                TCGReg rn, unsigned int a, unsigned int b)
1249{
1250    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1251}
1252
1253static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1254                                TCGReg rn, TCGReg rm, unsigned int a)
1255{
1256    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1257}
1258
1259static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1260                               TCGReg rd, TCGReg rn, unsigned int m)
1261{
1262    int bits = ext ? 64 : 32;
1263    int max = bits - 1;
1264    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1265}
1266
1267static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1268                               TCGReg rd, TCGReg rn, unsigned int m)
1269{
1270    int max = ext ? 63 : 31;
1271    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1272}
1273
1274static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1275                               TCGReg rd, TCGReg rn, unsigned int m)
1276{
1277    int max = ext ? 63 : 31;
1278    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1279}
1280
1281static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1282                                TCGReg rd, TCGReg rn, unsigned int m)
1283{
1284    int max = ext ? 63 : 31;
1285    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1286}
1287
1288static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1289                                TCGReg rd, TCGReg rn, unsigned int m)
1290{
1291    int max = ext ? 63 : 31;
1292    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1293}
1294
1295static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1296                               TCGReg rn, unsigned lsb, unsigned width)
1297{
1298    unsigned size = ext ? 64 : 32;
1299    unsigned a = (size - lsb) & (size - 1);
1300    unsigned b = width - 1;
1301    tcg_out_bfm(s, ext, rd, rn, a, b);
1302}
1303
1304static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1305                        tcg_target_long b, bool const_b)
1306{
1307    if (const_b) {
1308        /* Using CMP or CMN aliases.  */
1309        if (b >= 0) {
1310            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1311        } else {
1312            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1313        }
1314    } else {
1315        /* Using CMP alias SUBS wzr, Wn, Wm */
1316        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1317    }
1318}
1319
1320static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1321{
1322    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1323    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1324    tcg_out_insn(s, 3206, B, offset);
1325}
1326
1327static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1328{
1329    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1330    if (offset == sextract64(offset, 0, 26)) {
1331        tcg_out_insn(s, 3206, B, offset);
1332    } else {
1333        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1334        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1335    }
1336}
1337
1338static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1339{
1340    tcg_out_insn(s, 3207, BLR, reg);
1341}
1342
1343static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1344{
1345    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1346    if (offset == sextract64(offset, 0, 26)) {
1347        tcg_out_insn(s, 3206, BL, offset);
1348    } else {
1349        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1350        tcg_out_callr(s, TCG_REG_TMP);
1351    }
1352}
1353
1354void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1355                              uintptr_t jmp_rw, uintptr_t addr)
1356{
1357    tcg_insn_unit i1, i2;
1358    TCGType rt = TCG_TYPE_I64;
1359    TCGReg  rd = TCG_REG_TMP;
1360    uint64_t pair;
1361
1362    ptrdiff_t offset = addr - jmp_rx;
1363
1364    if (offset == sextract64(offset, 0, 26)) {
1365        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1366        i2 = NOP;
1367    } else {
1368        offset = (addr >> 12) - (jmp_rx >> 12);
1369
1370        /* patch ADRP */
1371        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1372        /* patch ADDI */
1373        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1374    }
1375    pair = (uint64_t)i2 << 32 | i1;
1376    qatomic_set((uint64_t *)jmp_rw, pair);
1377    flush_idcache_range(jmp_rx, jmp_rw, 8);
1378}
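
/* The B/NOP or ADRP/ADDI pair above is written with one aligned 64-bit
   qatomic_set, so a vcpu concurrently executing the old code observes
   either the old or the new instruction pair, never a mix; this relies
   on the direct-jump slot being 8-byte aligned, and the icache flush
   then publishes the update to instruction fetch.  */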
1379
1380static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1381{
1382    if (!l->has_value) {
1383        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1384        tcg_out_insn(s, 3206, B, 0);
1385    } else {
1386        tcg_out_goto(s, l->u.value_ptr);
1387    }
1388}
1389
1390static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1391                           TCGArg b, bool b_const, TCGLabel *l)
1392{
1393    intptr_t offset;
1394    bool need_cmp;
1395
1396    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1397        need_cmp = false;
1398    } else {
1399        need_cmp = true;
1400        tcg_out_cmp(s, ext, a, b, b_const);
1401    }
1402
1403    if (!l->has_value) {
1404        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1405        offset = tcg_in32(s) >> 5;
1406    } else {
1407        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1408        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1409    }
1410
1411    if (need_cmp) {
1412        tcg_out_insn(s, 3202, B_C, c, offset);
1413    } else if (c == TCG_COND_EQ) {
1414        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1415    } else {
1416        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1417    }
1418}
1419
1420static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1421{
1422    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1423}
1424
1425static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1426{
1427    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1428}
1429
1430static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1431{
1432    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1433}
1434
1435static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1436                               TCGReg rd, TCGReg rn)
1437{
1438    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1439    int bits = (8 << s_bits) - 1;
1440    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1441}
1442
1443static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1444                               TCGReg rd, TCGReg rn)
1445{
1446    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1447    int bits = (8 << s_bits) - 1;
1448    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1449}
1450
1451static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1452                            TCGReg rn, int64_t aimm)
1453{
1454    if (aimm >= 0) {
1455        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1456    } else {
1457        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1458    }
1459}
1460
1461static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1462                            TCGReg rh, TCGReg al, TCGReg ah,
1463                            tcg_target_long bl, tcg_target_long bh,
1464                            bool const_bl, bool const_bh, bool sub)
1465{
1466    TCGReg orig_rl = rl;
1467    AArch64Insn insn;
1468
1469    if (rl == ah || (!const_bh && rl == bh)) {
1470        rl = TCG_REG_TMP;
1471    }
1472
1473    if (const_bl) {
1474        if (bl < 0) {
1475            bl = -bl;
1476            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1477        } else {
1478            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1479        }
1480
1481        if (unlikely(al == TCG_REG_XZR)) {
1482            /* ??? We want to allow al to be zero for the benefit of
1483               negation via subtraction.  However, that leaves open the
1484               possibility of adding 0+const in the low part, and the
1485               immediate add instructions encode XSP not XZR.  Don't try
1486               anything more elaborate here than loading another zero.  */
1487            al = TCG_REG_TMP;
1488            tcg_out_movi(s, ext, al, 0);
1489        }
1490        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1491    } else {
1492        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1493    }
1494
1495    insn = I3503_ADC;
1496    if (const_bh) {
1497        /* Note that the only two constants we support are 0 and -1, and
1498           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1499        if ((bh != 0) ^ sub) {
1500            insn = I3503_SBC;
1501        }
1502        bh = TCG_REG_XZR;
1503    } else if (sub) {
1504        insn = I3503_SBC;
1505    }
1506    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1507
1508    tcg_out_mov(s, ext, orig_rl, rl);
1509}
1510
1511static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1512{
1513    static const uint32_t sync[] = {
1514        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1515        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1516        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1517        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1518        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1519    };
1520    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1521}
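
/* DMB_ISH on its own encodes CRm = 0b1000; OR-ing in DMB_LD and/or
   DMB_ST sets the low CRm bits, so the table above emits DMB ISHLD for
   load-load/load-store ordering, DMB ISHST for store-store ordering,
   and the full DMB ISH for everything else.  */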
1522
1523static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1524                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1525{
1526    TCGReg a1 = a0;
1527    if (is_ctz) {
1528        a1 = TCG_REG_TMP;
1529        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1530    }
1531    if (const_b && b == (ext ? 64 : 32)) {
1532        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1533    } else {
1534        AArch64Insn sel = I3506_CSEL;
1535
1536        tcg_out_cmp(s, ext, a0, 0, 1);
1537        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1538
1539        if (const_b) {
1540            if (b == -1) {
1541                b = TCG_REG_XZR;
1542                sel = I3506_CSINV;
1543            } else if (b == 0) {
1544                b = TCG_REG_XZR;
1545            } else {
1546                tcg_out_movi(s, ext, d, b);
1547                b = d;
1548            }
1549        }
1550        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1551    }
1552}
1553
1554#ifdef CONFIG_SOFTMMU
1555#include "../tcg-ldst.c.inc"
1556
1557/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1558 *                                     TCGMemOpIdx oi, uintptr_t ra)
1559 */
1560static void * const qemu_ld_helpers[16] = {
1561    [MO_UB]   = helper_ret_ldub_mmu,
1562    [MO_LEUW] = helper_le_lduw_mmu,
1563    [MO_LEUL] = helper_le_ldul_mmu,
1564    [MO_LEQ]  = helper_le_ldq_mmu,
1565    [MO_BEUW] = helper_be_lduw_mmu,
1566    [MO_BEUL] = helper_be_ldul_mmu,
1567    [MO_BEQ]  = helper_be_ldq_mmu,
1568};
1569
1570/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1571 *                                     uintxx_t val, TCGMemOpIdx oi,
1572 *                                     uintptr_t ra)
1573 */
1574static void * const qemu_st_helpers[16] = {
1575    [MO_UB]   = helper_ret_stb_mmu,
1576    [MO_LEUW] = helper_le_stw_mmu,
1577    [MO_LEUL] = helper_le_stl_mmu,
1578    [MO_LEQ]  = helper_le_stq_mmu,
1579    [MO_BEUW] = helper_be_stw_mmu,
1580    [MO_BEUL] = helper_be_stl_mmu,
1581    [MO_BEQ]  = helper_be_stq_mmu,
1582};
1583
1584static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1585{
1586    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1587    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1588    tcg_out_insn(s, 3406, ADR, rd, offset);
1589}
1590
1591static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1592{
1593    TCGMemOpIdx oi = lb->oi;
1594    MemOp opc = get_memop(oi);
1595    MemOp size = opc & MO_SIZE;
1596
1597    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1598        return false;
1599    }
1600
1601    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1602    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1603    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1604    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1605    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1606    if (opc & MO_SIGN) {
1607        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1608    } else {
1609        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1610    }
1611
1612    tcg_out_goto(s, lb->raddr);
1613    return true;
1614}
1615
1616static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1617{
1618    TCGMemOpIdx oi = lb->oi;
1619    MemOp opc = get_memop(oi);
1620    MemOp size = opc & MO_SIZE;
1621
1622    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1623        return false;
1624    }
1625
1626    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1627    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1628    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1629    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1630    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1631    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1632    tcg_out_goto(s, lb->raddr);
1633    return true;
1634}
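/* Note on the two slow paths above: they are reached when the TLB compare
 * in tcg_out_tlb_read() fails, and reloc_pc19() retargets the B.NE that was
 * emitted on the fast path at the code generated here.  Arguments are then
 * set up per the AArch64 calling convention to match the helper signatures
 * above (env in X0, guest address in X1, then the store value, TCGMemOpIdx
 * and return address), the helper is called, a load result is moved or
 * sign-extended from X0 into its destination, and execution jumps back to
 * lb->raddr on the fast path.  */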
1635
1636static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1637                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1638                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1639{
1640    TCGLabelQemuLdst *label = new_ldst_label(s);
1641
1642    label->is_ld = is_ld;
1643    label->oi = oi;
1644    label->type = ext;
1645    label->datalo_reg = data_reg;
1646    label->addrlo_reg = addr_reg;
1647    label->raddr = tcg_splitwx_to_rx(raddr);
1648    label->label_ptr[0] = label_ptr;
1649}
1650
1651/* We expect to use a 7-bit scaled negative offset from ENV.  */
1652QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1653QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1654
1655/* These offsets are built into the LDP below.  */
1656QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1657QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1658
1659/* Load and compare a TLB entry, emitting the conditional jump to the
1660   slow path for the failure case, which will be patched later when
1661   finalizing the slow path.  Generated code returns the host addend in
1662   X1 and clobbers X0, X2, X3 and TMP. */
1663static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1664                             tcg_insn_unit **label_ptr, int mem_index,
1665                             bool is_read)
1666{
1667    unsigned a_bits = get_alignment_bits(opc);
1668    unsigned s_bits = opc & MO_SIZE;
1669    unsigned a_mask = (1u << a_bits) - 1;
1670    unsigned s_mask = (1u << s_bits) - 1;
1671    TCGReg x3;
1672    TCGType mask_type;
1673    uint64_t compare_mask;
1674
1675    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1676                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1677
1678    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1679    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1680                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1681
1682    /* Extract the TLB index from the address into X0.  */
1683    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1684                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1685                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1686
1687    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1688    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1689
1690    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1691    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1692               ? offsetof(CPUTLBEntry, addr_read)
1693               : offsetof(CPUTLBEntry, addr_write));
1694    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1695               offsetof(CPUTLBEntry, addend));
1696
1697    /* For aligned accesses, we check the first byte and include the alignment
1698       bits within the address.  For unaligned accesses, we check that we don't
1699       cross pages by using the address of the last byte of the access.  */
1700    if (a_bits >= s_bits) {
1701        x3 = addr_reg;
1702    } else {
1703        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1704                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1705        x3 = TCG_REG_X3;
1706    }
1707    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1708
1709    /* Store the page mask part of the address into X3.  */
1710    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1711                     TCG_REG_X3, x3, compare_mask);
1712
1713    /* Perform the address comparison. */
1714    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1715
1716    /* If not equal, we jump to the slow path. */
1717    *label_ptr = s->code_ptr;
1718    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1719}
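/* Worked example, assuming 4KiB guest pages and 32-byte CPUTLBEntry: for a
 * 4-byte load that requires 4-byte alignment the code above comes out
 * roughly as
 *
 *     ldp  x0, x1, [env, #tlb_ofs]       ; f[mmu_idx].{mask,table}
 *     and  x0, x0, addr, lsr #7          ; index * sizeof(CPUTLBEntry)
 *     add  x1, x1, x0                    ; &table[index]
 *     ldr  x0, [x1, #addr_read]          ; TLB comparator
 *     ldr  x1, [x1, #addend]             ; host addend, kept for fast path
 *     and  x3, addr, #0xfffffffffffff003 ; page number plus alignment bits
 *     cmp  x0, x3
 *     b.ne slow_path
 *
 * Here a_bits >= s_bits, so the address itself is masked and compared; a
 * less-aligned access first adds s_mask - a_mask so that an access crossing
 * a page boundary fails the comparison and takes the slow path too.  */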
1720
1721#endif /* CONFIG_SOFTMMU */
1722
1723static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1724                                   TCGReg data_r, TCGReg addr_r,
1725                                   TCGType otype, TCGReg off_r)
1726{
1727    const MemOp bswap = memop & MO_BSWAP;
1728
1729    switch (memop & MO_SSIZE) {
1730    case MO_UB:
1731        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1732        break;
1733    case MO_SB:
1734        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1735                       data_r, addr_r, otype, off_r);
1736        break;
1737    case MO_UW:
1738        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1739        if (bswap) {
1740            tcg_out_rev16(s, data_r, data_r);
1741        }
1742        break;
1743    case MO_SW:
1744        if (bswap) {
1745            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1746            tcg_out_rev16(s, data_r, data_r);
1747            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1748        } else {
1749            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1750                           data_r, addr_r, otype, off_r);
1751        }
1752        break;
1753    case MO_UL:
1754        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1755        if (bswap) {
1756            tcg_out_rev32(s, data_r, data_r);
1757        }
1758        break;
1759    case MO_SL:
1760        if (bswap) {
1761            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1762            tcg_out_rev32(s, data_r, data_r);
1763            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1764        } else {
1765            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1766        }
1767        break;
1768    case MO_Q:
1769        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1770        if (bswap) {
1771            tcg_out_rev64(s, data_r, data_r);
1772        }
1773        break;
1774    default:
1775        tcg_abort();
1776    }
1777}
1778
1779static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1780                                   TCGReg data_r, TCGReg addr_r,
1781                                   TCGType otype, TCGReg off_r)
1782{
1783    const MemOp bswap = memop & MO_BSWAP;
1784
1785    switch (memop & MO_SIZE) {
1786    case MO_8:
1787        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1788        break;
1789    case MO_16:
1790        if (bswap && data_r != TCG_REG_XZR) {
1791            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1792            data_r = TCG_REG_TMP;
1793        }
1794        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1795        break;
1796    case MO_32:
1797        if (bswap && data_r != TCG_REG_XZR) {
1798            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1799            data_r = TCG_REG_TMP;
1800        }
1801        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1802        break;
1803    case MO_64:
1804        if (bswap && data_r != TCG_REG_XZR) {
1805            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1806            data_r = TCG_REG_TMP;
1807        }
1808        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1809        break;
1810    default:
1811        tcg_abort();
1812    }
1813}
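/* Note on MO_BSWAP in the two routines above: AArch64 memory accesses are
 * little-endian, so a byte-swapped (big-endian) guest access is a plain
 * load/store plus REV16/REV32/REV64.  Loads can swap in place and then
 * re-extend the signed cases, whereas stores swap into TCG_REG_TMP so the
 * source register is preserved; XZR is skipped because zero does not need
 * swapping.  */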
1814
1815static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1816                            TCGMemOpIdx oi, TCGType ext)
1817{
1818    MemOp memop = get_memop(oi);
1819    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1820#ifdef CONFIG_SOFTMMU
1821    unsigned mem_index = get_mmuidx(oi);
1822    tcg_insn_unit *label_ptr;
1823
1824    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1825    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1826                           TCG_REG_X1, otype, addr_reg);
1827    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1828                        s->code_ptr, label_ptr);
1829#else /* !CONFIG_SOFTMMU */
1830    if (USE_GUEST_BASE) {
1831        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1832                               TCG_REG_GUEST_BASE, otype, addr_reg);
1833    } else {
1834        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1835                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1836    }
1837#endif /* CONFIG_SOFTMMU */
1838}
1839
1840static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1841                            TCGMemOpIdx oi)
1842{
1843    MemOp memop = get_memop(oi);
1844    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1845#ifdef CONFIG_SOFTMMU
1846    unsigned mem_index = get_mmuidx(oi);
1847    tcg_insn_unit *label_ptr;
1848
1849    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1850    tcg_out_qemu_st_direct(s, memop, data_reg,
1851                           TCG_REG_X1, otype, addr_reg);
1852    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
1853                        data_reg, addr_reg, s->code_ptr, label_ptr);
1854#else /* !CONFIG_SOFTMMU */
1855    if (USE_GUEST_BASE) {
1856        tcg_out_qemu_st_direct(s, memop, data_reg,
1857                               TCG_REG_GUEST_BASE, otype, addr_reg);
1858    } else {
1859        tcg_out_qemu_st_direct(s, memop, data_reg,
1860                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1861    }
1862#endif /* CONFIG_SOFTMMU */
1863}
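/* How the pieces fit together (explanatory note): under CONFIG_SOFTMMU the
 * two functions above rely on tcg_out_tlb_read() leaving the host addend in
 * X1, so the access itself is a register-offset load/store of the form
 * [X1 + addr_reg], with add_qemu_ldst_label() recording what is needed to
 * build the slow path at the end of the TB.  In user-only mode the access
 * is made directly, either based on TCG_REG_GUEST_BASE with the guest
 * address as index, or from the address register with an XZR index when no
 * guest_base register is in use.  */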
1864
1865static const tcg_insn_unit *tb_ret_addr;
1866
1867static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1868                       const TCGArg args[TCG_MAX_OP_ARGS],
1869                       const int const_args[TCG_MAX_OP_ARGS])
1870{
1871    /* 99% of the time, we can signal the use of extension registers
1872       by looking to see if the opcode handles 64-bit data.  */
1873    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1874
1875    /* Hoist the loads of the most common arguments.  */
1876    TCGArg a0 = args[0];
1877    TCGArg a1 = args[1];
1878    TCGArg a2 = args[2];
1879    int c2 = const_args[2];
1880
1881    /* Some operands use the "rZ" constraint: a register or the constant
1882       zero.  const_args[I] says which, so we need not test args[I] == 0.  */
1883#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1884
1885    switch (opc) {
1886    case INDEX_op_exit_tb:
1887        /* Reuse the zeroing that exists for goto_ptr.  */
1888        if (a0 == 0) {
1889            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1890        } else {
1891            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1892            tcg_out_goto_long(s, tb_ret_addr);
1893        }
1894        break;
1895
1896    case INDEX_op_goto_tb:
1897        if (s->tb_jmp_insn_offset != NULL) {
1898            /* TCG_TARGET_HAS_direct_jump */
1899            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1900               write can be used to patch the target address. */
1901            if ((uintptr_t)s->code_ptr & 7) {
1902                tcg_out32(s, NOP);
1903            }
1904            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1905            /* actual branch destination will be patched by
1906               tb_target_set_jmp_target later. */
1907            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1908            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1909        } else {
1910            /* !TCG_TARGET_HAS_direct_jump */
1911            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1912            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1913            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1914        }
1915        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1916        set_jmp_reset_offset(s, a0);
1917        break;
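        /*
         * Note: with direct jumps, the ADRP/ADD pair above is kept 8-byte
         * aligned so that tb_target_set_jmp_target() can later rewrite both
         * instructions with a single aligned 8-byte store; without direct
         * jumps the target is reloaded from tb_jmp_target_addr[] with a
         * pc-relative LDR on every execution.  Either way TMP ends up
         * holding the destination for the BR below.
         */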
1918
1919    case INDEX_op_goto_ptr:
1920        tcg_out_insn(s, 3207, BR, a0);
1921        break;
1922
1923    case INDEX_op_br:
1924        tcg_out_goto_label(s, arg_label(a0));
1925        break;
1926
1927    case INDEX_op_ld8u_i32:
1928    case INDEX_op_ld8u_i64:
1929        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1930        break;
1931    case INDEX_op_ld8s_i32:
1932        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1933        break;
1934    case INDEX_op_ld8s_i64:
1935        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1936        break;
1937    case INDEX_op_ld16u_i32:
1938    case INDEX_op_ld16u_i64:
1939        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1940        break;
1941    case INDEX_op_ld16s_i32:
1942        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1943        break;
1944    case INDEX_op_ld16s_i64:
1945        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1946        break;
1947    case INDEX_op_ld_i32:
1948    case INDEX_op_ld32u_i64:
1949        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1950        break;
1951    case INDEX_op_ld32s_i64:
1952        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1953        break;
1954    case INDEX_op_ld_i64:
1955        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1956        break;
1957
1958    case INDEX_op_st8_i32:
1959    case INDEX_op_st8_i64:
1960        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1961        break;
1962    case INDEX_op_st16_i32:
1963    case INDEX_op_st16_i64:
1964        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1965        break;
1966    case INDEX_op_st_i32:
1967    case INDEX_op_st32_i64:
1968        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1969        break;
1970    case INDEX_op_st_i64:
1971        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1972        break;
1973
1974    case INDEX_op_add_i32:
1975        a2 = (int32_t)a2;
1976        /* FALLTHRU */
1977    case INDEX_op_add_i64:
1978        if (c2) {
1979            tcg_out_addsubi(s, ext, a0, a1, a2);
1980        } else {
1981            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1982        }
1983        break;
1984
1985    case INDEX_op_sub_i32:
1986        a2 = (int32_t)a2;
1987        /* FALLTHRU */
1988    case INDEX_op_sub_i64:
1989        if (c2) {
1990            tcg_out_addsubi(s, ext, a0, a1, -a2);
1991        } else {
1992            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1993        }
1994        break;
1995
1996    case INDEX_op_neg_i64:
1997    case INDEX_op_neg_i32:
1998        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1999        break;
2000
2001    case INDEX_op_and_i32:
2002        a2 = (int32_t)a2;
2003        /* FALLTHRU */
2004    case INDEX_op_and_i64:
2005        if (c2) {
2006            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2007        } else {
2008            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2009        }
2010        break;
2011
2012    case INDEX_op_andc_i32:
2013        a2 = (int32_t)a2;
2014        /* FALLTHRU */
2015    case INDEX_op_andc_i64:
2016        if (c2) {
2017            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2018        } else {
2019            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2020        }
2021        break;
2022
2023    case INDEX_op_or_i32:
2024        a2 = (int32_t)a2;
2025        /* FALLTHRU */
2026    case INDEX_op_or_i64:
2027        if (c2) {
2028            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2029        } else {
2030            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2031        }
2032        break;
2033
2034    case INDEX_op_orc_i32:
2035        a2 = (int32_t)a2;
2036        /* FALLTHRU */
2037    case INDEX_op_orc_i64:
2038        if (c2) {
2039            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2040        } else {
2041            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2042        }
2043        break;
2044
2045    case INDEX_op_xor_i32:
2046        a2 = (int32_t)a2;
2047        /* FALLTHRU */
2048    case INDEX_op_xor_i64:
2049        if (c2) {
2050            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2051        } else {
2052            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_eqv_i32:
2057        a2 = (int32_t)a2;
2058        /* FALLTHRU */
2059    case INDEX_op_eqv_i64:
2060        if (c2) {
2061            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2062        } else {
2063            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2064        }
2065        break;
2066
2067    case INDEX_op_not_i64:
2068    case INDEX_op_not_i32:
2069        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2070        break;
2071
2072    case INDEX_op_mul_i64:
2073    case INDEX_op_mul_i32:
2074        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2075        break;
2076
2077    case INDEX_op_div_i64:
2078    case INDEX_op_div_i32:
2079        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2080        break;
2081    case INDEX_op_divu_i64:
2082    case INDEX_op_divu_i32:
2083        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2084        break;
2085
2086    case INDEX_op_rem_i64:
2087    case INDEX_op_rem_i32:
2088        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2089        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2090        break;
2091    case INDEX_op_remu_i64:
2092    case INDEX_op_remu_i32:
2093        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2094        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2095        break;
2096
2097    case INDEX_op_shl_i64:
2098    case INDEX_op_shl_i32:
2099        if (c2) {
2100            tcg_out_shl(s, ext, a0, a1, a2);
2101        } else {
2102            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2103        }
2104        break;
2105
2106    case INDEX_op_shr_i64:
2107    case INDEX_op_shr_i32:
2108        if (c2) {
2109            tcg_out_shr(s, ext, a0, a1, a2);
2110        } else {
2111            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2112        }
2113        break;
2114
2115    case INDEX_op_sar_i64:
2116    case INDEX_op_sar_i32:
2117        if (c2) {
2118            tcg_out_sar(s, ext, a0, a1, a2);
2119        } else {
2120            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2121        }
2122        break;
2123
2124    case INDEX_op_rotr_i64:
2125    case INDEX_op_rotr_i32:
2126        if (c2) {
2127            tcg_out_rotr(s, ext, a0, a1, a2);
2128        } else {
2129            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2130        }
2131        break;
2132
2133    case INDEX_op_rotl_i64:
2134    case INDEX_op_rotl_i32:
2135        if (c2) {
2136            tcg_out_rotl(s, ext, a0, a1, a2);
2137        } else {
2138            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2139            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2140        }
2141        break;
2142
2143    case INDEX_op_clz_i64:
2144    case INDEX_op_clz_i32:
2145        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2146        break;
2147    case INDEX_op_ctz_i64:
2148    case INDEX_op_ctz_i32:
2149        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2150        break;
2151
2152    case INDEX_op_brcond_i32:
2153        a1 = (int32_t)a1;
2154        /* FALLTHRU */
2155    case INDEX_op_brcond_i64:
2156        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2157        break;
2158
2159    case INDEX_op_setcond_i32:
2160        a2 = (int32_t)a2;
2161        /* FALLTHRU */
2162    case INDEX_op_setcond_i64:
2163        tcg_out_cmp(s, ext, a1, a2, c2);
2164        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2165        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2166                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2167        break;
2168
2169    case INDEX_op_movcond_i32:
2170        a2 = (int32_t)a2;
2171        /* FALLTHRU */
2172    case INDEX_op_movcond_i64:
2173        tcg_out_cmp(s, ext, a1, a2, c2);
2174        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2175        break;
2176
2177    case INDEX_op_qemu_ld_i32:
2178    case INDEX_op_qemu_ld_i64:
2179        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2180        break;
2181    case INDEX_op_qemu_st_i32:
2182    case INDEX_op_qemu_st_i64:
2183        tcg_out_qemu_st(s, REG0(0), a1, a2);
2184        break;
2185
2186    case INDEX_op_bswap64_i64:
2187        tcg_out_rev64(s, a0, a1);
2188        break;
2189    case INDEX_op_bswap32_i64:
2190    case INDEX_op_bswap32_i32:
2191        tcg_out_rev32(s, a0, a1);
2192        break;
2193    case INDEX_op_bswap16_i64:
2194    case INDEX_op_bswap16_i32:
2195        tcg_out_rev16(s, a0, a1);
2196        break;
2197
2198    case INDEX_op_ext8s_i64:
2199    case INDEX_op_ext8s_i32:
2200        tcg_out_sxt(s, ext, MO_8, a0, a1);
2201        break;
2202    case INDEX_op_ext16s_i64:
2203    case INDEX_op_ext16s_i32:
2204        tcg_out_sxt(s, ext, MO_16, a0, a1);
2205        break;
2206    case INDEX_op_ext_i32_i64:
2207    case INDEX_op_ext32s_i64:
2208        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2209        break;
2210    case INDEX_op_ext8u_i64:
2211    case INDEX_op_ext8u_i32:
2212        tcg_out_uxt(s, MO_8, a0, a1);
2213        break;
2214    case INDEX_op_ext16u_i64:
2215    case INDEX_op_ext16u_i32:
2216        tcg_out_uxt(s, MO_16, a0, a1);
2217        break;
2218    case INDEX_op_extu_i32_i64:
2219    case INDEX_op_ext32u_i64:
2220        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2221        break;
2222
2223    case INDEX_op_deposit_i64:
2224    case INDEX_op_deposit_i32:
2225        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2226        break;
2227
2228    case INDEX_op_extract_i64:
2229    case INDEX_op_extract_i32:
2230        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2231        break;
2232
2233    case INDEX_op_sextract_i64:
2234    case INDEX_op_sextract_i32:
2235        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2236        break;
2237
2238    case INDEX_op_extract2_i64:
2239    case INDEX_op_extract2_i32:
2240        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2241        break;
2242
2243    case INDEX_op_add2_i32:
2244        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2245                        (int32_t)args[4], args[5], const_args[4],
2246                        const_args[5], false);
2247        break;
2248    case INDEX_op_add2_i64:
2249        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2250                        args[5], const_args[4], const_args[5], false);
2251        break;
2252    case INDEX_op_sub2_i32:
2253        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2254                        (int32_t)args[4], args[5], const_args[4],
2255                        const_args[5], true);
2256        break;
2257    case INDEX_op_sub2_i64:
2258        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2259                        args[5], const_args[4], const_args[5], true);
2260        break;
2261
2262    case INDEX_op_muluh_i64:
2263        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2264        break;
2265    case INDEX_op_mulsh_i64:
2266        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2267        break;
2268
2269    case INDEX_op_mb:
2270        tcg_out_mb(s, a0);
2271        break;
2272
2273    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2274    case INDEX_op_mov_i64:
2275    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2276    default:
2277        g_assert_not_reached();
2278    }
2279
2280#undef REG0
2281}
2282
2283static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2284                           unsigned vecl, unsigned vece,
2285                           const TCGArg args[TCG_MAX_OP_ARGS],
2286                           const int const_args[TCG_MAX_OP_ARGS])
2287{
2288    static const AArch64Insn cmp_vec_insn[16] = {
2289        [TCG_COND_EQ] = I3616_CMEQ,
2290        [TCG_COND_GT] = I3616_CMGT,
2291        [TCG_COND_GE] = I3616_CMGE,
2292        [TCG_COND_GTU] = I3616_CMHI,
2293        [TCG_COND_GEU] = I3616_CMHS,
2294    };
2295    static const AArch64Insn cmp_scalar_insn[16] = {
2296        [TCG_COND_EQ] = I3611_CMEQ,
2297        [TCG_COND_GT] = I3611_CMGT,
2298        [TCG_COND_GE] = I3611_CMGE,
2299        [TCG_COND_GTU] = I3611_CMHI,
2300        [TCG_COND_GEU] = I3611_CMHS,
2301    };
2302    static const AArch64Insn cmp0_vec_insn[16] = {
2303        [TCG_COND_EQ] = I3617_CMEQ0,
2304        [TCG_COND_GT] = I3617_CMGT0,
2305        [TCG_COND_GE] = I3617_CMGE0,
2306        [TCG_COND_LT] = I3617_CMLT0,
2307        [TCG_COND_LE] = I3617_CMLE0,
2308    };
2309    static const AArch64Insn cmp0_scalar_insn[16] = {
2310        [TCG_COND_EQ] = I3612_CMEQ0,
2311        [TCG_COND_GT] = I3612_CMGT0,
2312        [TCG_COND_GE] = I3612_CMGE0,
2313        [TCG_COND_LT] = I3612_CMLT0,
2314        [TCG_COND_LE] = I3612_CMLE0,
2315    };
2316
2317    TCGType type = vecl + TCG_TYPE_V64;
2318    unsigned is_q = vecl;
2319    bool is_scalar = !is_q && vece == MO_64;
2320    TCGArg a0, a1, a2, a3;
2321    int cmode, imm8;
2322
2323    a0 = args[0];
2324    a1 = args[1];
2325    a2 = args[2];
2326
2327    switch (opc) {
2328    case INDEX_op_ld_vec:
2329        tcg_out_ld(s, type, a0, a1, a2);
2330        break;
2331    case INDEX_op_st_vec:
2332        tcg_out_st(s, type, a0, a1, a2);
2333        break;
2334    case INDEX_op_dupm_vec:
2335        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2336        break;
2337    case INDEX_op_add_vec:
2338        if (is_scalar) {
2339            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2340        } else {
2341            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2342        }
2343        break;
2344    case INDEX_op_sub_vec:
2345        if (is_scalar) {
2346            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2347        } else {
2348            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2349        }
2350        break;
2351    case INDEX_op_mul_vec:
2352        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2353        break;
2354    case INDEX_op_neg_vec:
2355        if (is_scalar) {
2356            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2357        } else {
2358            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2359        }
2360        break;
2361    case INDEX_op_abs_vec:
2362        if (is_scalar) {
2363            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2364        } else {
2365            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2366        }
2367        break;
2368    case INDEX_op_and_vec:
2369        if (const_args[2]) {
2370            is_shimm1632(~a2, &cmode, &imm8);
2371            if (a0 == a1) {
2372                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2373                return;
2374            }
2375            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2376            a2 = a0;
2377        }
2378        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2379        break;
2380    case INDEX_op_or_vec:
2381        if (const_args[2]) {
2382            is_shimm1632(a2, &cmode, &imm8);
2383            if (a0 == a1) {
2384                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2385                return;
2386            }
2387            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2388            a2 = a0;
2389        }
2390        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2391        break;
2392    case INDEX_op_andc_vec:
2393        if (const_args[2]) {
2394            is_shimm1632(a2, &cmode, &imm8);
2395            if (a0 == a1) {
2396                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2397                return;
2398            }
2399            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2400            a2 = a0;
2401        }
2402        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2403        break;
2404    case INDEX_op_orc_vec:
2405        if (const_args[2]) {
2406            is_shimm1632(~a2, &cmode, &imm8);
2407            if (a0 == a1) {
2408                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2409                return;
2410            }
2411            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2412            a2 = a0;
2413        }
2414        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2415        break;
2416    case INDEX_op_xor_vec:
2417        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2418        break;
2419    case INDEX_op_ssadd_vec:
2420        if (is_scalar) {
2421            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2422        } else {
2423            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2424        }
2425        break;
2426    case INDEX_op_sssub_vec:
2427        if (is_scalar) {
2428            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2429        } else {
2430            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2431        }
2432        break;
2433    case INDEX_op_usadd_vec:
2434        if (is_scalar) {
2435            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2436        } else {
2437            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2438        }
2439        break;
2440    case INDEX_op_ussub_vec:
2441        if (is_scalar) {
2442            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2443        } else {
2444            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2445        }
2446        break;
2447    case INDEX_op_smax_vec:
2448        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2449        break;
2450    case INDEX_op_smin_vec:
2451        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2452        break;
2453    case INDEX_op_umax_vec:
2454        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2455        break;
2456    case INDEX_op_umin_vec:
2457        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2458        break;
2459    case INDEX_op_not_vec:
2460        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2461        break;
2462    case INDEX_op_shli_vec:
2463        if (is_scalar) {
2464            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2465        } else {
2466            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2467        }
2468        break;
2469    case INDEX_op_shri_vec:
2470        if (is_scalar) {
2471            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2472        } else {
2473            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2474        }
2475        break;
2476    case INDEX_op_sari_vec:
2477        if (is_scalar) {
2478            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2479        } else {
2480            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2481        }
2482        break;
2483    case INDEX_op_aa64_sli_vec:
2484        if (is_scalar) {
2485            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2486        } else {
2487            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2488        }
2489        break;
2490    case INDEX_op_shlv_vec:
2491        if (is_scalar) {
2492            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2493        } else {
2494            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2495        }
2496        break;
2497    case INDEX_op_aa64_sshl_vec:
2498        if (is_scalar) {
2499            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2500        } else {
2501            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2502        }
2503        break;
2504    case INDEX_op_cmp_vec:
2505        {
2506            TCGCond cond = args[3];
2507            AArch64Insn insn;
2508
2509            if (cond == TCG_COND_NE) {
2510                if (const_args[2]) {
2511                    if (is_scalar) {
2512                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2513                    } else {
2514                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2515                    }
2516                } else {
2517                    if (is_scalar) {
2518                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2519                    } else {
2520                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2521                    }
2522                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2523                }
2524            } else {
2525                if (const_args[2]) {
2526                    if (is_scalar) {
2527                        insn = cmp0_scalar_insn[cond];
2528                        if (insn) {
2529                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2530                            break;
2531                        }
2532                    } else {
2533                        insn = cmp0_vec_insn[cond];
2534                        if (insn) {
2535                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2536                            break;
2537                        }
2538                    }
2539                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2540                    a2 = TCG_VEC_TMP;
2541                }
2542                if (is_scalar) {
2543                    insn = cmp_scalar_insn[cond];
2544                    if (insn == 0) {
2545                        TCGArg t;
2546                        t = a1, a1 = a2, a2 = t;
2547                        cond = tcg_swap_cond(cond);
2548                        insn = cmp_scalar_insn[cond];
2549                        tcg_debug_assert(insn != 0);
2550                    }
2551                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2552                } else {
2553                    insn = cmp_vec_insn[cond];
2554                    if (insn == 0) {
2555                        TCGArg t;
2556                        t = a1, a1 = a2, a2 = t;
2557                        cond = tcg_swap_cond(cond);
2558                        insn = cmp_vec_insn[cond];
2559                        tcg_debug_assert(insn != 0);
2560                    }
2561                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2562                }
2563            }
2564        }
2565        break;
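        /*
         * Note on TCG_COND_NE above: the "wZ" constraint means a constant
         * comparand can only be zero, so "a1 != 0" is computed as
         * CMTST a1, a1 (test the value against itself).  A register
         * comparand has no direct NE compare, hence CMEQ followed by NOT.
         * The remaining conditions map onto the CM* tables, swapping
         * operands when only the mirrored form exists.
         */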
2566
2567    case INDEX_op_bitsel_vec:
2568        a3 = args[3];
2569        if (a0 == a3) {
2570            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2571        } else if (a0 == a2) {
2572            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2573        } else {
2574            if (a0 != a1) {
2575                tcg_out_mov(s, type, a0, a1);
2576            }
2577            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2578        }
2579        break;
2580
2581    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2582    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2583    default:
2584        g_assert_not_reached();
2585    }
2586}
2587
2588int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2589{
2590    switch (opc) {
2591    case INDEX_op_add_vec:
2592    case INDEX_op_sub_vec:
2593    case INDEX_op_and_vec:
2594    case INDEX_op_or_vec:
2595    case INDEX_op_xor_vec:
2596    case INDEX_op_andc_vec:
2597    case INDEX_op_orc_vec:
2598    case INDEX_op_neg_vec:
2599    case INDEX_op_abs_vec:
2600    case INDEX_op_not_vec:
2601    case INDEX_op_cmp_vec:
2602    case INDEX_op_shli_vec:
2603    case INDEX_op_shri_vec:
2604    case INDEX_op_sari_vec:
2605    case INDEX_op_ssadd_vec:
2606    case INDEX_op_sssub_vec:
2607    case INDEX_op_usadd_vec:
2608    case INDEX_op_ussub_vec:
2609    case INDEX_op_shlv_vec:
2610    case INDEX_op_bitsel_vec:
2611        return 1;
2612    case INDEX_op_rotli_vec:
2613    case INDEX_op_shrv_vec:
2614    case INDEX_op_sarv_vec:
2615    case INDEX_op_rotlv_vec:
2616    case INDEX_op_rotrv_vec:
2617        return -1;
2618    case INDEX_op_mul_vec:
2619    case INDEX_op_smax_vec:
2620    case INDEX_op_smin_vec:
2621    case INDEX_op_umax_vec:
2622    case INDEX_op_umin_vec:
2623        return vece < MO_64;
2624
2625    default:
2626        return 0;
2627    }
2628}
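/* Reminder: the return value follows the generic TCG convention -- 1 means
   the op is emitted directly by tcg_out_vec_op(), 0 means it is unsupported,
   and -1 means it is accepted but expanded into supported ops by
   tcg_expand_vec_op() below.  */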
2629
2630void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2631                       TCGArg a0, ...)
2632{
2633    va_list va;
2634    TCGv_vec v0, v1, v2, t1, t2, c1;
2635    TCGArg a2;
2636
2637    va_start(va, a0);
2638    v0 = temp_tcgv_vec(arg_temp(a0));
2639    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2640    a2 = va_arg(va, TCGArg);
2641    va_end(va);
2642
2643    switch (opc) {
2644    case INDEX_op_rotli_vec:
2645        t1 = tcg_temp_new_vec(type);
2646        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2647        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2648                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2649        tcg_temp_free_vec(t1);
2650        break;
2651
2652    case INDEX_op_shrv_vec:
2653    case INDEX_op_sarv_vec:
2654        /* Right shifts are negative left shifts for AArch64.  */
2655        v2 = temp_tcgv_vec(arg_temp(a2));
2656        t1 = tcg_temp_new_vec(type);
2657        tcg_gen_neg_vec(vece, t1, v2);
2658        opc = (opc == INDEX_op_shrv_vec
2659               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2660        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2661                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2662        tcg_temp_free_vec(t1);
2663        break;
2664
2665    case INDEX_op_rotlv_vec:
2666        v2 = temp_tcgv_vec(arg_temp(a2));
2667        t1 = tcg_temp_new_vec(type);
2668        c1 = tcg_constant_vec(type, vece, 8 << vece);
2669        tcg_gen_sub_vec(vece, t1, v2, c1);
2670        /* Right shifts are negative left shifts for AArch64.  */
2671        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2672                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2673        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2674                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2675        tcg_gen_or_vec(vece, v0, v0, t1);
2676        tcg_temp_free_vec(t1);
2677        break;
2678
2679    case INDEX_op_rotrv_vec:
2680        v2 = temp_tcgv_vec(arg_temp(a2));
2681        t1 = tcg_temp_new_vec(type);
2682        t2 = tcg_temp_new_vec(type);
2683        c1 = tcg_constant_vec(type, vece, 8 << vece);
2684        tcg_gen_neg_vec(vece, t1, v2);
2685        tcg_gen_sub_vec(vece, t2, c1, v2);
2686        /* Right shifts are negative left shifts for AArch64.  */
2687        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2688                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2689        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2690                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2691        tcg_gen_or_vec(vece, v0, t1, t2);
2692        tcg_temp_free_vec(t1);
2693        tcg_temp_free_vec(t2);
2694        break;
2695
2696    default:
2697        g_assert_not_reached();
2698    }
2699}
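/* Worked example for the rotlv expansion above: rotating an element of
 * width w left by r is (x << r) | (x >> (w - r)).  There is no variable
 * right shift, but USHL by a negative amount shifts right, so the code
 * computes t1 = r - w (negative for 0 <= r < w), emits x SHL r into v0 and
 * x SHL (r - w) into t1, and ORs the halves together.  rotrv does the same
 * with the shift amounts negated: x SHL -r and x SHL (w - r).  */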
2700
2701static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2702{
2703    switch (op) {
2704    case INDEX_op_goto_ptr:
2705        return C_O0_I1(r);
2706
2707    case INDEX_op_ld8u_i32:
2708    case INDEX_op_ld8s_i32:
2709    case INDEX_op_ld16u_i32:
2710    case INDEX_op_ld16s_i32:
2711    case INDEX_op_ld_i32:
2712    case INDEX_op_ld8u_i64:
2713    case INDEX_op_ld8s_i64:
2714    case INDEX_op_ld16u_i64:
2715    case INDEX_op_ld16s_i64:
2716    case INDEX_op_ld32u_i64:
2717    case INDEX_op_ld32s_i64:
2718    case INDEX_op_ld_i64:
2719    case INDEX_op_neg_i32:
2720    case INDEX_op_neg_i64:
2721    case INDEX_op_not_i32:
2722    case INDEX_op_not_i64:
2723    case INDEX_op_bswap16_i32:
2724    case INDEX_op_bswap32_i32:
2725    case INDEX_op_bswap16_i64:
2726    case INDEX_op_bswap32_i64:
2727    case INDEX_op_bswap64_i64:
2728    case INDEX_op_ext8s_i32:
2729    case INDEX_op_ext16s_i32:
2730    case INDEX_op_ext8u_i32:
2731    case INDEX_op_ext16u_i32:
2732    case INDEX_op_ext8s_i64:
2733    case INDEX_op_ext16s_i64:
2734    case INDEX_op_ext32s_i64:
2735    case INDEX_op_ext8u_i64:
2736    case INDEX_op_ext16u_i64:
2737    case INDEX_op_ext32u_i64:
2738    case INDEX_op_ext_i32_i64:
2739    case INDEX_op_extu_i32_i64:
2740    case INDEX_op_extract_i32:
2741    case INDEX_op_extract_i64:
2742    case INDEX_op_sextract_i32:
2743    case INDEX_op_sextract_i64:
2744        return C_O1_I1(r, r);
2745
2746    case INDEX_op_st8_i32:
2747    case INDEX_op_st16_i32:
2748    case INDEX_op_st_i32:
2749    case INDEX_op_st8_i64:
2750    case INDEX_op_st16_i64:
2751    case INDEX_op_st32_i64:
2752    case INDEX_op_st_i64:
2753        return C_O0_I2(rZ, r);
2754
2755    case INDEX_op_add_i32:
2756    case INDEX_op_add_i64:
2757    case INDEX_op_sub_i32:
2758    case INDEX_op_sub_i64:
2759    case INDEX_op_setcond_i32:
2760    case INDEX_op_setcond_i64:
2761        return C_O1_I2(r, r, rA);
2762
2763    case INDEX_op_mul_i32:
2764    case INDEX_op_mul_i64:
2765    case INDEX_op_div_i32:
2766    case INDEX_op_div_i64:
2767    case INDEX_op_divu_i32:
2768    case INDEX_op_divu_i64:
2769    case INDEX_op_rem_i32:
2770    case INDEX_op_rem_i64:
2771    case INDEX_op_remu_i32:
2772    case INDEX_op_remu_i64:
2773    case INDEX_op_muluh_i64:
2774    case INDEX_op_mulsh_i64:
2775        return C_O1_I2(r, r, r);
2776
2777    case INDEX_op_and_i32:
2778    case INDEX_op_and_i64:
2779    case INDEX_op_or_i32:
2780    case INDEX_op_or_i64:
2781    case INDEX_op_xor_i32:
2782    case INDEX_op_xor_i64:
2783    case INDEX_op_andc_i32:
2784    case INDEX_op_andc_i64:
2785    case INDEX_op_orc_i32:
2786    case INDEX_op_orc_i64:
2787    case INDEX_op_eqv_i32:
2788    case INDEX_op_eqv_i64:
2789        return C_O1_I2(r, r, rL);
2790
2791    case INDEX_op_shl_i32:
2792    case INDEX_op_shr_i32:
2793    case INDEX_op_sar_i32:
2794    case INDEX_op_rotl_i32:
2795    case INDEX_op_rotr_i32:
2796    case INDEX_op_shl_i64:
2797    case INDEX_op_shr_i64:
2798    case INDEX_op_sar_i64:
2799    case INDEX_op_rotl_i64:
2800    case INDEX_op_rotr_i64:
2801        return C_O1_I2(r, r, ri);
2802
2803    case INDEX_op_clz_i32:
2804    case INDEX_op_ctz_i32:
2805    case INDEX_op_clz_i64:
2806    case INDEX_op_ctz_i64:
2807        return C_O1_I2(r, r, rAL);
2808
2809    case INDEX_op_brcond_i32:
2810    case INDEX_op_brcond_i64:
2811        return C_O0_I2(r, rA);
2812
2813    case INDEX_op_movcond_i32:
2814    case INDEX_op_movcond_i64:
2815        return C_O1_I4(r, r, rA, rZ, rZ);
2816
2817    case INDEX_op_qemu_ld_i32:
2818    case INDEX_op_qemu_ld_i64:
2819        return C_O1_I1(r, l);
2820    case INDEX_op_qemu_st_i32:
2821    case INDEX_op_qemu_st_i64:
2822        return C_O0_I2(lZ, l);
2823
2824    case INDEX_op_deposit_i32:
2825    case INDEX_op_deposit_i64:
2826        return C_O1_I2(r, 0, rZ);
2827
2828    case INDEX_op_extract2_i32:
2829    case INDEX_op_extract2_i64:
2830        return C_O1_I2(r, rZ, rZ);
2831
2832    case INDEX_op_add2_i32:
2833    case INDEX_op_add2_i64:
2834    case INDEX_op_sub2_i32:
2835    case INDEX_op_sub2_i64:
2836        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2837
2838    case INDEX_op_add_vec:
2839    case INDEX_op_sub_vec:
2840    case INDEX_op_mul_vec:
2841    case INDEX_op_xor_vec:
2842    case INDEX_op_ssadd_vec:
2843    case INDEX_op_sssub_vec:
2844    case INDEX_op_usadd_vec:
2845    case INDEX_op_ussub_vec:
2846    case INDEX_op_smax_vec:
2847    case INDEX_op_smin_vec:
2848    case INDEX_op_umax_vec:
2849    case INDEX_op_umin_vec:
2850    case INDEX_op_shlv_vec:
2851    case INDEX_op_shrv_vec:
2852    case INDEX_op_sarv_vec:
2853    case INDEX_op_aa64_sshl_vec:
2854        return C_O1_I2(w, w, w);
2855    case INDEX_op_not_vec:
2856    case INDEX_op_neg_vec:
2857    case INDEX_op_abs_vec:
2858    case INDEX_op_shli_vec:
2859    case INDEX_op_shri_vec:
2860    case INDEX_op_sari_vec:
2861        return C_O1_I1(w, w);
2862    case INDEX_op_ld_vec:
2863    case INDEX_op_dupm_vec:
2864        return C_O1_I1(w, r);
2865    case INDEX_op_st_vec:
2866        return C_O0_I2(w, r);
2867    case INDEX_op_dup_vec:
2868        return C_O1_I1(w, wr);
2869    case INDEX_op_or_vec:
2870    case INDEX_op_andc_vec:
2871        return C_O1_I2(w, w, wO);
2872    case INDEX_op_and_vec:
2873    case INDEX_op_orc_vec:
2874        return C_O1_I2(w, w, wN);
2875    case INDEX_op_cmp_vec:
2876        return C_O1_I2(w, w, wZ);
2877    case INDEX_op_bitsel_vec:
2878        return C_O1_I3(w, w, w, w);
2879    case INDEX_op_aa64_sli_vec:
2880        return C_O1_I2(w, 0, w);
2881
2882    default:
2883        g_assert_not_reached();
2884    }
2885}
2886
2887static void tcg_target_init(TCGContext *s)
2888{
2889    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2890    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2891    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2892    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2893
2894    tcg_target_call_clobber_regs = -1ull;
2895    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2896    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2897    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2898    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2899    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2900    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2901    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2902    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2903    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2904    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2905    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2906    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2907    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2914
2915    s->reserved_regs = 0;
2916    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2917    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2918    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2919    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2920    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2921}
2922
2923/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
2924#define PUSH_SIZE  ((30 - 19 + 1) * 8)
2925
2926#define FRAME_SIZE \
2927    ((PUSH_SIZE \
2928      + TCG_STATIC_CALL_ARGS_SIZE \
2929      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2930      + TCG_TARGET_STACK_ALIGN - 1) \
2931     & ~(TCG_TARGET_STACK_ALIGN - 1))
2932
2933/* We're expecting a 2 byte uleb128 encoded value.  */
2934QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2935
2936/* We're expecting to use a single ADDI insn.  */
2937QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
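/* Frame arithmetic (explanatory sketch; the exact constants come from
   tcg/tcg.h): PUSH_SIZE covers the six STP pairs for x19..x30, i.e.
   12 registers * 8 bytes = 96 bytes.  FRAME_SIZE adds the static call
   argument area and the CPU temp buffer and rounds up to the stack
   alignment.  The assertions above keep the total representable as a
   2-byte uleb128 in debug_frame below and keep FRAME_SIZE - PUSH_SIZE
   within the 12-bit immediate of the single SUBI/ADDI used to adjust SP.  */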
2938
2939static void tcg_target_qemu_prologue(TCGContext *s)
2940{
2941    TCGReg r;
2942
2943    /* Push (FP, LR) and allocate space for all saved registers.  */
2944    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2945                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2946
2947    /* Set up frame pointer for canonical unwinding.  */
2948    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2949
2950    /* Store callee-preserved regs x19..x28.  */
2951    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2952        int ofs = (r - TCG_REG_X19 + 2) * 8;
2953        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2954    }
2955
2956    /* Make stack space for TCG locals.  */
2957    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2958                 FRAME_SIZE - PUSH_SIZE);
2959
2960    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2961    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2962                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2963
2964#if !defined(CONFIG_SOFTMMU)
2965    if (USE_GUEST_BASE) {
2966        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2967        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2968    }
2969#endif
2970
2971    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2972    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2973
2974    /*
2975     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2976     * and fall through to the rest of the epilogue.
2977     */
2978    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2979    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2980
2981    /* TB epilogue */
2982    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2983
2984    /* Remove TCG locals stack space.  */
2985    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2986                 FRAME_SIZE - PUSH_SIZE);
2987
2988    /* Restore registers x19..x28.  */
2989    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2990        int ofs = (r - TCG_REG_X19 + 2) * 8;
2991        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2992    }
2993
2994    /* Pop (FP, LR), restore SP to previous frame.  */
2995    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2996                 TCG_REG_SP, PUSH_SIZE, 0, 1);
2997    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2998}
2999
3000static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3001{
3002    int i;
3003    for (i = 0; i < count; ++i) {
3004        p[i] = NOP;
3005    }
3006}
3007
3008typedef struct {
3009    DebugFrameHeader h;
3010    uint8_t fde_def_cfa[4];
3011    uint8_t fde_reg_ofs[24];
3012} DebugFrame;
3013
3014#define ELF_HOST_MACHINE EM_AARCH64
3015
3016static const DebugFrame debug_frame = {
3017    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3018    .h.cie.id = -1,
3019    .h.cie.version = 1,
3020    .h.cie.code_align = 1,
3021    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3022    .h.cie.return_column = TCG_REG_LR,
3023
3024    /* Total FDE size does not include the "len" member.  */
3025    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3026
3027    .fde_def_cfa = {
3028        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3029        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3030        (FRAME_SIZE >> 7)
3031    },
3032    .fde_reg_ofs = {
3033        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3034        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3035        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3036        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3037        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3038        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3039        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3040        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3041        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3042        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3043        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3044        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3045    }
3046};
3047
3048void tcg_register_jit(const void *buf, size_t buf_size)
3049{
3050    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3051}
3052