xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision cba42d61)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-pool.c.inc"
14#include "qemu/bitops.h"
15
16/* We're going to re-use TCGType in setting of the SF bit, which controls
17   the size of the operation performed.  If we know the values match, it
18   makes things much cleaner.  */
19QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
20
21#ifdef CONFIG_DEBUG_TCG
22static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
24    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
25    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
26    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
27
28    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
29    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
30    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
31    "v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
32};
33#endif /* CONFIG_DEBUG_TCG */
34
35static const int tcg_target_reg_alloc_order[] = {
36    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
37    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
38    TCG_REG_X28, /* we will reserve this for guest_base if configured */
39
40    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
41    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
42    TCG_REG_X16, TCG_REG_X17,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X18 reserved by system */
48    /* X19 reserved for AREG0 */
49    /* X29 reserved as fp */
50    /* X30 reserved as temporary */
51
52    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
53    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
54    /* V8 - V15 are call-saved, and skipped.  */
55    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
56    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
57    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
58    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
59};
60
61static const int tcg_target_call_iarg_regs[8] = {
62    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
63    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
64};
65static const int tcg_target_call_oarg_regs[1] = {
66    TCG_REG_X0
67};
68
/* Scratch registers used by this backend: one general, one vector.  */
#define TCG_REG_TMP  TCG_REG_X30
#define TCG_VEC_TMP  TCG_REG_V31
71
#ifndef CONFIG_SOFTMMU
/*
 * XZR cannot be encoded in the address base register slot, because that
 * encoding actually selects SP.  So when the guest address has to be
 * zero-extended via the address index register slot, even a zero guest
 * base must be loaded into a register.
 */
#define USE_GUEST_BASE      (guest_base != 0 || TARGET_LONG_BITS == 32)
#define TCG_REG_GUEST_BASE  TCG_REG_X28
#endif
80
81static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
82{
83    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
84    ptrdiff_t offset = target - src_rx;
85
86    if (offset == sextract64(offset, 0, 26)) {
87        /* read instruction, mask away previous PC_REL26 parameter contents,
88           set the proper offset, then write back the instruction. */
89        *src_rw = deposit32(*src_rw, 0, 26, offset);
90        return true;
91    }
92    return false;
93}
94
95static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
96{
97    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
98    ptrdiff_t offset = target - src_rx;
99
100    if (offset == sextract64(offset, 0, 19)) {
101        *src_rw = deposit32(*src_rw, 5, 19, offset);
102        return true;
103    }
104    return false;
105}
106
107static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
108                        intptr_t value, intptr_t addend)
109{
110    tcg_debug_assert(addend == 0);
111    switch (type) {
112    case R_AARCH64_JUMP26:
113    case R_AARCH64_CALL26:
114        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
115    case R_AARCH64_CONDBR19:
116        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
117    default:
118        g_assert_not_reached();
119    }
120}
121
/*
 * Target-specific constant constraint bits, as tested by
 * tcg_target_const_match():
 *   AIMM - the value or its negation satisfies is_aimm()
 *   LIMM - the value satisfies is_limm()
 *   ZERO - the value is 0
 *   MONE - the value is -1
 *   ORRI - the value is a replicated 16/32-bit shifted immediate
 *   ANDI - as ORRI, but tested on the inverted value
 */
#define TCG_CT_CONST_AIMM  0x100
#define TCG_CT_CONST_LIMM  0x200
#define TCG_CT_CONST_ZERO  0x400
#define TCG_CT_CONST_MONE  0x800
#define TCG_CT_CONST_ORRI  0x1000
#define TCG_CT_CONST_ANDI  0x2000
128
/* Register-set masks: general registers in bits 0-31, vector in 32-63.  */
#define ALL_GENERAL_REGS  0xffffffffu
#define ALL_VECTOR_REGS   0xffffffff00000000ull

/*
 * General registers usable as guest load/store operands.  With
 * CONFIG_SOFTMMU, X0..X3 are excluded.
 * NOTE(review): presumably because the softmmu slow path needs them for
 * helper-call arguments -- confirm against the qemu_ld/st code.
 */
#ifdef CONFIG_SOFTMMU
#define ALL_QLDST_REGS \
    (ALL_GENERAL_REGS & ~((1 << TCG_REG_X0) | (1 << TCG_REG_X1) | \
                          (1 << TCG_REG_X2) | (1 << TCG_REG_X3)))
#else
#define ALL_QLDST_REGS   ALL_GENERAL_REGS
#endif
139
/*
 * Return true if VAL can be used as an add/subtract immediate: a 12-bit
 * unsigned value, either unshifted or shifted left by 12 bits.
 */
static inline bool is_aimm(uint64_t val)
{
    const uint64_t field = 0xfff;

    if ((val & ~field) == 0) {
        return true;            /* fits in bits [11:0] */
    }
    return (val & ~(field << 12)) == 0;     /* fits in bits [23:12] */
}
145
146/* Match a constant valid for logical operations.  */
147static inline bool is_limm(uint64_t val)
148{
149    /* Taking a simplified view of the logical immediates for now, ignoring
150       the replication that can happen across the field.  Match bit patterns
151       of the forms
152           0....01....1
153           0..01..10..0
154       and their inverses.  */
155
156    /* Make things easier below, by testing the form with msb clear. */
157    if ((int64_t)val < 0) {
158        val = ~val;
159    }
160    if (val == 0) {
161        return false;
162    }
163    val += val & -val;
164    return (val & (val - 1)) == 0;
165}
166
/*
 * Return true if V16 is a valid 16-bit shifted immediate, i.e. only one
 * of its two bytes is non-zero.  On success *CMODE is 0x8 (low byte) or
 * 0xa (high byte) and *IMM8 is that byte; on failure neither is written.
 */
static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
{
    if ((v16 & 0xff00) == 0) {
        *cmode = 0x8;
        *imm8 = v16;
    } else if ((v16 & 0x00ff) == 0) {
        *cmode = 0xa;
        *imm8 = v16 >> 8;
    } else {
        return false;
    }
    return true;
}
181
/*
 * Return true if V32 is a valid 32-bit shifted immediate, i.e. at most
 * one of its four bytes is non-zero.  On success *CMODE is 0x0, 0x2,
 * 0x4 or 0x6 for byte 0..3 and *IMM8 is that byte; on failure neither
 * is written.
 */
static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
{
    unsigned shift;

    for (shift = 0; shift < 32; shift += 8) {
        if ((v32 & ~(0xffu << shift)) == 0) {
            /* cmode advances by 2 for every byte position.  */
            *cmode = shift / 4;
            *imm8 = v32 >> shift;
            return true;
        }
    }
    return false;
}
204
/*
 * Return true if V32 is a valid 32-bit "shifting ones" immediate: one
 * byte followed by either 8 or 16 low bits that are all ones, with every
 * higher bit zero.  On success *CMODE is 0xc (8 ones) or 0xd (16 ones)
 * and *IMM8 is the byte; on failure neither is written.
 */
static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
{
    if ((v32 & 0xffff00ff) == 0xff) {
        *cmode = 0xc;
        *imm8 = (v32 >> 8) & 0xff;
        return true;
    }
    if ((v32 & 0xff00ffff) == 0xffff) {
        *cmode = 0xd;
        *imm8 = (v32 >> 16) & 0xff;
        return true;
    }
    return false;
}
219
/*
 * Return true if V32 is a valid float32 immediate: bits [18:0] are zero
 * and bits [30:25] are either 0x20 or 0x1f.  On success *CMODE is 0xf
 * and *IMM8 packs bit 31, bit 25 and bits [24:19]; on failure neither is
 * written.
 */
static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
{
    uint32_t top6;

    if (extract32(v32, 0, 19) != 0) {
        return false;
    }
    top6 = extract32(v32, 25, 6);
    if (top6 != 0x20 && top6 != 0x1f) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract32(v32, 31, 1) << 7)
          | (extract32(v32, 25, 1) << 6)
          | extract32(v32, 19, 6);
    return true;
}
234
/*
 * Return true if V64 is a valid float64 immediate: bits [47:0] are zero
 * and bits [62:54] are either 0x100 or 0x0ff.  On success *CMODE is 0xf
 * and *IMM8 packs bit 63, bit 54 and bits [53:48]; on failure neither is
 * written.
 */
static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
{
    uint64_t top9;

    if (extract64(v64, 0, 48) != 0) {
        return false;
    }
    top9 = extract64(v64, 54, 9);
    if (top9 != 0x100 && top9 != 0x0ff) {
        return false;
    }

    *cmode = 0xf;
    *imm8 = (extract64(v64, 63, 1) << 7)
          | (extract64(v64, 54, 1) << 6)
          | extract64(v64, 48, 6);
    return true;
}
249
/*
 * Decide whether V32 can be built by MOVI followed by ORR.
 *
 * Bytes 3, 2 and 1 of V32 are tried in that order as the byte to be
 * supplied by ORR; the remainder must be loadable by a single MOVI.
 * On success the MOVI parameters are stored in (*CMODE, *IMM8) and the
 * cmode for ORR (6, 4 or 2) is returned; the ORR imm8 can be extracted
 * from V32 at bit position 4 * cmode.  Returns 0 if no split works.
 */
static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
{
    int orr_cmode;

    for (orr_cmode = 6; orr_cmode > 0; orr_cmode -= 2) {
        /* Drop the byte that ORR would contribute.  */
        uint32_t rest = v32 & ~(0xffu << (orr_cmode * 4));

        if (is_shimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
        if (is_soimm32(rest, cmode, imm8)) {
            return orr_cmode;
        }
    }
    return 0;
}
269
/*
 * Return true if V32 is a valid 16-bit or 32-bit shifted immediate.
 * When both halves of V32 are equal it is tested as a 16-bit element,
 * otherwise as a 32-bit element; (*CMODE, *IMM8) are set on success.
 */
static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
{
    bool halves_equal = (v32 == deposit32(v32, 16, 16, v32));

    if (!halves_equal) {
        return is_shimm32(v32, cmode, imm8);
    }
    return is_shimm16(v32, cmode, imm8);
}
279
280static int tcg_target_const_match(tcg_target_long val, TCGType type,
281                                  const TCGArgConstraint *arg_ct)
282{
283    int ct = arg_ct->ct;
284
285    if (ct & TCG_CT_CONST) {
286        return 1;
287    }
288    if (type == TCG_TYPE_I32) {
289        val = (int32_t)val;
290    }
291    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
292        return 1;
293    }
294    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
295        return 1;
296    }
297    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
298        return 1;
299    }
300    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
301        return 1;
302    }
303
304    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
305    case 0:
306        break;
307    case TCG_CT_CONST_ANDI:
308        val = ~val;
309        /* fallthru */
310    case TCG_CT_CONST_ORRI:
311        if (val == deposit64(val, 32, 32, val)) {
312            int cmode, imm8;
313            return is_shimm1632(val, &cmode, &imm8);
314        }
315        break;
316    default:
317        /* Both bits should not be set for the same insn.  */
318        g_assert_not_reached();
319    }
320
321    return 0;
322}
323
/* AArch64 condition codes, using the 4-bit instruction encoding.  */
enum aarch64_cond_code {
    COND_EQ = 0x0,
    COND_NE = 0x1,
    COND_CS = 0x2,      /* unsigned greater or equal */
    COND_HS = COND_CS,  /* alias of CS: greater or equal */
    COND_CC = 0x3,      /* unsigned less than */
    COND_LO = COND_CC,  /* alias of CC: lower */
    COND_MI = 0x4,      /* negative */
    COND_PL = 0x5,      /* zero or greater */
    COND_VS = 0x6,      /* overflow */
    COND_VC = 0x7,      /* no overflow */
    COND_HI = 0x8,      /* unsigned greater than */
    COND_LS = 0x9,      /* unsigned less or equal */
    COND_GE = 0xa,
    COND_LT = 0xb,
    COND_GT = 0xc,
    COND_LE = 0xd,
    COND_AL = 0xe,
    COND_NV = 0xf,      /* behaves like COND_AL here */
};
344
345static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
346    [TCG_COND_EQ] = COND_EQ,
347    [TCG_COND_NE] = COND_NE,
348    [TCG_COND_LT] = COND_LT,
349    [TCG_COND_GE] = COND_GE,
350    [TCG_COND_LE] = COND_LE,
351    [TCG_COND_GT] = COND_GT,
352    /* unsigned */
353    [TCG_COND_LTU] = COND_LO,
354    [TCG_COND_GTU] = COND_HI,
355    [TCG_COND_GEU] = COND_HS,
356    [TCG_COND_LEU] = COND_LS,
357};
358
/*
 * Kind of load/store.  The I3312_* opcode constants place this value
 * at bit 22 of the instruction.
 */
typedef enum {
    LDST_ST     = 0,    /* store */
    LDST_LD     = 1,    /* load */
    LDST_LD_S_X = 2,    /* load and sign-extend into Xt */
    LDST_LD_S_W = 3,    /* load and sign-extend into Wt */
} AArch64LdstType;
365
366/* We encode the format of the insn into the beginning of the name, so that
367   we can have the preprocessor help "typecheck" the insn vs the output
368   function.  Arm didn't provide us with nice names for the formats, so we
369   use the section number of the architecture reference manual in which the
370   instruction group is described.  */
371typedef enum {
372    /* Compare and branch (immediate).  */
373    I3201_CBZ       = 0x34000000,
374    I3201_CBNZ      = 0x35000000,
375
376    /* Conditional branch (immediate).  */
377    I3202_B_C       = 0x54000000,
378
379    /* Unconditional branch (immediate).  */
380    I3206_B         = 0x14000000,
381    I3206_BL        = 0x94000000,
382
383    /* Unconditional branch (register).  */
384    I3207_BR        = 0xd61f0000,
385    I3207_BLR       = 0xd63f0000,
386    I3207_RET       = 0xd65f0000,
387
388    /* AdvSIMD load/store single structure.  */
389    I3303_LD1R      = 0x0d40c000,
390
391    /* Load literal for loading the address at pc-relative offset */
392    I3305_LDR       = 0x58000000,
393    I3305_LDR_v64   = 0x5c000000,
394    I3305_LDR_v128  = 0x9c000000,
395
396    /* Load/store register.  Described here as 3.3.12, but the helper
397       that emits them can transform to 3.3.10 or 3.3.13.  */
398    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
399    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
400    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
401    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
402
403    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
404    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
405    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
406    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
407
408    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
409    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
410
411    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
412    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
413    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
414
415    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
416    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
417
418    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
419    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
420
421    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
422    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
423
424    I3312_TO_I3310  = 0x00200800,
425    I3312_TO_I3313  = 0x01000000,
426
427    /* Load/store register pair instructions.  */
428    I3314_LDP       = 0x28400000,
429    I3314_STP       = 0x28000000,
430
431    /* Add/subtract immediate instructions.  */
432    I3401_ADDI      = 0x11000000,
433    I3401_ADDSI     = 0x31000000,
434    I3401_SUBI      = 0x51000000,
435    I3401_SUBSI     = 0x71000000,
436
437    /* Bitfield instructions.  */
438    I3402_BFM       = 0x33000000,
439    I3402_SBFM      = 0x13000000,
440    I3402_UBFM      = 0x53000000,
441
442    /* Extract instruction.  */
443    I3403_EXTR      = 0x13800000,
444
445    /* Logical immediate instructions.  */
446    I3404_ANDI      = 0x12000000,
447    I3404_ORRI      = 0x32000000,
448    I3404_EORI      = 0x52000000,
449
450    /* Move wide immediate instructions.  */
451    I3405_MOVN      = 0x12800000,
452    I3405_MOVZ      = 0x52800000,
453    I3405_MOVK      = 0x72800000,
454
455    /* PC relative addressing instructions.  */
456    I3406_ADR       = 0x10000000,
457    I3406_ADRP      = 0x90000000,
458
459    /* Add/subtract shifted register instructions (without a shift).  */
460    I3502_ADD       = 0x0b000000,
461    I3502_ADDS      = 0x2b000000,
462    I3502_SUB       = 0x4b000000,
463    I3502_SUBS      = 0x6b000000,
464
465    /* Add/subtract shifted register instructions (with a shift).  */
466    I3502S_ADD_LSL  = I3502_ADD,
467
468    /* Add/subtract with carry instructions.  */
469    I3503_ADC       = 0x1a000000,
470    I3503_SBC       = 0x5a000000,
471
472    /* Conditional select instructions.  */
473    I3506_CSEL      = 0x1a800000,
474    I3506_CSINC     = 0x1a800400,
475    I3506_CSINV     = 0x5a800000,
476    I3506_CSNEG     = 0x5a800400,
477
478    /* Data-processing (1 source) instructions.  */
479    I3507_CLZ       = 0x5ac01000,
480    I3507_RBIT      = 0x5ac00000,
481    I3507_REV16     = 0x5ac00400,
482    I3507_REV32     = 0x5ac00800,
483    I3507_REV64     = 0x5ac00c00,
484
485    /* Data-processing (2 source) instructions.  */
486    I3508_LSLV      = 0x1ac02000,
487    I3508_LSRV      = 0x1ac02400,
488    I3508_ASRV      = 0x1ac02800,
489    I3508_RORV      = 0x1ac02c00,
490    I3508_SMULH     = 0x9b407c00,
491    I3508_UMULH     = 0x9bc07c00,
492    I3508_UDIV      = 0x1ac00800,
493    I3508_SDIV      = 0x1ac00c00,
494
495    /* Data-processing (3 source) instructions.  */
496    I3509_MADD      = 0x1b000000,
497    I3509_MSUB      = 0x1b008000,
498
499    /* Logical shifted register instructions (without a shift).  */
500    I3510_AND       = 0x0a000000,
501    I3510_BIC       = 0x0a200000,
502    I3510_ORR       = 0x2a000000,
503    I3510_ORN       = 0x2a200000,
504    I3510_EOR       = 0x4a000000,
505    I3510_EON       = 0x4a200000,
506    I3510_ANDS      = 0x6a000000,
507
508    /* Logical shifted register instructions (with a shift).  */
509    I3502S_AND_LSR  = I3510_AND | (1 << 22),
510
511    /* AdvSIMD copy */
512    I3605_DUP      = 0x0e000400,
513    I3605_INS      = 0x4e001c00,
514    I3605_UMOV     = 0x0e003c00,
515
516    /* AdvSIMD modified immediate */
517    I3606_MOVI      = 0x0f000400,
518    I3606_MVNI      = 0x2f000400,
519    I3606_BIC       = 0x2f001400,
520    I3606_ORR       = 0x0f001400,
521
522    /* AdvSIMD scalar shift by immediate */
523    I3609_SSHR      = 0x5f000400,
524    I3609_SSRA      = 0x5f001400,
525    I3609_SHL       = 0x5f005400,
526    I3609_USHR      = 0x7f000400,
527    I3609_USRA      = 0x7f001400,
528    I3609_SLI       = 0x7f005400,
529
530    /* AdvSIMD scalar three same */
531    I3611_SQADD     = 0x5e200c00,
532    I3611_SQSUB     = 0x5e202c00,
533    I3611_CMGT      = 0x5e203400,
534    I3611_CMGE      = 0x5e203c00,
535    I3611_SSHL      = 0x5e204400,
536    I3611_ADD       = 0x5e208400,
537    I3611_CMTST     = 0x5e208c00,
538    I3611_UQADD     = 0x7e200c00,
539    I3611_UQSUB     = 0x7e202c00,
540    I3611_CMHI      = 0x7e203400,
541    I3611_CMHS      = 0x7e203c00,
542    I3611_USHL      = 0x7e204400,
543    I3611_SUB       = 0x7e208400,
544    I3611_CMEQ      = 0x7e208c00,
545
546    /* AdvSIMD scalar two-reg misc */
547    I3612_CMGT0     = 0x5e208800,
548    I3612_CMEQ0     = 0x5e209800,
549    I3612_CMLT0     = 0x5e20a800,
550    I3612_ABS       = 0x5e20b800,
551    I3612_CMGE0     = 0x7e208800,
552    I3612_CMLE0     = 0x7e209800,
553    I3612_NEG       = 0x7e20b800,
554
555    /* AdvSIMD shift by immediate */
556    I3614_SSHR      = 0x0f000400,
557    I3614_SSRA      = 0x0f001400,
558    I3614_SHL       = 0x0f005400,
559    I3614_SLI       = 0x2f005400,
560    I3614_USHR      = 0x2f000400,
561    I3614_USRA      = 0x2f001400,
562
563    /* AdvSIMD three same.  */
564    I3616_ADD       = 0x0e208400,
565    I3616_AND       = 0x0e201c00,
566    I3616_BIC       = 0x0e601c00,
567    I3616_BIF       = 0x2ee01c00,
568    I3616_BIT       = 0x2ea01c00,
569    I3616_BSL       = 0x2e601c00,
570    I3616_EOR       = 0x2e201c00,
571    I3616_MUL       = 0x0e209c00,
572    I3616_ORR       = 0x0ea01c00,
573    I3616_ORN       = 0x0ee01c00,
574    I3616_SUB       = 0x2e208400,
575    I3616_CMGT      = 0x0e203400,
576    I3616_CMGE      = 0x0e203c00,
577    I3616_CMTST     = 0x0e208c00,
578    I3616_CMHI      = 0x2e203400,
579    I3616_CMHS      = 0x2e203c00,
580    I3616_CMEQ      = 0x2e208c00,
581    I3616_SMAX      = 0x0e206400,
582    I3616_SMIN      = 0x0e206c00,
583    I3616_SSHL      = 0x0e204400,
584    I3616_SQADD     = 0x0e200c00,
585    I3616_SQSUB     = 0x0e202c00,
586    I3616_UMAX      = 0x2e206400,
587    I3616_UMIN      = 0x2e206c00,
588    I3616_UQADD     = 0x2e200c00,
589    I3616_UQSUB     = 0x2e202c00,
590    I3616_USHL      = 0x2e204400,
591
592    /* AdvSIMD two-reg misc.  */
593    I3617_CMGT0     = 0x0e208800,
594    I3617_CMEQ0     = 0x0e209800,
595    I3617_CMLT0     = 0x0e20a800,
596    I3617_CMGE0     = 0x2e208800,
597    I3617_CMLE0     = 0x2e209800,
598    I3617_NOT       = 0x2e205800,
599    I3617_ABS       = 0x0e20b800,
600    I3617_NEG       = 0x2e20b800,
601
602    /* System instructions.  */
603    NOP             = 0xd503201f,
604    DMB_ISH         = 0xd50338bf,
605    DMB_LD          = 0x00000100,
606    DMB_ST          = 0x00000200,
607} AArch64Insn;
608
609static inline uint32_t tcg_in32(TCGContext *s)
610{
611    uint32_t v = *(uint32_t *)s->code_ptr;
612    return v;
613}
614
/*
 * Emit an opcode with "type-checking" of the format: FMT selects both
 * the emitter tcg_out_insn_<FMT> and the opcode constant I<FMT>_<OP>,
 * so an opcode can only be paired with the emitter of its own format.
 */
#define tcg_out_insn(S, FMT, OP, ...)                   \
    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
618
619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620                              TCGReg rt, TCGReg rn, unsigned size)
621{
622    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623}
624
625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626                              int imm19, TCGReg rt)
627{
628    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629}
630
631static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
632                              TCGReg rt, int imm19)
633{
634    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
635}
636
637static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
638                              TCGCond c, int imm19)
639{
640    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
641}
642
643static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
644{
645    tcg_out32(s, insn | (imm26 & 0x03ffffff));
646}
647
648static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
649{
650    tcg_out32(s, insn | rn << 5);
651}
652
653static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
654                              TCGReg r1, TCGReg r2, TCGReg rn,
655                              tcg_target_long ofs, bool pre, bool w)
656{
657    insn |= 1u << 31; /* ext */
658    insn |= pre << 24;
659    insn |= w << 23;
660
661    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
662    insn |= (ofs & (0x7f << 3)) << (15 - 3);
663
664    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
665}
666
667static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
668                              TCGReg rd, TCGReg rn, uint64_t aimm)
669{
670    if (aimm > 0xfff) {
671        tcg_debug_assert((aimm & 0xfff) == 0);
672        aimm >>= 12;
673        tcg_debug_assert(aimm <= 0xfff);
674        aimm |= 1 << 12;  /* apply LSL 12 */
675    }
676    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
677}
678
679/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
680   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
681   that feed the DecodeBitMasks pseudo function.  */
682static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
683                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
684{
685    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
686              | rn << 5 | rd);
687}
688
689#define tcg_out_insn_3404  tcg_out_insn_3402
690
691static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
692                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
693{
694    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
695              | rn << 5 | rd);
696}
697
698/* This function is used for the Move (wide immediate) instruction group.
699   Note that SHIFT is a full shift count, not the 2 bit HW field. */
700static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
701                              TCGReg rd, uint16_t half, unsigned shift)
702{
703    tcg_debug_assert((shift & ~0x30) == 0);
704    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
705}
706
707static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
708                              TCGReg rd, int64_t disp)
709{
710    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
711}
712
713/* This function is for both 3.5.2 (Add/Subtract shifted register), for
714   the rare occasion when we actually want to supply a shift amount.  */
715static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
716                                      TCGType ext, TCGReg rd, TCGReg rn,
717                                      TCGReg rm, int imm6)
718{
719    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
720}
721
722/* This function is for 3.5.2 (Add/subtract shifted register),
723   and 3.5.10 (Logical shifted register), for the vast majorty of cases
724   when we don't want to apply a shift.  Thus it can also be used for
725   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
726static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
727                              TCGReg rd, TCGReg rn, TCGReg rm)
728{
729    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
730}
731
732#define tcg_out_insn_3503  tcg_out_insn_3502
733#define tcg_out_insn_3508  tcg_out_insn_3502
734#define tcg_out_insn_3510  tcg_out_insn_3502
735
736static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
737                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
738{
739    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
740              | tcg_cond_to_aarch64[c] << 12);
741}
742
743static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
744                              TCGReg rd, TCGReg rn)
745{
746    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
747}
748
749static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
750                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
751{
752    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
753}
754
755static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
756                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
757{
758    /* Note that bit 11 set means general register input.  Therefore
759       we can handle both register sets with one function.  */
760    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
761              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
762}
763
764static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
765                              TCGReg rd, bool op, int cmode, uint8_t imm8)
766{
767    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
768              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
769}
770
771static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
772                              TCGReg rd, TCGReg rn, unsigned immhb)
773{
774    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
775}
776
777static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
778                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
779{
780    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
781              | (rn & 0x1f) << 5 | (rd & 0x1f));
782}
783
784static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
785                              unsigned size, TCGReg rd, TCGReg rn)
786{
787    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
788}
789
790static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
791                              TCGReg rd, TCGReg rn, unsigned immhb)
792{
793    tcg_out32(s, insn | q << 30 | immhb << 16
794              | (rn & 0x1f) << 5 | (rd & 0x1f));
795}
796
797static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
798                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
799{
800    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
801              | (rn & 0x1f) << 5 | (rd & 0x1f));
802}
803
804static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
805                              unsigned size, TCGReg rd, TCGReg rn)
806{
807    tcg_out32(s, insn | q << 30 | (size << 22)
808              | (rn & 0x1f) << 5 | (rd & 0x1f));
809}
810
811static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
812                              TCGReg rd, TCGReg base, TCGType ext,
813                              TCGReg regoff)
814{
815    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
816    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
817              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
818}
819
820static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
821                              TCGReg rd, TCGReg rn, intptr_t offset)
822{
823    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
824}
825
826static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
827                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
828{
829    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
830    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
831              | rn << 5 | (rd & 0x1f));
832}
833
834/* Register to register move using ORR (shifted register with no shift). */
835static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
836{
837    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
838}
839
840/* Register to register move using ADDI (move to/from SP).  */
841static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
842{
843    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
844}
845
846/* This function is used for the Logical (immediate) instruction group.
847   The value of LIMM must satisfy IS_LIMM.  See the comment above about
848   only supporting simplified logical immediates.  */
849static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
850                             TCGReg rd, TCGReg rn, uint64_t limm)
851{
852    unsigned h, l, r, c;
853
854    tcg_debug_assert(is_limm(limm));
855
856    h = clz64(limm);
857    l = ctz64(limm);
858    if (l == 0) {
859        r = 0;                  /* form 0....01....1 */
860        c = ctz64(~limm) - 1;
861        if (h == 0) {
862            r = clz64(~limm);   /* form 1..10..01..1 */
863            c += r;
864        }
865    } else {
866        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
867        c = r - h - 1;
868    }
869    if (ext == TCG_TYPE_I32) {
870        r &= 31;
871        c &= 31;
872    }
873
874    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
875}
876
877static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
878                             TCGReg rd, int64_t v64)
879{
880    bool q = type == TCG_TYPE_V128;
881    int cmode, imm8, i;
882
883    /* Test all bytes equal first.  */
884    if (vece == MO_8) {
885        imm8 = (uint8_t)v64;
886        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
887        return;
888    }
889
890    /*
891     * Test all bytes 0x00 or 0xff second.  This can match cases that
892     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
893     */
894    for (i = imm8 = 0; i < 8; i++) {
895        uint8_t byte = v64 >> (i * 8);
896        if (byte == 0xff) {
897            imm8 |= 1 << i;
898        } else if (byte != 0) {
899            goto fail_bytes;
900        }
901    }
902    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
903    return;
904 fail_bytes:
905
906    /*
907     * Tests for various replications.  For each element width, if we
908     * cannot find an expansion there's no point checking a larger
909     * width because we already know by replication it cannot match.
910     */
911    if (vece == MO_16) {
912        uint16_t v16 = v64;
913
914        if (is_shimm16(v16, &cmode, &imm8)) {
915            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
916            return;
917        }
918        if (is_shimm16(~v16, &cmode, &imm8)) {
919            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
920            return;
921        }
922
923        /*
924         * Otherwise, all remaining constants can be loaded in two insns:
925         * rd = v16 & 0xff, rd |= v16 & 0xff00.
926         */
927        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
928        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
929        return;
930    } else if (vece == MO_32) {
931        uint32_t v32 = v64;
932        uint32_t n32 = ~v32;
933
934        if (is_shimm32(v32, &cmode, &imm8) ||
935            is_soimm32(v32, &cmode, &imm8) ||
936            is_fimm32(v32, &cmode, &imm8)) {
937            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
938            return;
939        }
940        if (is_shimm32(n32, &cmode, &imm8) ||
941            is_soimm32(n32, &cmode, &imm8)) {
942            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
943            return;
944        }
945
946        /*
947         * Restrict the set of constants to those we can load with
948         * two instructions.  Others we load from the pool.
949         */
950        i = is_shimm32_pair(v32, &cmode, &imm8);
951        if (i) {
952            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
953            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
954            return;
955        }
956        i = is_shimm32_pair(n32, &cmode, &imm8);
957        if (i) {
958            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
959            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
960            return;
961        }
962    } else if (is_fimm64(v64, &cmode, &imm8)) {
963        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
964        return;
965    }
966
967    /*
968     * As a last resort, load from the constant pool.  Sadly there
969     * is no LD1R (literal), so store the full 16-byte vector.
970     */
971    if (type == TCG_TYPE_V128) {
972        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
973        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
974    } else {
975        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
976        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
977    }
978}
979
980static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
981                            TCGReg rd, TCGReg rs)
982{
983    int is_q = type - TCG_TYPE_V64;
984    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
985    return true;
986}
987
988static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
989                             TCGReg r, TCGReg base, intptr_t offset)
990{
991    TCGReg temp = TCG_REG_TMP;
992
993    if (offset < -0xffffff || offset > 0xffffff) {
994        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
995        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
996        base = temp;
997    } else {
998        AArch64Insn add_insn = I3401_ADDI;
999
1000        if (offset < 0) {
1001            add_insn = I3401_SUBI;
1002            offset = -offset;
1003        }
1004        if (offset & 0xfff000) {
1005            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1006            base = temp;
1007        }
1008        if (offset & 0xfff) {
1009            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1010            base = temp;
1011        }
1012    }
1013    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1014    return true;
1015}
1016
1017static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1018                         tcg_target_long value)
1019{
1020    tcg_target_long svalue = value;
1021    tcg_target_long ivalue = ~value;
1022    tcg_target_long t0, t1, t2;
1023    int s0, s1;
1024    AArch64Insn opc;
1025
1026    switch (type) {
1027    case TCG_TYPE_I32:
1028    case TCG_TYPE_I64:
1029        tcg_debug_assert(rd < 32);
1030        break;
1031    default:
1032        g_assert_not_reached();
1033    }
1034
1035    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1036       values within [2**31, 2**32-1], we can create smaller sequences by
1037       interpreting this as a negative 32-bit number, while ensuring that
1038       the high 32 bits are cleared by setting SF=0.  */
1039    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1040        svalue = (int32_t)value;
1041        value = (uint32_t)value;
1042        ivalue = (uint32_t)ivalue;
1043        type = TCG_TYPE_I32;
1044    }
1045
1046    /* Speed things up by handling the common case of small positive
1047       and negative values specially.  */
1048    if ((value & ~0xffffull) == 0) {
1049        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1050        return;
1051    } else if ((ivalue & ~0xffffull) == 0) {
1052        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1053        return;
1054    }
1055
1056    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1057       use the sign-extended value.  That lets us match rotated values such
1058       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1059    if (is_limm(svalue)) {
1060        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1061        return;
1062    }
1063
1064    /* Look for host pointer values within 4G of the PC.  This happens
1065       often when loading pointers to QEMU's own data structures.  */
1066    if (type == TCG_TYPE_I64) {
1067        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1068        tcg_target_long disp = value - src_rx;
1069        if (disp == sextract64(disp, 0, 21)) {
1070            tcg_out_insn(s, 3406, ADR, rd, disp);
1071            return;
1072        }
1073        disp = (value >> 12) - (src_rx >> 12);
1074        if (disp == sextract64(disp, 0, 21)) {
1075            tcg_out_insn(s, 3406, ADRP, rd, disp);
1076            if (value & 0xfff) {
1077                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1078            }
1079            return;
1080        }
1081    }
1082
1083    /* Would it take fewer insns to begin with MOVN?  */
1084    if (ctpop64(value) >= 32) {
1085        t0 = ivalue;
1086        opc = I3405_MOVN;
1087    } else {
1088        t0 = value;
1089        opc = I3405_MOVZ;
1090    }
1091    s0 = ctz64(t0) & (63 & -16);
1092    t1 = t0 & ~(0xffffUL << s0);
1093    s1 = ctz64(t1) & (63 & -16);
1094    t2 = t1 & ~(0xffffUL << s1);
1095    if (t2 == 0) {
1096        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1097        if (t1 != 0) {
1098            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1099        }
1100        return;
1101    }
1102
1103    /* For more than 2 insns, dump it into the constant pool.  */
1104    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1105    tcg_out_insn(s, 3305, LDR, 0, rd);
1106}
1107
1108/* Define something more legible for general use.  */
1109#define tcg_out_ldst_r  tcg_out_insn_3310
1110
1111static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1112                         TCGReg rn, intptr_t offset, int lgsize)
1113{
1114    /* If the offset is naturally aligned and in range, then we can
1115       use the scaled uimm12 encoding */
1116    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1117        uintptr_t scaled_uimm = offset >> lgsize;
1118        if (scaled_uimm <= 0xfff) {
1119            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1120            return;
1121        }
1122    }
1123
1124    /* Small signed offsets can use the unscaled encoding.  */
1125    if (offset >= -256 && offset < 256) {
1126        tcg_out_insn_3312(s, insn, rd, rn, offset);
1127        return;
1128    }
1129
1130    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1131    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
1132    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
1133}
1134
1135static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1136{
1137    if (ret == arg) {
1138        return true;
1139    }
1140    switch (type) {
1141    case TCG_TYPE_I32:
1142    case TCG_TYPE_I64:
1143        if (ret < 32 && arg < 32) {
1144            tcg_out_movr(s, type, ret, arg);
1145            break;
1146        } else if (ret < 32) {
1147            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 0, 0);
1148            break;
1149        } else if (arg < 32) {
1150            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1151            break;
1152        }
1153        /* FALLTHRU */
1154
1155    case TCG_TYPE_V64:
1156        tcg_debug_assert(ret >= 32 && arg >= 32);
1157        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1158        break;
1159    case TCG_TYPE_V128:
1160        tcg_debug_assert(ret >= 32 && arg >= 32);
1161        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1162        break;
1163
1164    default:
1165        g_assert_not_reached();
1166    }
1167    return true;
1168}
1169
1170static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1171                       TCGReg base, intptr_t ofs)
1172{
1173    AArch64Insn insn;
1174    int lgsz;
1175
1176    switch (type) {
1177    case TCG_TYPE_I32:
1178        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1179        lgsz = 2;
1180        break;
1181    case TCG_TYPE_I64:
1182        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1183        lgsz = 3;
1184        break;
1185    case TCG_TYPE_V64:
1186        insn = I3312_LDRVD;
1187        lgsz = 3;
1188        break;
1189    case TCG_TYPE_V128:
1190        insn = I3312_LDRVQ;
1191        lgsz = 4;
1192        break;
1193    default:
1194        g_assert_not_reached();
1195    }
1196    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1197}
1198
1199static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1200                       TCGReg base, intptr_t ofs)
1201{
1202    AArch64Insn insn;
1203    int lgsz;
1204
1205    switch (type) {
1206    case TCG_TYPE_I32:
1207        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1208        lgsz = 2;
1209        break;
1210    case TCG_TYPE_I64:
1211        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1212        lgsz = 3;
1213        break;
1214    case TCG_TYPE_V64:
1215        insn = I3312_STRVD;
1216        lgsz = 3;
1217        break;
1218    case TCG_TYPE_V128:
1219        insn = I3312_STRVQ;
1220        lgsz = 4;
1221        break;
1222    default:
1223        g_assert_not_reached();
1224    }
1225    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1226}
1227
1228static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1229                               TCGReg base, intptr_t ofs)
1230{
1231    if (type <= TCG_TYPE_I64 && val == 0) {
1232        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1233        return true;
1234    }
1235    return false;
1236}
1237
1238static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1239                               TCGReg rn, unsigned int a, unsigned int b)
1240{
1241    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1242}
1243
1244static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1245                                TCGReg rn, unsigned int a, unsigned int b)
1246{
1247    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1248}
1249
1250static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1251                                TCGReg rn, unsigned int a, unsigned int b)
1252{
1253    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1254}
1255
1256static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1257                                TCGReg rn, TCGReg rm, unsigned int a)
1258{
1259    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1260}
1261
1262static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1263                               TCGReg rd, TCGReg rn, unsigned int m)
1264{
1265    int bits = ext ? 64 : 32;
1266    int max = bits - 1;
1267    tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
1268}
1269
1270static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1271                               TCGReg rd, TCGReg rn, unsigned int m)
1272{
1273    int max = ext ? 63 : 31;
1274    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1275}
1276
1277static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1278                               TCGReg rd, TCGReg rn, unsigned int m)
1279{
1280    int max = ext ? 63 : 31;
1281    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1282}
1283
1284static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1285                                TCGReg rd, TCGReg rn, unsigned int m)
1286{
1287    int max = ext ? 63 : 31;
1288    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1289}
1290
1291static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1292                                TCGReg rd, TCGReg rn, unsigned int m)
1293{
1294    int bits = ext ? 64 : 32;
1295    int max = bits - 1;
1296    tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
1297}
1298
1299static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1300                               TCGReg rn, unsigned lsb, unsigned width)
1301{
1302    unsigned size = ext ? 64 : 32;
1303    unsigned a = (size - lsb) & (size - 1);
1304    unsigned b = width - 1;
1305    tcg_out_bfm(s, ext, rd, rn, a, b);
1306}
1307
1308static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1309                        tcg_target_long b, bool const_b)
1310{
1311    if (const_b) {
1312        /* Using CMP or CMN aliases.  */
1313        if (b >= 0) {
1314            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1315        } else {
1316            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1317        }
1318    } else {
1319        /* Using CMP alias SUBS wzr, Wn, Wm */
1320        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1321    }
1322}
1323
1324static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1325{
1326    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1327    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1328    tcg_out_insn(s, 3206, B, offset);
1329}
1330
1331static void tcg_out_goto_long(TCGContext *s, const tcg_insn_unit *target)
1332{
1333    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1334    if (offset == sextract64(offset, 0, 26)) {
1335        tcg_out_insn(s, 3206, B, offset);
1336    } else {
1337        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1338        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1339    }
1340}
1341
1342static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
1343{
1344    tcg_out_insn(s, 3207, BLR, reg);
1345}
1346
1347static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target)
1348{
1349    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1350    if (offset == sextract64(offset, 0, 26)) {
1351        tcg_out_insn(s, 3206, BL, offset);
1352    } else {
1353        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
1354        tcg_out_callr(s, TCG_REG_TMP);
1355    }
1356}
1357
1358void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
1359                              uintptr_t jmp_rw, uintptr_t addr)
1360{
1361    tcg_insn_unit i1, i2;
1362    TCGType rt = TCG_TYPE_I64;
1363    TCGReg  rd = TCG_REG_TMP;
1364    uint64_t pair;
1365
1366    ptrdiff_t offset = addr - jmp_rx;
1367
1368    if (offset == sextract64(offset, 0, 26)) {
1369        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
1370        i2 = NOP;
1371    } else {
1372        offset = (addr >> 12) - (jmp_rx >> 12);
1373
1374        /* patch ADRP */
1375        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
1376        /* patch ADDI */
1377        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
1378    }
1379    pair = (uint64_t)i2 << 32 | i1;
1380    qatomic_set((uint64_t *)jmp_rw, pair);
1381    flush_idcache_range(jmp_rx, jmp_rw, 8);
1382}
1383
1384static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1385{
1386    if (!l->has_value) {
1387        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1388        tcg_out_insn(s, 3206, B, 0);
1389    } else {
1390        tcg_out_goto(s, l->u.value_ptr);
1391    }
1392}
1393
1394static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1395                           TCGArg b, bool b_const, TCGLabel *l)
1396{
1397    intptr_t offset;
1398    bool need_cmp;
1399
1400    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1401        need_cmp = false;
1402    } else {
1403        need_cmp = true;
1404        tcg_out_cmp(s, ext, a, b, b_const);
1405    }
1406
1407    if (!l->has_value) {
1408        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1409        offset = tcg_in32(s) >> 5;
1410    } else {
1411        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1412        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1413    }
1414
1415    if (need_cmp) {
1416        tcg_out_insn(s, 3202, B_C, c, offset);
1417    } else if (c == TCG_COND_EQ) {
1418        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1419    } else {
1420        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1421    }
1422}
1423
1424static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
1425{
1426    tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
1427}
1428
1429static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
1430{
1431    tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
1432}
1433
1434static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
1435{
1436    tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
1437}
1438
1439static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1440                               TCGReg rd, TCGReg rn)
1441{
1442    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1443    int bits = (8 << s_bits) - 1;
1444    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1445}
1446
1447static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1448                               TCGReg rd, TCGReg rn)
1449{
1450    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1451    int bits = (8 << s_bits) - 1;
1452    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1453}
1454
1455static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1456                            TCGReg rn, int64_t aimm)
1457{
1458    if (aimm >= 0) {
1459        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1460    } else {
1461        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1462    }
1463}
1464
1465static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1466                            TCGReg rh, TCGReg al, TCGReg ah,
1467                            tcg_target_long bl, tcg_target_long bh,
1468                            bool const_bl, bool const_bh, bool sub)
1469{
1470    TCGReg orig_rl = rl;
1471    AArch64Insn insn;
1472
1473    if (rl == ah || (!const_bh && rl == bh)) {
1474        rl = TCG_REG_TMP;
1475    }
1476
1477    if (const_bl) {
1478        if (bl < 0) {
1479            bl = -bl;
1480            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1481        } else {
1482            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1483        }
1484
1485        if (unlikely(al == TCG_REG_XZR)) {
1486            /* ??? We want to allow al to be zero for the benefit of
1487               negation via subtraction.  However, that leaves open the
1488               possibility of adding 0+const in the low part, and the
1489               immediate add instructions encode XSP not XZR.  Don't try
1490               anything more elaborate here than loading another zero.  */
1491            al = TCG_REG_TMP;
1492            tcg_out_movi(s, ext, al, 0);
1493        }
1494        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1495    } else {
1496        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1497    }
1498
1499    insn = I3503_ADC;
1500    if (const_bh) {
1501        /* Note that the only two constants we support are 0 and -1, and
1502           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1503        if ((bh != 0) ^ sub) {
1504            insn = I3503_SBC;
1505        }
1506        bh = TCG_REG_XZR;
1507    } else if (sub) {
1508        insn = I3503_SBC;
1509    }
1510    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1511
1512    tcg_out_mov(s, ext, orig_rl, rl);
1513}
1514
1515static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1516{
1517    static const uint32_t sync[] = {
1518        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1519        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1520        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1521        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1522        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1523    };
1524    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1525}
1526
1527static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1528                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1529{
1530    TCGReg a1 = a0;
1531    if (is_ctz) {
1532        a1 = TCG_REG_TMP;
1533        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1534    }
1535    if (const_b && b == (ext ? 64 : 32)) {
1536        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1537    } else {
1538        AArch64Insn sel = I3506_CSEL;
1539
1540        tcg_out_cmp(s, ext, a0, 0, 1);
1541        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1542
1543        if (const_b) {
1544            if (b == -1) {
1545                b = TCG_REG_XZR;
1546                sel = I3506_CSINV;
1547            } else if (b == 0) {
1548                b = TCG_REG_XZR;
1549            } else {
1550                tcg_out_movi(s, ext, d, b);
1551                b = d;
1552            }
1553        }
1554        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1555    }
1556}
1557
1558#ifdef CONFIG_SOFTMMU
1559#include "../tcg-ldst.c.inc"
1560
1561/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1562 *                                     TCGMemOpIdx oi, uintptr_t ra)
1563 */
1564static void * const qemu_ld_helpers[16] = {
1565    [MO_UB]   = helper_ret_ldub_mmu,
1566    [MO_LEUW] = helper_le_lduw_mmu,
1567    [MO_LEUL] = helper_le_ldul_mmu,
1568    [MO_LEQ]  = helper_le_ldq_mmu,
1569    [MO_BEUW] = helper_be_lduw_mmu,
1570    [MO_BEUL] = helper_be_ldul_mmu,
1571    [MO_BEQ]  = helper_be_ldq_mmu,
1572};
1573
1574/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1575 *                                     uintxx_t val, TCGMemOpIdx oi,
1576 *                                     uintptr_t ra)
1577 */
1578static void * const qemu_st_helpers[16] = {
1579    [MO_UB]   = helper_ret_stb_mmu,
1580    [MO_LEUW] = helper_le_stw_mmu,
1581    [MO_LEUL] = helper_le_stl_mmu,
1582    [MO_LEQ]  = helper_le_stq_mmu,
1583    [MO_BEUW] = helper_be_stw_mmu,
1584    [MO_BEUL] = helper_be_stl_mmu,
1585    [MO_BEQ]  = helper_be_stq_mmu,
1586};
1587
1588static inline void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
1589{
1590    ptrdiff_t offset = tcg_pcrel_diff(s, target);
1591    tcg_debug_assert(offset == sextract64(offset, 0, 21));
1592    tcg_out_insn(s, 3406, ADR, rd, offset);
1593}
1594
1595static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1596{
1597    TCGMemOpIdx oi = lb->oi;
1598    MemOp opc = get_memop(oi);
1599    MemOp size = opc & MO_SIZE;
1600
1601    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1602        return false;
1603    }
1604
1605    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1606    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1607    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1608    tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1609    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1610    if (opc & MO_SIGN) {
1611        tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1612    } else {
1613        tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1614    }
1615
1616    tcg_out_goto(s, lb->raddr);
1617    return true;
1618}
1619
1620static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1621{
1622    TCGMemOpIdx oi = lb->oi;
1623    MemOp opc = get_memop(oi);
1624    MemOp size = opc & MO_SIZE;
1625
1626    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1627        return false;
1628    }
1629
1630    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1631    tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1632    tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1633    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1634    tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1635    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1636    tcg_out_goto(s, lb->raddr);
1637    return true;
1638}
1639
1640static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1641                                TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1642                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1643{
1644    TCGLabelQemuLdst *label = new_ldst_label(s);
1645
1646    label->is_ld = is_ld;
1647    label->oi = oi;
1648    label->type = ext;
1649    label->datalo_reg = data_reg;
1650    label->addrlo_reg = addr_reg;
1651    label->raddr = tcg_splitwx_to_rx(raddr);
1652    label->label_ptr[0] = label_ptr;
1653}
1654
1655/* We expect to use a 7-bit scaled negative offset from ENV.  */
1656QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1657QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
1658
1659/* These offsets are built into the LDP below.  */
1660QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1661QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1662
1663/* Load and compare a TLB entry, emitting the conditional jump to the
1664   slow path for the failure case, which will be patched later when finalizing
1665   the slow path. Generated code returns the host addend in X1,
1666   clobbers X0,X2,X3,TMP. */
1667static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1668                             tcg_insn_unit **label_ptr, int mem_index,
1669                             bool is_read)
1670{
1671    unsigned a_bits = get_alignment_bits(opc);
1672    unsigned s_bits = opc & MO_SIZE;
1673    unsigned a_mask = (1u << a_bits) - 1;
1674    unsigned s_mask = (1u << s_bits) - 1;
1675    TCGReg x3;
1676    TCGType mask_type;
1677    uint64_t compare_mask;
1678
1679    mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
1680                 ? TCG_TYPE_I64 : TCG_TYPE_I32);
1681
1682    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}.  */
1683    tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
1684                 TLB_MASK_TABLE_OFS(mem_index), 1, 0);
1685
1686    /* Extract the TLB index from the address into X0.  */
1687    tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1688                 TCG_REG_X0, TCG_REG_X0, addr_reg,
1689                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1690
1691    /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1.  */
1692    tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
1693
1694    /* Load the tlb comparator into X0, and the fast path addend into X1.  */
1695    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
1696               ? offsetof(CPUTLBEntry, addr_read)
1697               : offsetof(CPUTLBEntry, addr_write));
1698    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
1699               offsetof(CPUTLBEntry, addend));
1700
1701    /* For aligned accesses, we check the first byte and include the alignment
1702       bits within the address.  For unaligned access, we check that we don't
1703       cross pages using the address of the last byte of the access.  */
1704    if (a_bits >= s_bits) {
1705        x3 = addr_reg;
1706    } else {
1707        tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1708                     TCG_REG_X3, addr_reg, s_mask - a_mask);
1709        x3 = TCG_REG_X3;
1710    }
1711    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1712
1713    /* Store the page mask part of the address into X3.  */
1714    tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1715                     TCG_REG_X3, x3, compare_mask);
1716
1717    /* Perform the address comparison. */
1718    tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
1719
1720    /* If not equal, we jump to the slow path. */
1721    *label_ptr = s->code_ptr;
1722    tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1723}
1724
1725#endif /* CONFIG_SOFTMMU */
1726
1727static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1728                                   TCGReg data_r, TCGReg addr_r,
1729                                   TCGType otype, TCGReg off_r)
1730{
1731    const MemOp bswap = memop & MO_BSWAP;
1732
1733    switch (memop & MO_SSIZE) {
1734    case MO_UB:
1735        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1736        break;
1737    case MO_SB:
1738        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1739                       data_r, addr_r, otype, off_r);
1740        break;
1741    case MO_UW:
1742        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1743        if (bswap) {
1744            tcg_out_rev16(s, data_r, data_r);
1745        }
1746        break;
1747    case MO_SW:
1748        if (bswap) {
1749            tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1750            tcg_out_rev16(s, data_r, data_r);
1751            tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1752        } else {
1753            tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1754                           data_r, addr_r, otype, off_r);
1755        }
1756        break;
1757    case MO_UL:
1758        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1759        if (bswap) {
1760            tcg_out_rev32(s, data_r, data_r);
1761        }
1762        break;
1763    case MO_SL:
1764        if (bswap) {
1765            tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1766            tcg_out_rev32(s, data_r, data_r);
1767            tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1768        } else {
1769            tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1770        }
1771        break;
1772    case MO_Q:
1773        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1774        if (bswap) {
1775            tcg_out_rev64(s, data_r, data_r);
1776        }
1777        break;
1778    default:
1779        tcg_abort();
1780    }
1781}
1782
1783static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1784                                   TCGReg data_r, TCGReg addr_r,
1785                                   TCGType otype, TCGReg off_r)
1786{
1787    const MemOp bswap = memop & MO_BSWAP;
1788
1789    switch (memop & MO_SIZE) {
1790    case MO_8:
1791        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1792        break;
1793    case MO_16:
1794        if (bswap && data_r != TCG_REG_XZR) {
1795            tcg_out_rev16(s, TCG_REG_TMP, data_r);
1796            data_r = TCG_REG_TMP;
1797        }
1798        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1799        break;
1800    case MO_32:
1801        if (bswap && data_r != TCG_REG_XZR) {
1802            tcg_out_rev32(s, TCG_REG_TMP, data_r);
1803            data_r = TCG_REG_TMP;
1804        }
1805        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1806        break;
1807    case MO_64:
1808        if (bswap && data_r != TCG_REG_XZR) {
1809            tcg_out_rev64(s, TCG_REG_TMP, data_r);
1810            data_r = TCG_REG_TMP;
1811        }
1812        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1813        break;
1814    default:
1815        tcg_abort();
1816    }
1817}
1818
1819static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1820                            TCGMemOpIdx oi, TCGType ext)
1821{
1822    MemOp memop = get_memop(oi);
1823    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1824#ifdef CONFIG_SOFTMMU
1825    unsigned mem_index = get_mmuidx(oi);
1826    tcg_insn_unit *label_ptr;
1827
1828    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1829    tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1830                           TCG_REG_X1, otype, addr_reg);
1831    add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1832                        s->code_ptr, label_ptr);
1833#else /* !CONFIG_SOFTMMU */
1834    if (USE_GUEST_BASE) {
1835        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1836                               TCG_REG_GUEST_BASE, otype, addr_reg);
1837    } else {
1838        tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1839                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1840    }
1841#endif /* CONFIG_SOFTMMU */
1842}
1843
1844static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1845                            TCGMemOpIdx oi)
1846{
1847    MemOp memop = get_memop(oi);
1848    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1849#ifdef CONFIG_SOFTMMU
1850    unsigned mem_index = get_mmuidx(oi);
1851    tcg_insn_unit *label_ptr;
1852
1853    tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1854    tcg_out_qemu_st_direct(s, memop, data_reg,
1855                           TCG_REG_X1, otype, addr_reg);
1856    add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1857                        data_reg, addr_reg, s->code_ptr, label_ptr);
1858#else /* !CONFIG_SOFTMMU */
1859    if (USE_GUEST_BASE) {
1860        tcg_out_qemu_st_direct(s, memop, data_reg,
1861                               TCG_REG_GUEST_BASE, otype, addr_reg);
1862    } else {
1863        tcg_out_qemu_st_direct(s, memop, data_reg,
1864                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1865    }
1866#endif /* CONFIG_SOFTMMU */
1867}
1868
/*
 * Code address that exit_tb jumps to (via tcg_out_goto_long) after loading
 * a non-zero return value into X0.  It is not assigned in this part of the
 * file; presumably set when the prologue/epilogue is generated -- confirm.
 */
1869static const tcg_insn_unit *tb_ret_addr;
1870
/*
 * Emit host code for one integer TCG opcode.
 *
 * @opc selects the operation.  @args holds its operands and
 * @const_args[i] is non-zero when operand i is a constant rather than
 * a register.  The mov and call opcodes, and any opcode without a case
 * below, reach g_assert_not_reached().
 */
1871static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1872                       const TCGArg args[TCG_MAX_OP_ARGS],
1873                       const int const_args[TCG_MAX_OP_ARGS])
1874{
1875    /* 99% of the time, we can signal the use of extension registers
1876       by looking to see if the opcode handles 64-bit data.  */
1877    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1878
1879    /* Hoist the loads of the most common arguments.  */
1880    TCGArg a0 = args[0];
1881    TCGArg a1 = args[1];
1882    TCGArg a2 = args[2];
1883    int c2 = const_args[2];
1884
1885    /* Some operands are defined with "rZ" constraint, a register or
1886       the zero register.  These need not actually test args[I] == 0.  */
1887#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1888
1889    switch (opc) {
1890    case INDEX_op_exit_tb:
1891        /* Reuse the zeroing that exists for goto_ptr.  */
1892        if (a0 == 0) {
1893            tcg_out_goto_long(s, tcg_code_gen_epilogue);
1894        } else {
1895            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1896            tcg_out_goto_long(s, tb_ret_addr);
1897        }
1898        break;
1899
1900    case INDEX_op_goto_tb:
1901        if (s->tb_jmp_insn_offset != NULL) {
1902            /* TCG_TARGET_HAS_direct_jump */
1903            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1904               write can be used to patch the target address. */
1905            if ((uintptr_t)s->code_ptr & 7) {
1906                tcg_out32(s, NOP);
1907            }
1908            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1909            /* actual branch destination will be patched by
1910               tb_target_set_jmp_target later. */
1911            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1912            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1913        } else {
1914            /* !TCG_TARGET_HAS_direct_jump */
1915            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
            /* PC-relative literal load of the target; offset is in words. */
1916            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1917            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1918        }
        /* Both variants leave the destination in TCG_REG_TMP. */
1919        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1920        set_jmp_reset_offset(s, a0);
1921        break;
1922
1923    case INDEX_op_goto_ptr:
1924        tcg_out_insn(s, 3207, BR, a0);
1925        break;
1926
1927    case INDEX_op_br:
1928        tcg_out_goto_label(s, arg_label(a0));
1929        break;
1930
    /*
     * Host loads and stores: a0 is the data register, a1 the base and a2
     * the offset.  The last argument to tcg_out_ldst is log2 of the
     * access size in bytes.
     */
1931    case INDEX_op_ld8u_i32:
1932    case INDEX_op_ld8u_i64:
1933        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
1934        break;
1935    case INDEX_op_ld8s_i32:
1936        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
1937        break;
1938    case INDEX_op_ld8s_i64:
1939        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
1940        break;
1941    case INDEX_op_ld16u_i32:
1942    case INDEX_op_ld16u_i64:
1943        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
1944        break;
1945    case INDEX_op_ld16s_i32:
1946        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
1947        break;
1948    case INDEX_op_ld16s_i64:
1949        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
1950        break;
1951    case INDEX_op_ld_i32:
1952    case INDEX_op_ld32u_i64:
1953        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
1954        break;
1955    case INDEX_op_ld32s_i64:
1956        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
1957        break;
1958    case INDEX_op_ld_i64:
1959        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
1960        break;
1961
    /* Stores use REG0(0) so that a constant data operand becomes XZR. */
1962    case INDEX_op_st8_i32:
1963    case INDEX_op_st8_i64:
1964        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
1965        break;
1966    case INDEX_op_st16_i32:
1967    case INDEX_op_st16_i64:
1968        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
1969        break;
1970    case INDEX_op_st_i32:
1971    case INDEX_op_st32_i64:
1972        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
1973        break;
1974    case INDEX_op_st_i64:
1975        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
1976        break;
1977
    /*
     * In the *_i32 cases below, a2 is narrowed to its signed 32-bit value
     * before falling through to the code shared with the 64-bit opcode.
     */
1978    case INDEX_op_add_i32:
1979        a2 = (int32_t)a2;
1980        /* FALLTHRU */
1981    case INDEX_op_add_i64:
1982        if (c2) {
1983            tcg_out_addsubi(s, ext, a0, a1, a2);
1984        } else {
1985            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1986        }
1987        break;
1988
1989    case INDEX_op_sub_i32:
1990        a2 = (int32_t)a2;
1991        /* FALLTHRU */
1992    case INDEX_op_sub_i64:
1993        if (c2) {
            /* Subtracting a constant is adding its negation. */
1994            tcg_out_addsubi(s, ext, a0, a1, -a2);
1995        } else {
1996            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1997        }
1998        break;
1999
2000    case INDEX_op_neg_i64:
2001    case INDEX_op_neg_i32:
2002        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2003        break;
2004
2005    case INDEX_op_and_i32:
2006        a2 = (int32_t)a2;
2007        /* FALLTHRU */
2008    case INDEX_op_and_i64:
2009        if (c2) {
2010            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2011        } else {
2012            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2013        }
2014        break;
2015
    /*
     * andc/orc/eqv with a constant: complement the constant here and use
     * the plain AND/ORR/EOR immediate form.
     */
2016    case INDEX_op_andc_i32:
2017        a2 = (int32_t)a2;
2018        /* FALLTHRU */
2019    case INDEX_op_andc_i64:
2020        if (c2) {
2021            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2022        } else {
2023            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2024        }
2025        break;
2026
2027    case INDEX_op_or_i32:
2028        a2 = (int32_t)a2;
2029        /* FALLTHRU */
2030    case INDEX_op_or_i64:
2031        if (c2) {
2032            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2033        } else {
2034            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2035        }
2036        break;
2037
2038    case INDEX_op_orc_i32:
2039        a2 = (int32_t)a2;
2040        /* FALLTHRU */
2041    case INDEX_op_orc_i64:
2042        if (c2) {
2043            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2044        } else {
2045            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2046        }
2047        break;
2048
2049    case INDEX_op_xor_i32:
2050        a2 = (int32_t)a2;
2051        /* FALLTHRU */
2052    case INDEX_op_xor_i64:
2053        if (c2) {
2054            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2055        } else {
2056            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2057        }
2058        break;
2059
2060    case INDEX_op_eqv_i32:
2061        a2 = (int32_t)a2;
2062        /* FALLTHRU */
2063    case INDEX_op_eqv_i64:
2064        if (c2) {
2065            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2066        } else {
2067            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2068        }
2069        break;
2070
2071    case INDEX_op_not_i64:
2072    case INDEX_op_not_i32:
2073        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2074        break;
2075
2076    case INDEX_op_mul_i64:
2077    case INDEX_op_mul_i32:
        /* Multiply-add with a zero addend. */
2078        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2079        break;
2080
2081    case INDEX_op_div_i64:
2082    case INDEX_op_div_i32:
2083        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2084        break;
2085    case INDEX_op_divu_i64:
2086    case INDEX_op_divu_i32:
2087        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2088        break;
2089
    /*
     * Remainder: quotient into TCG_REG_TMP, then a multiply-subtract
     * giving a0 = a1 - quotient * a2.
     */
2090    case INDEX_op_rem_i64:
2091    case INDEX_op_rem_i32:
2092        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
2093        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2094        break;
2095    case INDEX_op_remu_i64:
2096    case INDEX_op_remu_i32:
2097        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
2098        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
2099        break;
2100
    /* Shifts and rotates: immediate helper for constants, else the
       variable-count instruction.  */
2101    case INDEX_op_shl_i64:
2102    case INDEX_op_shl_i32:
2103        if (c2) {
2104            tcg_out_shl(s, ext, a0, a1, a2);
2105        } else {
2106            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2107        }
2108        break;
2109
2110    case INDEX_op_shr_i64:
2111    case INDEX_op_shr_i32:
2112        if (c2) {
2113            tcg_out_shr(s, ext, a0, a1, a2);
2114        } else {
2115            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2116        }
2117        break;
2118
2119    case INDEX_op_sar_i64:
2120    case INDEX_op_sar_i32:
2121        if (c2) {
2122            tcg_out_sar(s, ext, a0, a1, a2);
2123        } else {
2124            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2125        }
2126        break;
2127
2128    case INDEX_op_rotr_i64:
2129    case INDEX_op_rotr_i32:
2130        if (c2) {
2131            tcg_out_rotr(s, ext, a0, a1, a2);
2132        } else {
2133            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2134        }
2135        break;
2136
2137    case INDEX_op_rotl_i64:
2138    case INDEX_op_rotl_i32:
2139        if (c2) {
2140            tcg_out_rotl(s, ext, a0, a1, a2);
2141        } else {
            /* Rotate left by a register: negate the count (as a 32-bit
               value) into TCG_REG_TMP and rotate right by that.  */
2142            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
2143            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
2144        }
2145        break;
2146
    /* clz and ctz share one helper; the final flag selects ctz. */
2147    case INDEX_op_clz_i64:
2148    case INDEX_op_clz_i32:
2149        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2150        break;
2151    case INDEX_op_ctz_i64:
2152    case INDEX_op_ctz_i32:
2153        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2154        break;
2155
    /* brcond operands: a0, a1 are compared, a2 is the condition and
       args[3] the label.  */
2156    case INDEX_op_brcond_i32:
2157        a1 = (int32_t)a1;
2158        /* FALLTHRU */
2159    case INDEX_op_brcond_i64:
2160        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2161        break;
2162
2163    case INDEX_op_setcond_i32:
2164        a2 = (int32_t)a2;
2165        /* FALLTHRU */
2166    case INDEX_op_setcond_i64:
2167        tcg_out_cmp(s, ext, a1, a2, c2);
2168        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2169        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2170                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2171        break;
2172
2173    case INDEX_op_movcond_i32:
2174        a2 = (int32_t)a2;
2175        /* FALLTHRU */
2176    case INDEX_op_movcond_i64:
        /* Compare a1 with a2, then select between operands 3 and 4
           (either may be XZR) on condition args[5].  */
2177        tcg_out_cmp(s, ext, a1, a2, c2);
2178        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2179        break;
2180
2181    case INDEX_op_qemu_ld_i32:
2182    case INDEX_op_qemu_ld_i64:
2183        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2184        break;
2185    case INDEX_op_qemu_st_i32:
2186    case INDEX_op_qemu_st_i64:
2187        tcg_out_qemu_st(s, REG0(0), a1, a2);
2188        break;
2189
2190    case INDEX_op_bswap64_i64:
2191        tcg_out_rev64(s, a0, a1);
2192        break;
2193    case INDEX_op_bswap32_i64:
2194    case INDEX_op_bswap32_i32:
2195        tcg_out_rev32(s, a0, a1);
2196        break;
2197    case INDEX_op_bswap16_i64:
2198    case INDEX_op_bswap16_i32:
2199        tcg_out_rev16(s, a0, a1);
2200        break;
2201
2202    case INDEX_op_ext8s_i64:
2203    case INDEX_op_ext8s_i32:
2204        tcg_out_sxt(s, ext, MO_8, a0, a1);
2205        break;
2206    case INDEX_op_ext16s_i64:
2207    case INDEX_op_ext16s_i32:
2208        tcg_out_sxt(s, ext, MO_16, a0, a1);
2209        break;
2210    case INDEX_op_ext_i32_i64:
2211    case INDEX_op_ext32s_i64:
2212        tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
2213        break;
2214    case INDEX_op_ext8u_i64:
2215    case INDEX_op_ext8u_i32:
2216        tcg_out_uxt(s, MO_8, a0, a1);
2217        break;
2218    case INDEX_op_ext16u_i64:
2219    case INDEX_op_ext16u_i32:
2220        tcg_out_uxt(s, MO_16, a0, a1);
2221        break;
2222    case INDEX_op_extu_i32_i64:
2223    case INDEX_op_ext32u_i64:
        /* Zero-extension from 32 bits is done as a 32-bit register move. */
2224        tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
2225        break;
2226
2227    case INDEX_op_deposit_i64:
2228    case INDEX_op_deposit_i32:
2229        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2230        break;
2231
    /* extract/sextract: a2 is the low bit, args[3] the field length; the
       bitfield-move helpers take the low and high bit positions.  */
2232    case INDEX_op_extract_i64:
2233    case INDEX_op_extract_i32:
2234        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2235        break;
2236
2237    case INDEX_op_sextract_i64:
2238    case INDEX_op_sextract_i32:
2239        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2240        break;
2241
2242    case INDEX_op_extract2_i64:
2243    case INDEX_op_extract2_i32:
        /* Note the operand order: operand 2 is passed before operand 1. */
2244        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2245        break;
2246
    /* Double-word add/sub: the final flag of tcg_out_addsub2 selects
       subtraction.  The 32-bit forms narrow the low constant operand.  */
2247    case INDEX_op_add2_i32:
2248        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2249                        (int32_t)args[4], args[5], const_args[4],
2250                        const_args[5], false);
2251        break;
2252    case INDEX_op_add2_i64:
2253        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2254                        args[5], const_args[4], const_args[5], false);
2255        break;
2256    case INDEX_op_sub2_i32:
2257        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2258                        (int32_t)args[4], args[5], const_args[4],
2259                        const_args[5], true);
2260        break;
2261    case INDEX_op_sub2_i64:
2262        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2263                        args[5], const_args[4], const_args[5], true);
2264        break;
2265
2266    case INDEX_op_muluh_i64:
2267        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2268        break;
2269    case INDEX_op_mulsh_i64:
2270        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2271        break;
2272
2273    case INDEX_op_mb:
2274        tcg_out_mb(s, a0);
2275        break;
2276
2277    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2278    case INDEX_op_mov_i64:
2279    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2280    default:
2281        g_assert_not_reached();
2282    }
2283
2284#undef REG0
2285}
2286
/*
 * Emit host code for one vector TCG opcode.
 *
 * @vecl is added to TCG_TYPE_V64 to form the vector type and is also used
 * directly as the Q bit of the instruction; @vece is the element size.
 * @args holds the operands and @const_args[i] is non-zero when operand i
 * is a constant.  mov_vec, dup_vec and any opcode without a case below
 * reach g_assert_not_reached().
 */
2287static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2288                           unsigned vecl, unsigned vece,
2289                           const TCGArg *args, const int *const_args)
2290{
    /*
     * Comparison instruction tables indexed by TCGCond.  Entries left
     * zero mean "no direct instruction"; cmp_vec below handles those by
     * swapping operands or falling back to a register comparison.
     */
2291    static const AArch64Insn cmp_vec_insn[16] = {
2292        [TCG_COND_EQ] = I3616_CMEQ,
2293        [TCG_COND_GT] = I3616_CMGT,
2294        [TCG_COND_GE] = I3616_CMGE,
2295        [TCG_COND_GTU] = I3616_CMHI,
2296        [TCG_COND_GEU] = I3616_CMHS,
2297    };
2298    static const AArch64Insn cmp_scalar_insn[16] = {
2299        [TCG_COND_EQ] = I3611_CMEQ,
2300        [TCG_COND_GT] = I3611_CMGT,
2301        [TCG_COND_GE] = I3611_CMGE,
2302        [TCG_COND_GTU] = I3611_CMHI,
2303        [TCG_COND_GEU] = I3611_CMHS,
2304    };
    /* Compare-against-zero forms, used when operand 2 is a constant. */
2305    static const AArch64Insn cmp0_vec_insn[16] = {
2306        [TCG_COND_EQ] = I3617_CMEQ0,
2307        [TCG_COND_GT] = I3617_CMGT0,
2308        [TCG_COND_GE] = I3617_CMGE0,
2309        [TCG_COND_LT] = I3617_CMLT0,
2310        [TCG_COND_LE] = I3617_CMLE0,
2311    };
2312    static const AArch64Insn cmp0_scalar_insn[16] = {
2313        [TCG_COND_EQ] = I3612_CMEQ0,
2314        [TCG_COND_GT] = I3612_CMGT0,
2315        [TCG_COND_GE] = I3612_CMGE0,
2316        [TCG_COND_LT] = I3612_CMLT0,
2317        [TCG_COND_LE] = I3612_CMLE0,
2318    };
2319
2320    TCGType type = vecl + TCG_TYPE_V64;
2321    unsigned is_q = vecl;
    /* A non-Q vector of 64-bit elements is a single element: such
       operations use the scalar encodings (3609/3611/3612) below.  */
2322    bool is_scalar = !is_q && vece == MO_64;
2323    TCGArg a0, a1, a2, a3;
2324    int cmode, imm8;
2325
2326    a0 = args[0];
2327    a1 = args[1];
2328    a2 = args[2];
2329
2330    switch (opc) {
2331    case INDEX_op_ld_vec:
2332        tcg_out_ld(s, type, a0, a1, a2);
2333        break;
2334    case INDEX_op_st_vec:
2335        tcg_out_st(s, type, a0, a1, a2);
2336        break;
2337    case INDEX_op_dupm_vec:
2338        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2339        break;
2340    case INDEX_op_add_vec:
2341        if (is_scalar) {
2342            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2343        } else {
2344            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2345        }
2346        break;
2347    case INDEX_op_sub_vec:
2348        if (is_scalar) {
2349            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2350        } else {
2351            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2352        }
2353        break;
2354    case INDEX_op_mul_vec:
        /* Vector form only; tcg_can_emit_vec_op accepts mul/min/max
           only for vece < MO_64, so the scalar case cannot arise.  */
2355        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2356        break;
2357    case INDEX_op_neg_vec:
2358        if (is_scalar) {
2359            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2360        } else {
2361            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2362        }
2363        break;
2364    case INDEX_op_abs_vec:
2365        if (is_scalar) {
2366            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2367        } else {
2368            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2369        }
2370        break;
    /*
     * Logical ops with a constant operand 2: the constant (or its
     * complement) is encoded as cmode/imm8 by is_shimm1632.  If the
     * destination is also the first source, a single immediate BIC/ORR
     * updates it in place; otherwise the constant is first materialised
     * in a0 with MOVI/MVNI and the register form is used with a2 = a0.
     */
2371    case INDEX_op_and_vec:
2372        if (const_args[2]) {
2373            is_shimm1632(~a2, &cmode, &imm8);
2374            if (a0 == a1) {
2375                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2376                return;
2377            }
2378            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2379            a2 = a0;
2380        }
2381        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2382        break;
2383    case INDEX_op_or_vec:
2384        if (const_args[2]) {
2385            is_shimm1632(a2, &cmode, &imm8);
2386            if (a0 == a1) {
2387                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2388                return;
2389            }
2390            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2391            a2 = a0;
2392        }
2393        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2394        break;
2395    case INDEX_op_andc_vec:
2396        if (const_args[2]) {
2397            is_shimm1632(a2, &cmode, &imm8);
2398            if (a0 == a1) {
2399                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2400                return;
2401            }
2402            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2403            a2 = a0;
2404        }
2405        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2406        break;
2407    case INDEX_op_orc_vec:
2408        if (const_args[2]) {
2409            is_shimm1632(~a2, &cmode, &imm8);
2410            if (a0 == a1) {
2411                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2412                return;
2413            }
2414            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2415            a2 = a0;
2416        }
2417        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2418        break;
2419    case INDEX_op_xor_vec:
2420        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2421        break;
    /* Saturating add/sub, signed then unsigned. */
2422    case INDEX_op_ssadd_vec:
2423        if (is_scalar) {
2424            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2425        } else {
2426            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2427        }
2428        break;
2429    case INDEX_op_sssub_vec:
2430        if (is_scalar) {
2431            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2432        } else {
2433            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2434        }
2435        break;
2436    case INDEX_op_usadd_vec:
2437        if (is_scalar) {
2438            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2439        } else {
2440            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2441        }
2442        break;
2443    case INDEX_op_ussub_vec:
2444        if (is_scalar) {
2445            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2446        } else {
2447            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2448        }
2449        break;
2450    case INDEX_op_smax_vec:
2451        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2452        break;
2453    case INDEX_op_smin_vec:
2454        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2455        break;
2456    case INDEX_op_umax_vec:
2457        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2458        break;
2459    case INDEX_op_umin_vec:
2460        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2461        break;
2462    case INDEX_op_not_vec:
2463        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2464        break;
    /*
     * Immediate shifts.  (8 << vece) is the element width in bits
     * (assuming MO_8 == 0).  The encoded immediate is width + shift for
     * left shifts and 2 * width - shift for right shifts.
     */
2465    case INDEX_op_shli_vec:
2466        if (is_scalar) {
2467            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2468        } else {
2469            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2470        }
2471        break;
2472    case INDEX_op_shri_vec:
2473        if (is_scalar) {
2474            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2475        } else {
2476            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2477        }
2478        break;
2479    case INDEX_op_sari_vec:
2480        if (is_scalar) {
2481            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2482        } else {
2483            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2484        }
2485        break;
2486    case INDEX_op_aa64_sli_vec:
        /* Shift-left-and-insert: the source is a2 and the shift count is
           args[3]; a1 is not passed to the instruction.
           NOTE(review): presumably a1 is constrained to share a0's
           register -- confirm against the op's constraint set.  */
2487        if (is_scalar) {
2488            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2489        } else {
2490            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2491        }
2492        break;
2493    case INDEX_op_shlv_vec:
2494        if (is_scalar) {
2495            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2496        } else {
2497            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2498        }
2499        break;
2500    case INDEX_op_aa64_sshl_vec:
2501        if (is_scalar) {
2502            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2503        } else {
2504            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2505        }
2506        break;
2507    case INDEX_op_cmp_vec:
2508        {
2509            TCGCond cond = args[3];
2510            AArch64Insn insn;
2511
2512            if (cond == TCG_COND_NE) {
2513                if (const_args[2]) {
                    /* Constant operand (treated as zero): test a1
                       against itself, true where any bit is set.  */
2514                    if (is_scalar) {
2515                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2516                    } else {
2517                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2518                    }
2519                } else {
                    /* No direct "not equal": compare equal, then invert. */
2520                    if (is_scalar) {
2521                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2522                    } else {
2523                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2524                    }
2525                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2526                }
2527            } else {
2528                if (const_args[2]) {
                    /* Prefer a compare-against-zero instruction; if the
                       condition has none, load zero into TCG_VEC_TMP and
                       continue with the register comparison.  */
2529                    if (is_scalar) {
2530                        insn = cmp0_scalar_insn[cond];
2531                        if (insn) {
2532                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2533                            break;
2534                        }
2535                    } else {
2536                        insn = cmp0_vec_insn[cond];
2537                        if (insn) {
2538                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2539                            break;
2540                        }
2541                    }
2542                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
2543                    a2 = TCG_VEC_TMP;
2544                }
                /* If the condition has no instruction, swap the operands
                   and use the swapped condition, which must have one.  */
2545                if (is_scalar) {
2546                    insn = cmp_scalar_insn[cond];
2547                    if (insn == 0) {
2548                        TCGArg t;
2549                        t = a1, a1 = a2, a2 = t;
2550                        cond = tcg_swap_cond(cond);
2551                        insn = cmp_scalar_insn[cond];
2552                        tcg_debug_assert(insn != 0);
2553                    }
2554                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2555                } else {
2556                    insn = cmp_vec_insn[cond];
2557                    if (insn == 0) {
2558                        TCGArg t;
2559                        t = a1, a1 = a2, a2 = t;
2560                        cond = tcg_swap_cond(cond);
2561                        insn = cmp_vec_insn[cond];
2562                        tcg_debug_assert(insn != 0);
2563                    }
2564                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2565                }
2566            }
2567        }
2568        break;
2569
2570    case INDEX_op_bitsel_vec:
        /* Pick BIT, BIF or BSL according to which input (if any) already
           occupies the destination; for BSL the selector a1 is moved
           into a0 first when it is not already there.  */
2571        a3 = args[3];
2572        if (a0 == a3) {
2573            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2574        } else if (a0 == a2) {
2575            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2576        } else {
2577            if (a0 != a1) {
2578                tcg_out_mov(s, type, a0, a1);
2579            }
2580            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2581        }
2582        break;
2583
2584    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2585    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2586    default:
2587        g_assert_not_reached();
2588    }
2589}
2590
2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2592{
2593    switch (opc) {
2594    case INDEX_op_add_vec:
2595    case INDEX_op_sub_vec:
2596    case INDEX_op_and_vec:
2597    case INDEX_op_or_vec:
2598    case INDEX_op_xor_vec:
2599    case INDEX_op_andc_vec:
2600    case INDEX_op_orc_vec:
2601    case INDEX_op_neg_vec:
2602    case INDEX_op_abs_vec:
2603    case INDEX_op_not_vec:
2604    case INDEX_op_cmp_vec:
2605    case INDEX_op_shli_vec:
2606    case INDEX_op_shri_vec:
2607    case INDEX_op_sari_vec:
2608    case INDEX_op_ssadd_vec:
2609    case INDEX_op_sssub_vec:
2610    case INDEX_op_usadd_vec:
2611    case INDEX_op_ussub_vec:
2612    case INDEX_op_shlv_vec:
2613    case INDEX_op_bitsel_vec:
2614        return 1;
2615    case INDEX_op_rotli_vec:
2616    case INDEX_op_shrv_vec:
2617    case INDEX_op_sarv_vec:
2618    case INDEX_op_rotlv_vec:
2619    case INDEX_op_rotrv_vec:
2620        return -1;
2621    case INDEX_op_mul_vec:
2622    case INDEX_op_smax_vec:
2623    case INDEX_op_smin_vec:
2624    case INDEX_op_umax_vec:
2625    case INDEX_op_umin_vec:
2626        return vece < MO_64;
2627
2628    default:
2629        return 0;
2630    }
2631}
2632
2633void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2634                       TCGArg a0, ...)
2635{
2636    va_list va;
2637    TCGv_vec v0, v1, v2, t1, t2, c1;
2638    TCGArg a2;
2639
2640    va_start(va, a0);
2641    v0 = temp_tcgv_vec(arg_temp(a0));
2642    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2643    a2 = va_arg(va, TCGArg);
2644    va_end(va);
2645
2646    switch (opc) {
2647    case INDEX_op_rotli_vec:
2648        t1 = tcg_temp_new_vec(type);
2649        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2650        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2651                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2652        tcg_temp_free_vec(t1);
2653        break;
2654
2655    case INDEX_op_shrv_vec:
2656    case INDEX_op_sarv_vec:
2657        /* Right shifts are negative left shifts for AArch64.  */
2658        v2 = temp_tcgv_vec(arg_temp(a2));
2659        t1 = tcg_temp_new_vec(type);
2660        tcg_gen_neg_vec(vece, t1, v2);
2661        opc = (opc == INDEX_op_shrv_vec
2662               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2663        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2664                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2665        tcg_temp_free_vec(t1);
2666        break;
2667
2668    case INDEX_op_rotlv_vec:
2669        v2 = temp_tcgv_vec(arg_temp(a2));
2670        t1 = tcg_temp_new_vec(type);
2671        c1 = tcg_constant_vec(type, vece, 8 << vece);
2672        tcg_gen_sub_vec(vece, t1, v2, c1);
2673        /* Right shifts are negative left shifts for AArch64.  */
2674        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2675                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2676        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2677                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2678        tcg_gen_or_vec(vece, v0, v0, t1);
2679        tcg_temp_free_vec(t1);
2680        break;
2681
2682    case INDEX_op_rotrv_vec:
2683        v2 = temp_tcgv_vec(arg_temp(a2));
2684        t1 = tcg_temp_new_vec(type);
2685        t2 = tcg_temp_new_vec(type);
2686        c1 = tcg_constant_vec(type, vece, 8 << vece);
2687        tcg_gen_neg_vec(vece, t1, v2);
2688        tcg_gen_sub_vec(vece, t2, c1, v2);
2689        /* Right shifts are negative left shifts for AArch64.  */
2690        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2691                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2692        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2693                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2694        tcg_gen_or_vec(vece, v0, t1, t2);
2695        tcg_temp_free_vec(t1);
2696        tcg_temp_free_vec(t2);
2697        break;
2698
2699    default:
2700        g_assert_not_reached();
2701    }
2702}
2703
2704static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2705{
2706    switch (op) {
2707    case INDEX_op_goto_ptr:
2708        return C_O0_I1(r);
2709
2710    case INDEX_op_ld8u_i32:
2711    case INDEX_op_ld8s_i32:
2712    case INDEX_op_ld16u_i32:
2713    case INDEX_op_ld16s_i32:
2714    case INDEX_op_ld_i32:
2715    case INDEX_op_ld8u_i64:
2716    case INDEX_op_ld8s_i64:
2717    case INDEX_op_ld16u_i64:
2718    case INDEX_op_ld16s_i64:
2719    case INDEX_op_ld32u_i64:
2720    case INDEX_op_ld32s_i64:
2721    case INDEX_op_ld_i64:
2722    case INDEX_op_neg_i32:
2723    case INDEX_op_neg_i64:
2724    case INDEX_op_not_i32:
2725    case INDEX_op_not_i64:
2726    case INDEX_op_bswap16_i32:
2727    case INDEX_op_bswap32_i32:
2728    case INDEX_op_bswap16_i64:
2729    case INDEX_op_bswap32_i64:
2730    case INDEX_op_bswap64_i64:
2731    case INDEX_op_ext8s_i32:
2732    case INDEX_op_ext16s_i32:
2733    case INDEX_op_ext8u_i32:
2734    case INDEX_op_ext16u_i32:
2735    case INDEX_op_ext8s_i64:
2736    case INDEX_op_ext16s_i64:
2737    case INDEX_op_ext32s_i64:
2738    case INDEX_op_ext8u_i64:
2739    case INDEX_op_ext16u_i64:
2740    case INDEX_op_ext32u_i64:
2741    case INDEX_op_ext_i32_i64:
2742    case INDEX_op_extu_i32_i64:
2743    case INDEX_op_extract_i32:
2744    case INDEX_op_extract_i64:
2745    case INDEX_op_sextract_i32:
2746    case INDEX_op_sextract_i64:
2747        return C_O1_I1(r, r);
2748
2749    case INDEX_op_st8_i32:
2750    case INDEX_op_st16_i32:
2751    case INDEX_op_st_i32:
2752    case INDEX_op_st8_i64:
2753    case INDEX_op_st16_i64:
2754    case INDEX_op_st32_i64:
2755    case INDEX_op_st_i64:
2756        return C_O0_I2(rZ, r);
2757
2758    case INDEX_op_add_i32:
2759    case INDEX_op_add_i64:
2760    case INDEX_op_sub_i32:
2761    case INDEX_op_sub_i64:
2762    case INDEX_op_setcond_i32:
2763    case INDEX_op_setcond_i64:
2764        return C_O1_I2(r, r, rA);
2765
2766    case INDEX_op_mul_i32:
2767    case INDEX_op_mul_i64:
2768    case INDEX_op_div_i32:
2769    case INDEX_op_div_i64:
2770    case INDEX_op_divu_i32:
2771    case INDEX_op_divu_i64:
2772    case INDEX_op_rem_i32:
2773    case INDEX_op_rem_i64:
2774    case INDEX_op_remu_i32:
2775    case INDEX_op_remu_i64:
2776    case INDEX_op_muluh_i64:
2777    case INDEX_op_mulsh_i64:
2778        return C_O1_I2(r, r, r);
2779
2780    case INDEX_op_and_i32:
2781    case INDEX_op_and_i64:
2782    case INDEX_op_or_i32:
2783    case INDEX_op_or_i64:
2784    case INDEX_op_xor_i32:
2785    case INDEX_op_xor_i64:
2786    case INDEX_op_andc_i32:
2787    case INDEX_op_andc_i64:
2788    case INDEX_op_orc_i32:
2789    case INDEX_op_orc_i64:
2790    case INDEX_op_eqv_i32:
2791    case INDEX_op_eqv_i64:
2792        return C_O1_I2(r, r, rL);
2793
2794    case INDEX_op_shl_i32:
2795    case INDEX_op_shr_i32:
2796    case INDEX_op_sar_i32:
2797    case INDEX_op_rotl_i32:
2798    case INDEX_op_rotr_i32:
2799    case INDEX_op_shl_i64:
2800    case INDEX_op_shr_i64:
2801    case INDEX_op_sar_i64:
2802    case INDEX_op_rotl_i64:
2803    case INDEX_op_rotr_i64:
2804        return C_O1_I2(r, r, ri);
2805
2806    case INDEX_op_clz_i32:
2807    case INDEX_op_ctz_i32:
2808    case INDEX_op_clz_i64:
2809    case INDEX_op_ctz_i64:
2810        return C_O1_I2(r, r, rAL);
2811
2812    case INDEX_op_brcond_i32:
2813    case INDEX_op_brcond_i64:
2814        return C_O0_I2(r, rA);
2815
2816    case INDEX_op_movcond_i32:
2817    case INDEX_op_movcond_i64:
2818        return C_O1_I4(r, r, rA, rZ, rZ);
2819
2820    case INDEX_op_qemu_ld_i32:
2821    case INDEX_op_qemu_ld_i64:
2822        return C_O1_I1(r, l);
2823    case INDEX_op_qemu_st_i32:
2824    case INDEX_op_qemu_st_i64:
2825        return C_O0_I2(lZ, l);
2826
2827    case INDEX_op_deposit_i32:
2828    case INDEX_op_deposit_i64:
2829        return C_O1_I2(r, 0, rZ);
2830
2831    case INDEX_op_extract2_i32:
2832    case INDEX_op_extract2_i64:
2833        return C_O1_I2(r, rZ, rZ);
2834
2835    case INDEX_op_add2_i32:
2836    case INDEX_op_add2_i64:
2837    case INDEX_op_sub2_i32:
2838    case INDEX_op_sub2_i64:
2839        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2840
2841    case INDEX_op_add_vec:
2842    case INDEX_op_sub_vec:
2843    case INDEX_op_mul_vec:
2844    case INDEX_op_xor_vec:
2845    case INDEX_op_ssadd_vec:
2846    case INDEX_op_sssub_vec:
2847    case INDEX_op_usadd_vec:
2848    case INDEX_op_ussub_vec:
2849    case INDEX_op_smax_vec:
2850    case INDEX_op_smin_vec:
2851    case INDEX_op_umax_vec:
2852    case INDEX_op_umin_vec:
2853    case INDEX_op_shlv_vec:
2854    case INDEX_op_shrv_vec:
2855    case INDEX_op_sarv_vec:
2856    case INDEX_op_aa64_sshl_vec:
2857        return C_O1_I2(w, w, w);
2858    case INDEX_op_not_vec:
2859    case INDEX_op_neg_vec:
2860    case INDEX_op_abs_vec:
2861    case INDEX_op_shli_vec:
2862    case INDEX_op_shri_vec:
2863    case INDEX_op_sari_vec:
2864        return C_O1_I1(w, w);
2865    case INDEX_op_ld_vec:
2866    case INDEX_op_dupm_vec:
2867        return C_O1_I1(w, r);
2868    case INDEX_op_st_vec:
2869        return C_O0_I2(w, r);
2870    case INDEX_op_dup_vec:
2871        return C_O1_I1(w, wr);
2872    case INDEX_op_or_vec:
2873    case INDEX_op_andc_vec:
2874        return C_O1_I2(w, w, wO);
2875    case INDEX_op_and_vec:
2876    case INDEX_op_orc_vec:
2877        return C_O1_I2(w, w, wN);
2878    case INDEX_op_cmp_vec:
2879        return C_O1_I2(w, w, wZ);
2880    case INDEX_op_bitsel_vec:
2881        return C_O1_I3(w, w, w, w);
2882    case INDEX_op_aa64_sli_vec:
2883        return C_O1_I2(w, 0, w);
2884
2885    default:
2886        g_assert_not_reached();
2887    }
2888}
2889
2890static void tcg_target_init(TCGContext *s)
2891{
2892    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
2893    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
2894    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
2895    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
2896
2897    tcg_target_call_clobber_regs = -1ull;
2898    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
2899    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
2900    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
2901    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
2902    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
2903    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
2904    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
2905    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
2906    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
2907    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
2915    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
2916    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
2917
2918    s->reserved_regs = 0;
2919    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2920    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
2921    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2922    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
2923    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
2924}
2925
/*
 * Bytes pushed by the prologue for saved registers: x19 through x30
 * inclusive at 8 bytes each, stored as the pairs
 * (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).
 */
#define PUSH_SIZE  (8 * (30 - 19 + 1))
2928
2929#define FRAME_SIZE \
2930    ((PUSH_SIZE \
2931      + TCG_STATIC_CALL_ARGS_SIZE \
2932      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2933      + TCG_TARGET_STACK_ALIGN - 1) \
2934     & ~(TCG_TARGET_STACK_ALIGN - 1))
2935
2936/* We're expecting a 2 byte uleb128 encoded value.  */
2937QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2938
2939/* We're expecting to use a single ADDI insn.  */
2940QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
2941
2942static void tcg_target_qemu_prologue(TCGContext *s)
2943{
2944    TCGReg r;
2945
2946    /* Push (FP, LR) and allocate space for all saved registers.  */
2947    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
2948                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
2949
2950    /* Set up frame pointer for canonical unwinding.  */
2951    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
2952
2953    /* Store callee-preserved regs x19..x28.  */
2954    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2955        int ofs = (r - TCG_REG_X19 + 2) * 8;
2956        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2957    }
2958
2959    /* Make stack space for TCG locals.  */
2960    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2961                 FRAME_SIZE - PUSH_SIZE);
2962
2963    /* Inform TCG about how to find TCG locals with register, offset, size.  */
2964    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
2965                  CPU_TEMP_BUF_NLONGS * sizeof(long));
2966
2967#if !defined(CONFIG_SOFTMMU)
2968    if (USE_GUEST_BASE) {
2969        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2970        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2971    }
2972#endif
2973
2974    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2975    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2976
2977    /*
2978     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2979     * and fall through to the rest of the epilogue.
2980     */
2981    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2982    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2983
2984    /* TB epilogue */
2985    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2986
2987    /* Remove TCG locals stack space.  */
2988    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2989                 FRAME_SIZE - PUSH_SIZE);
2990
2991    /* Restore registers x19..x28.  */
2992    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2993        int ofs = (r - TCG_REG_X19 + 2) * 8;
2994        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2995    }
2996
2997    /* Pop (FP, LR), restore SP to previous frame.  */
2998    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2999                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3000    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3001}
3002
3003static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3004{
3005    int i;
3006    for (i = 0; i < count; ++i) {
3007        p[i] = NOP;
3008    }
3009}
3010
3011typedef struct {
3012    DebugFrameHeader h;
3013    uint8_t fde_def_cfa[4];
3014    uint8_t fde_reg_ofs[24];
3015} DebugFrame;
3016
3017#define ELF_HOST_MACHINE EM_AARCH64
3018
3019static const DebugFrame debug_frame = {
3020    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3021    .h.cie.id = -1,
3022    .h.cie.version = 1,
3023    .h.cie.code_align = 1,
3024    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3025    .h.cie.return_column = TCG_REG_LR,
3026
3027    /* Total FDE size does not include the "len" member.  */
3028    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3029
3030    .fde_def_cfa = {
3031        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3032        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3033        (FRAME_SIZE >> 7)
3034    },
3035    .fde_reg_ofs = {
3036        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3037        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3038        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3039        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3040        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3041        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3042        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3043        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3044        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3045        0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
3046        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3047        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3048    }
3049};
3050
3051void tcg_register_jit(const void *buf, size_t buf_size)
3052{
3053    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3054}
3055