xref: /openbmc/qemu/tcg/aarch64/tcg-target.c.inc (revision 4d7dd4ed)
1/*
2 * Initial TCG Implementation for aarch64
3 *
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
9 *
10 * See the COPYING file in the top-level directory for details.
11 */
12
13#include "../tcg-ldst.c.inc"
14#include "../tcg-pool.c.inc"
15#include "qemu/bitops.h"
16
17/* We're going to re-use TCGType when setting the SF bit, which controls
18   the size of the operation performed.  If we know the values match, it
19   makes things much cleaner.  */
20QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
21
22#ifdef CONFIG_DEBUG_TCG
23static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
25    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
26    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
27    "x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
28
29    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
30    "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
31    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
32    "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
33};
34#endif /* CONFIG_DEBUG_TCG */
35
36static const int tcg_target_reg_alloc_order[] = {
37    TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
38    TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
39    TCG_REG_X28, /* we will reserve this for guest_base if configured */
40
41    TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
42    TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
43
44    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
45    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
46
47    /* X16 reserved as temporary */
48    /* X17 reserved as temporary */
49    /* X18 reserved by system */
50    /* X19 reserved for AREG0 */
51    /* X29 reserved as fp */
52    /* X30 reserved as temporary */
53
54    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
55    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
56    /* V8 - V15 are call-saved, and skipped.  */
57    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
58    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
59    TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
60    TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
61};
62
63static const int tcg_target_call_iarg_regs[8] = {
64    TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
65    TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
66};
67
68static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
69{
70    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
71    tcg_debug_assert(slot >= 0 && slot <= 1);
72    return TCG_REG_X0 + slot;
73}
74
75#define TCG_REG_TMP0 TCG_REG_X16
76#define TCG_REG_TMP1 TCG_REG_X17
77#define TCG_REG_TMP2 TCG_REG_X30
78#define TCG_VEC_TMP0 TCG_REG_V31
79
80#define TCG_REG_GUEST_BASE TCG_REG_X28
81
82static bool reloc_pc26(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
83{
84    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
85    ptrdiff_t offset = target - src_rx;
86
87    if (offset == sextract64(offset, 0, 26)) {
88        /* read instruction, mask away previous PC_REL26 parameter contents,
89           set the proper offset, then write back the instruction. */
90        *src_rw = deposit32(*src_rw, 0, 26, offset);
91        return true;
92    }
93    return false;
94}
95
96static bool reloc_pc19(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
97{
98    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
99    ptrdiff_t offset = target - src_rx;
100
101    if (offset == sextract64(offset, 0, 19)) {
102        *src_rw = deposit32(*src_rw, 5, 19, offset);
103        return true;
104    }
105    return false;
106}
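/*
 * A worked example of the relocations above (values assumed for
 * illustration): a B or BL whose target lies 8 insns after the branch
 * has offset = 8 deposited into bits [25:0]; a B.cond or CBZ with the
 * same target gets 8 deposited into bits [23:5].  Offsets are counted
 * in 4-byte instruction units, giving +/-128MB and +/-1MB of reach.
 */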
107
108static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
109                        intptr_t value, intptr_t addend)
110{
111    tcg_debug_assert(addend == 0);
112    switch (type) {
113    case R_AARCH64_JUMP26:
114    case R_AARCH64_CALL26:
115        return reloc_pc26(code_ptr, (const tcg_insn_unit *)value);
116    case R_AARCH64_CONDBR19:
117        return reloc_pc19(code_ptr, (const tcg_insn_unit *)value);
118    default:
119        g_assert_not_reached();
120    }
121}
122
123#define TCG_CT_CONST_AIMM 0x100
124#define TCG_CT_CONST_LIMM 0x200
125#define TCG_CT_CONST_ZERO 0x400
126#define TCG_CT_CONST_MONE 0x800
127#define TCG_CT_CONST_ORRI 0x1000
128#define TCG_CT_CONST_ANDI 0x2000
129
130#define ALL_GENERAL_REGS  0xffffffffu
131#define ALL_VECTOR_REGS   0xffffffff00000000ull
132
133/* Match a constant valid for addition (12-bit, optionally shifted).  */
134static inline bool is_aimm(uint64_t val)
135{
136    return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
137}
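/*
 * For illustration: 0xfff and 0x123000 both satisfy is_aimm (a 12-bit
 * value, unshifted or shifted left by 12), while 0x1001 does not,
 * since it needs bits from both halves.
 */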
138
139/* Match a constant valid for logical operations.  */
140static inline bool is_limm(uint64_t val)
141{
142    /* Taking a simplified view of the logical immediates for now, ignoring
143       the replication that can happen across the field.  Match bit patterns
144       of the forms
145           0....01....1
146           0..01..10..0
147       and their inverses.  */
148
149    /* Make things easier below, by testing the form with msb clear. */
150    if ((int64_t)val < 0) {
151        val = ~val;
152    }
153    if (val == 0) {
154        return false;
155    }
156    val += val & -val;
157    return (val & (val - 1)) == 0;
158}
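/*
 * A worked example of the test above: val = 0x0ff0 has val & -val = 0x10,
 * so val becomes 0x1000, a power of two, and the value is accepted
 * (form 0..01..10..0).  val = 0x0f0f becomes 0x0f10, which still has
 * more than one bit set, so the two separate runs of ones are rejected.
 */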
159
160/* Return true if v16 is a valid 16-bit shifted immediate.  */
161static bool is_shimm16(uint16_t v16, int *cmode, int *imm8)
162{
163    if (v16 == (v16 & 0xff)) {
164        *cmode = 0x8;
165        *imm8 = v16 & 0xff;
166        return true;
167    } else if (v16 == (v16 & 0xff00)) {
168        *cmode = 0xa;
169        *imm8 = v16 >> 8;
170        return true;
171    }
172    return false;
173}
174
175/* Return true if v32 is a valid 32-bit shifted immediate.  */
176static bool is_shimm32(uint32_t v32, int *cmode, int *imm8)
177{
178    if (v32 == (v32 & 0xff)) {
179        *cmode = 0x0;
180        *imm8 = v32 & 0xff;
181        return true;
182    } else if (v32 == (v32 & 0xff00)) {
183        *cmode = 0x2;
184        *imm8 = (v32 >> 8) & 0xff;
185        return true;
186    } else if (v32 == (v32 & 0xff0000)) {
187        *cmode = 0x4;
188        *imm8 = (v32 >> 16) & 0xff;
189        return true;
190    } else if (v32 == (v32 & 0xff000000)) {
191        *cmode = 0x6;
192        *imm8 = v32 >> 24;
193        return true;
194    }
195    return false;
196}
197
198/* Return true if v32 is a valid 32-bit shifting ones immediate.  */
199static bool is_soimm32(uint32_t v32, int *cmode, int *imm8)
200{
201    if ((v32 & 0xffff00ff) == 0xff) {
202        *cmode = 0xc;
203        *imm8 = (v32 >> 8) & 0xff;
204        return true;
205    } else if ((v32 & 0xff00ffff) == 0xffff) {
206        *cmode = 0xd;
207        *imm8 = (v32 >> 16) & 0xff;
208        return true;
209    }
210    return false;
211}
212
213/* Return true if v32 is a valid float32 immediate.  */
214static bool is_fimm32(uint32_t v32, int *cmode, int *imm8)
215{
216    if (extract32(v32, 0, 19) == 0
217        && (extract32(v32, 25, 6) == 0x20
218            || extract32(v32, 25, 6) == 0x1f)) {
219        *cmode = 0xf;
220        *imm8 = (extract32(v32, 31, 1) << 7)
221              | (extract32(v32, 25, 1) << 6)
222              | extract32(v32, 19, 6);
223        return true;
224    }
225    return false;
226}
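/*
 * For illustration: 1.0f (0x3f800000) has zero low 19 bits and
 * bits [30:25] = 0x1f, so it is accepted with imm8 = 0x70; 2.0f
 * (0x40000000) matches via the 0x20 case; 0.1f fails the low-bits test.
 */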
227
228/* Return true if v64 is a valid float64 immediate.  */
229static bool is_fimm64(uint64_t v64, int *cmode, int *imm8)
230{
231    if (extract64(v64, 0, 48) == 0
232        && (extract64(v64, 54, 9) == 0x100
233            || extract64(v64, 54, 9) == 0x0ff)) {
234        *cmode = 0xf;
235        *imm8 = (extract64(v64, 63, 1) << 7)
236              | (extract64(v64, 54, 1) << 6)
237              | extract64(v64, 48, 6);
238        return true;
239    }
240    return false;
241}
242
243/*
244 * Return non-zero if v32 can be formed by MOVI+ORR.
245 * Place the parameters for MOVI in (cmode, imm8).
246 * Return the cmode for ORR; the imm8 can be had via extraction from v32.
247 */
248static int is_shimm32_pair(uint32_t v32, int *cmode, int *imm8)
249{
250    int i;
251
252    for (i = 6; i > 0; i -= 2) {
253        /* Mask out one byte we can add with ORR.  */
254        uint32_t tmp = v32 & ~(0xffu << (i * 4));
255        if (is_shimm32(tmp, cmode, imm8) ||
256            is_soimm32(tmp, cmode, imm8)) {
257            break;
258        }
259    }
260    return i;
261}
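/*
 * For example, v32 = 0x00ab00cd (an assumed value) is not a single
 * shifted immediate, but masking the byte at bits [23:16] leaves
 * 0x000000cd, which MOVI handles with cmode 0x0; the function returns
 * i = 4 and the caller ORRs in extract32(v32, 16, 8) = 0xab with cmode 0x4.
 */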
262
263/* Return true if V is a valid 16-bit or 32-bit shifted immediate.  */
264static bool is_shimm1632(uint32_t v32, int *cmode, int *imm8)
265{
266    if (v32 == deposit32(v32, 16, 16, v32)) {
267        return is_shimm16(v32, cmode, imm8);
268    } else {
269        return is_shimm32(v32, cmode, imm8);
270    }
271}
272
273static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
274{
275    if (ct & TCG_CT_CONST) {
276        return 1;
277    }
278    if (type == TCG_TYPE_I32) {
279        val = (int32_t)val;
280    }
281    if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
282        return 1;
283    }
284    if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
285        return 1;
286    }
287    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
288        return 1;
289    }
290    if ((ct & TCG_CT_CONST_MONE) && val == -1) {
291        return 1;
292    }
293
294    switch (ct & (TCG_CT_CONST_ORRI | TCG_CT_CONST_ANDI)) {
295    case 0:
296        break;
297    case TCG_CT_CONST_ANDI:
298        val = ~val;
299        /* fallthru */
300    case TCG_CT_CONST_ORRI:
301        if (val == deposit64(val, 32, 32, val)) {
302            int cmode, imm8;
303            return is_shimm1632(val, &cmode, &imm8);
304        }
305        break;
306    default:
307        /* Both bits should not be set for the same insn.  */
308        g_assert_not_reached();
309    }
310
311    return 0;
312}
313
314enum aarch64_cond_code {
315    COND_EQ = 0x0,
316    COND_NE = 0x1,
317    COND_CS = 0x2,     /* Unsigned greater or equal */
318    COND_HS = COND_CS, /* ALIAS greater or equal */
319    COND_CC = 0x3,     /* Unsigned less than */
320    COND_LO = COND_CC, /* ALIAS Lower */
321    COND_MI = 0x4,     /* Negative */
322    COND_PL = 0x5,     /* Zero or greater */
323    COND_VS = 0x6,     /* Overflow */
324    COND_VC = 0x7,     /* No overflow */
325    COND_HI = 0x8,     /* Unsigned greater than */
326    COND_LS = 0x9,     /* Unsigned less or equal */
327    COND_GE = 0xa,
328    COND_LT = 0xb,
329    COND_GT = 0xc,
330    COND_LE = 0xd,
331    COND_AL = 0xe,
332    COND_NV = 0xf, /* behaves like COND_AL here */
333};
334
335static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
336    [TCG_COND_EQ] = COND_EQ,
337    [TCG_COND_NE] = COND_NE,
338    [TCG_COND_LT] = COND_LT,
339    [TCG_COND_GE] = COND_GE,
340    [TCG_COND_LE] = COND_LE,
341    [TCG_COND_GT] = COND_GT,
342    /* unsigned */
343    [TCG_COND_LTU] = COND_LO,
344    [TCG_COND_GTU] = COND_HI,
345    [TCG_COND_GEU] = COND_HS,
346    [TCG_COND_LEU] = COND_LS,
347};
348
349typedef enum {
350    LDST_ST = 0,    /* store */
351    LDST_LD = 1,    /* load */
352    LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
353    LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
354} AArch64LdstType;
355
356/* We encode the format of the insn into the beginning of the name, so that
357   we can have the preprocessor help "typecheck" the insn vs the output
358   function.  Arm didn't provide us with nice names for the formats, so we
359   use the section number of the architecture reference manual in which the
360   instruction group is described.  */
361typedef enum {
362    /* Compare and branch (immediate).  */
363    I3201_CBZ       = 0x34000000,
364    I3201_CBNZ      = 0x35000000,
365
366    /* Conditional branch (immediate).  */
367    I3202_B_C       = 0x54000000,
368
369    /* Unconditional branch (immediate).  */
370    I3206_B         = 0x14000000,
371    I3206_BL        = 0x94000000,
372
373    /* Unconditional branch (register).  */
374    I3207_BR        = 0xd61f0000,
375    I3207_BLR       = 0xd63f0000,
376    I3207_RET       = 0xd65f0000,
377
378    /* AdvSIMD load/store single structure.  */
379    I3303_LD1R      = 0x0d40c000,
380
381    /* Load literal for loading the address at pc-relative offset */
382    I3305_LDR       = 0x58000000,
383    I3305_LDR_v64   = 0x5c000000,
384    I3305_LDR_v128  = 0x9c000000,
385
386    /* Load/store exclusive. */
387    I3306_LDXP      = 0xc8600000,
388    I3306_STXP      = 0xc8200000,
389
390    /* Load/store register.  Described here as 3.3.12, but the helper
391       that emits them can transform to 3.3.10 or 3.3.13.  */
392    I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
393    I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
394    I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
395    I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
396
397    I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
398    I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
399    I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
400    I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
401
402    I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
403    I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
404
405    I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
406    I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
407    I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
408
409    I3312_LDRVS     = 0x3c000000 | LDST_LD << 22 | MO_32 << 30,
410    I3312_STRVS     = 0x3c000000 | LDST_ST << 22 | MO_32 << 30,
411
412    I3312_LDRVD     = 0x3c000000 | LDST_LD << 22 | MO_64 << 30,
413    I3312_STRVD     = 0x3c000000 | LDST_ST << 22 | MO_64 << 30,
414
415    I3312_LDRVQ     = 0x3c000000 | 3 << 22 | 0 << 30,
416    I3312_STRVQ     = 0x3c000000 | 2 << 22 | 0 << 30,
417
418    I3312_TO_I3310  = 0x00200800,
419    I3312_TO_I3313  = 0x01000000,
420
421    /* Load/store register pair instructions.  */
422    I3314_LDP       = 0x28400000,
423    I3314_STP       = 0x28000000,
424
425    /* Add/subtract immediate instructions.  */
426    I3401_ADDI      = 0x11000000,
427    I3401_ADDSI     = 0x31000000,
428    I3401_SUBI      = 0x51000000,
429    I3401_SUBSI     = 0x71000000,
430
431    /* Bitfield instructions.  */
432    I3402_BFM       = 0x33000000,
433    I3402_SBFM      = 0x13000000,
434    I3402_UBFM      = 0x53000000,
435
436    /* Extract instruction.  */
437    I3403_EXTR      = 0x13800000,
438
439    /* Logical immediate instructions.  */
440    I3404_ANDI      = 0x12000000,
441    I3404_ORRI      = 0x32000000,
442    I3404_EORI      = 0x52000000,
443    I3404_ANDSI     = 0x72000000,
444
445    /* Move wide immediate instructions.  */
446    I3405_MOVN      = 0x12800000,
447    I3405_MOVZ      = 0x52800000,
448    I3405_MOVK      = 0x72800000,
449
450    /* PC relative addressing instructions.  */
451    I3406_ADR       = 0x10000000,
452    I3406_ADRP      = 0x90000000,
453
454    /* Add/subtract extended register instructions. */
455    I3501_ADD       = 0x0b200000,
456
457    /* Add/subtract shifted register instructions (without a shift).  */
458    I3502_ADD       = 0x0b000000,
459    I3502_ADDS      = 0x2b000000,
460    I3502_SUB       = 0x4b000000,
461    I3502_SUBS      = 0x6b000000,
462
463    /* Add/subtract shifted register instructions (with a shift).  */
464    I3502S_ADD_LSL  = I3502_ADD,
465
466    /* Add/subtract with carry instructions.  */
467    I3503_ADC       = 0x1a000000,
468    I3503_SBC       = 0x5a000000,
469
470    /* Conditional select instructions.  */
471    I3506_CSEL      = 0x1a800000,
472    I3506_CSINC     = 0x1a800400,
473    I3506_CSINV     = 0x5a800000,
474    I3506_CSNEG     = 0x5a800400,
475
476    /* Data-processing (1 source) instructions.  */
477    I3507_CLZ       = 0x5ac01000,
478    I3507_RBIT      = 0x5ac00000,
479    I3507_REV       = 0x5ac00000, /* + size << 10 */
480
481    /* Data-processing (2 source) instructions.  */
482    I3508_LSLV      = 0x1ac02000,
483    I3508_LSRV      = 0x1ac02400,
484    I3508_ASRV      = 0x1ac02800,
485    I3508_RORV      = 0x1ac02c00,
486    I3508_SMULH     = 0x9b407c00,
487    I3508_UMULH     = 0x9bc07c00,
488    I3508_UDIV      = 0x1ac00800,
489    I3508_SDIV      = 0x1ac00c00,
490
491    /* Data-processing (3 source) instructions.  */
492    I3509_MADD      = 0x1b000000,
493    I3509_MSUB      = 0x1b008000,
494
495    /* Logical shifted register instructions (without a shift).  */
496    I3510_AND       = 0x0a000000,
497    I3510_BIC       = 0x0a200000,
498    I3510_ORR       = 0x2a000000,
499    I3510_ORN       = 0x2a200000,
500    I3510_EOR       = 0x4a000000,
501    I3510_EON       = 0x4a200000,
502    I3510_ANDS      = 0x6a000000,
503
504    /* Logical shifted register instructions (with a shift).  */
505    I3502S_AND_LSR  = I3510_AND | (1 << 22),
506
507    /* AdvSIMD copy */
508    I3605_DUP      = 0x0e000400,
509    I3605_INS      = 0x4e001c00,
510    I3605_UMOV     = 0x0e003c00,
511
512    /* AdvSIMD modified immediate */
513    I3606_MOVI      = 0x0f000400,
514    I3606_MVNI      = 0x2f000400,
515    I3606_BIC       = 0x2f001400,
516    I3606_ORR       = 0x0f001400,
517
518    /* AdvSIMD scalar shift by immediate */
519    I3609_SSHR      = 0x5f000400,
520    I3609_SSRA      = 0x5f001400,
521    I3609_SHL       = 0x5f005400,
522    I3609_USHR      = 0x7f000400,
523    I3609_USRA      = 0x7f001400,
524    I3609_SLI       = 0x7f005400,
525
526    /* AdvSIMD scalar three same */
527    I3611_SQADD     = 0x5e200c00,
528    I3611_SQSUB     = 0x5e202c00,
529    I3611_CMGT      = 0x5e203400,
530    I3611_CMGE      = 0x5e203c00,
531    I3611_SSHL      = 0x5e204400,
532    I3611_ADD       = 0x5e208400,
533    I3611_CMTST     = 0x5e208c00,
534    I3611_UQADD     = 0x7e200c00,
535    I3611_UQSUB     = 0x7e202c00,
536    I3611_CMHI      = 0x7e203400,
537    I3611_CMHS      = 0x7e203c00,
538    I3611_USHL      = 0x7e204400,
539    I3611_SUB       = 0x7e208400,
540    I3611_CMEQ      = 0x7e208c00,
541
542    /* AdvSIMD scalar two-reg misc */
543    I3612_CMGT0     = 0x5e208800,
544    I3612_CMEQ0     = 0x5e209800,
545    I3612_CMLT0     = 0x5e20a800,
546    I3612_ABS       = 0x5e20b800,
547    I3612_CMGE0     = 0x7e208800,
548    I3612_CMLE0     = 0x7e209800,
549    I3612_NEG       = 0x7e20b800,
550
551    /* AdvSIMD shift by immediate */
552    I3614_SSHR      = 0x0f000400,
553    I3614_SSRA      = 0x0f001400,
554    I3614_SHL       = 0x0f005400,
555    I3614_SLI       = 0x2f005400,
556    I3614_USHR      = 0x2f000400,
557    I3614_USRA      = 0x2f001400,
558
559    /* AdvSIMD three same.  */
560    I3616_ADD       = 0x0e208400,
561    I3616_AND       = 0x0e201c00,
562    I3616_BIC       = 0x0e601c00,
563    I3616_BIF       = 0x2ee01c00,
564    I3616_BIT       = 0x2ea01c00,
565    I3616_BSL       = 0x2e601c00,
566    I3616_EOR       = 0x2e201c00,
567    I3616_MUL       = 0x0e209c00,
568    I3616_ORR       = 0x0ea01c00,
569    I3616_ORN       = 0x0ee01c00,
570    I3616_SUB       = 0x2e208400,
571    I3616_CMGT      = 0x0e203400,
572    I3616_CMGE      = 0x0e203c00,
573    I3616_CMTST     = 0x0e208c00,
574    I3616_CMHI      = 0x2e203400,
575    I3616_CMHS      = 0x2e203c00,
576    I3616_CMEQ      = 0x2e208c00,
577    I3616_SMAX      = 0x0e206400,
578    I3616_SMIN      = 0x0e206c00,
579    I3616_SSHL      = 0x0e204400,
580    I3616_SQADD     = 0x0e200c00,
581    I3616_SQSUB     = 0x0e202c00,
582    I3616_UMAX      = 0x2e206400,
583    I3616_UMIN      = 0x2e206c00,
584    I3616_UQADD     = 0x2e200c00,
585    I3616_UQSUB     = 0x2e202c00,
586    I3616_USHL      = 0x2e204400,
587
588    /* AdvSIMD two-reg misc.  */
589    I3617_CMGT0     = 0x0e208800,
590    I3617_CMEQ0     = 0x0e209800,
591    I3617_CMLT0     = 0x0e20a800,
592    I3617_CMGE0     = 0x2e208800,
593    I3617_CMLE0     = 0x2e209800,
594    I3617_NOT       = 0x2e205800,
595    I3617_ABS       = 0x0e20b800,
596    I3617_NEG       = 0x2e20b800,
597
598    /* System instructions.  */
599    NOP             = 0xd503201f,
600    DMB_ISH         = 0xd50338bf,
601    DMB_LD          = 0x00000100,
602    DMB_ST          = 0x00000200,
603
604    BTI_C           = 0xd503245f,
605    BTI_J           = 0xd503249f,
606    BTI_JC          = 0xd50324df,
607} AArch64Insn;
608
609static inline uint32_t tcg_in32(TCGContext *s)
610{
611    uint32_t v = *(uint32_t *)s->code_ptr;
612    return v;
613}
614
615/* Emit an opcode with "type-checking" of the format.  */
616#define tcg_out_insn(S, FMT, OP, ...) \
617    glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
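/*
 * For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
 * tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm); pairing an opcode
 * with an emitter of the wrong format then fails to compile.
 */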
618
619static void tcg_out_insn_3303(TCGContext *s, AArch64Insn insn, bool q,
620                              TCGReg rt, TCGReg rn, unsigned size)
621{
622    tcg_out32(s, insn | (rt & 0x1f) | (rn << 5) | (size << 10) | (q << 30));
623}
624
625static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
626                              int imm19, TCGReg rt)
627{
628    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
629}
630
631static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
632                              TCGReg rt, TCGReg rt2, TCGReg rn)
633{
634    tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
635}
636
637static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
638                              TCGReg rt, int imm19)
639{
640    tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
641}
642
643static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
644                              TCGCond c, int imm19)
645{
646    tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
647}
648
649static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
650{
651    tcg_out32(s, insn | (imm26 & 0x03ffffff));
652}
653
654static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
655{
656    tcg_out32(s, insn | rn << 5);
657}
658
659static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
660                              TCGReg r1, TCGReg r2, TCGReg rn,
661                              tcg_target_long ofs, bool pre, bool w)
662{
663    insn |= 1u << 31; /* ext */
664    insn |= pre << 24;
665    insn |= w << 23;
666
667    tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
668    insn |= (ofs & (0x7f << 3)) << (15 - 3);
669
670    tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
671}
672
673static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
674                              TCGReg rd, TCGReg rn, uint64_t aimm)
675{
676    if (aimm > 0xfff) {
677        tcg_debug_assert((aimm & 0xfff) == 0);
678        aimm >>= 12;
679        tcg_debug_assert(aimm <= 0xfff);
680        aimm |= 1 << 12;  /* apply LSL 12 */
681    }
682    tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
683}
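/*
 * For illustration: aimm = 0x123000 is rewritten above to 0x123 with the
 * LSL-12 flag set (bit 12 of the immediate, which lands in the sh field
 * once shifted into place), while aimm = 0x123 is encoded directly.
 * Values spanning both halves must have been rejected earlier by is_aimm().
 */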
684
685/* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
686   (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
687   that feed the DecodeBitMasks pseudo function.  */
688static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
689                              TCGReg rd, TCGReg rn, int n, int immr, int imms)
690{
691    tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
692              | rn << 5 | rd);
693}
694
695#define tcg_out_insn_3404  tcg_out_insn_3402
696
697static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
698                              TCGReg rd, TCGReg rn, TCGReg rm, int imms)
699{
700    tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
701              | rn << 5 | rd);
702}
703
704/* This function is used for the Move (wide immediate) instruction group.
705   Note that SHIFT is a full shift count, not the 2 bit HW field. */
706static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
707                              TCGReg rd, uint16_t half, unsigned shift)
708{
709    tcg_debug_assert((shift & ~0x30) == 0);
710    tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
711}
712
713static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
714                              TCGReg rd, int64_t disp)
715{
716    tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
717}
718
719static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
720                                     TCGType sf, TCGReg rd, TCGReg rn,
721                                     TCGReg rm, int opt, int imm3)
722{
723    tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
724              imm3 << 10 | rn << 5 | rd);
725}
726
727/* This function is for 3.5.2 (Add/subtract shifted register), for the
728   rare occasion when we actually want to supply a shift amount.  */
729static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
730                                      TCGType ext, TCGReg rd, TCGReg rn,
731                                      TCGReg rm, int imm6)
732{
733    tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
734}
735
736/* This function is for 3.5.2 (Add/subtract shifted register),
737   and 3.5.10 (Logical shifted register), for the vast majority of cases
738   when we don't want to apply a shift.  Thus it can also be used for
739   3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
740static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
741                              TCGReg rd, TCGReg rn, TCGReg rm)
742{
743    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
744}
745
746#define tcg_out_insn_3503  tcg_out_insn_3502
747#define tcg_out_insn_3508  tcg_out_insn_3502
748#define tcg_out_insn_3510  tcg_out_insn_3502
749
750static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
751                              TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
752{
753    tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
754              | tcg_cond_to_aarch64[c] << 12);
755}
756
757static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
758                              TCGReg rd, TCGReg rn)
759{
760    tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
761}
762
763static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
764                              TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
765{
766    tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
767}
768
769static void tcg_out_insn_3605(TCGContext *s, AArch64Insn insn, bool q,
770                              TCGReg rd, TCGReg rn, int dst_idx, int src_idx)
771{
772    /* Note that bit 11 set means general register input.  Therefore
773       we can handle both register sets with one function.  */
774    tcg_out32(s, insn | q << 30 | (dst_idx << 16) | (src_idx << 11)
775              | (rd & 0x1f) | (~rn & 0x20) << 6 | (rn & 0x1f) << 5);
776}
777
778static void tcg_out_insn_3606(TCGContext *s, AArch64Insn insn, bool q,
779                              TCGReg rd, bool op, int cmode, uint8_t imm8)
780{
781    tcg_out32(s, insn | q << 30 | op << 29 | cmode << 12 | (rd & 0x1f)
782              | (imm8 & 0xe0) << (16 - 5) | (imm8 & 0x1f) << 5);
783}
784
785static void tcg_out_insn_3609(TCGContext *s, AArch64Insn insn,
786                              TCGReg rd, TCGReg rn, unsigned immhb)
787{
788    tcg_out32(s, insn | immhb << 16 | (rn & 0x1f) << 5 | (rd & 0x1f));
789}
790
791static void tcg_out_insn_3611(TCGContext *s, AArch64Insn insn,
792                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
793{
794    tcg_out32(s, insn | (size << 22) | (rm & 0x1f) << 16
795              | (rn & 0x1f) << 5 | (rd & 0x1f));
796}
797
798static void tcg_out_insn_3612(TCGContext *s, AArch64Insn insn,
799                              unsigned size, TCGReg rd, TCGReg rn)
800{
801    tcg_out32(s, insn | (size << 22) | (rn & 0x1f) << 5 | (rd & 0x1f));
802}
803
804static void tcg_out_insn_3614(TCGContext *s, AArch64Insn insn, bool q,
805                              TCGReg rd, TCGReg rn, unsigned immhb)
806{
807    tcg_out32(s, insn | q << 30 | immhb << 16
808              | (rn & 0x1f) << 5 | (rd & 0x1f));
809}
810
811static void tcg_out_insn_3616(TCGContext *s, AArch64Insn insn, bool q,
812                              unsigned size, TCGReg rd, TCGReg rn, TCGReg rm)
813{
814    tcg_out32(s, insn | q << 30 | (size << 22) | (rm & 0x1f) << 16
815              | (rn & 0x1f) << 5 | (rd & 0x1f));
816}
817
818static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
819                              unsigned size, TCGReg rd, TCGReg rn)
820{
821    tcg_out32(s, insn | q << 30 | (size << 22)
822              | (rn & 0x1f) << 5 | (rd & 0x1f));
823}
824
825static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
826                              TCGReg rd, TCGReg base, TCGType ext,
827                              TCGReg regoff)
828{
829    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
830    tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
831              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
832}
833
834static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
835                              TCGReg rd, TCGReg rn, intptr_t offset)
836{
837    tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | (rd & 0x1f));
838}
839
840static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
841                              TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
842{
843    /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
844    tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10
845              | rn << 5 | (rd & 0x1f));
846}
847
848static void tcg_out_bti(TCGContext *s, AArch64Insn insn)
849{
850    /*
851     * While BTI insns are nops on hosts without FEAT_BTI,
852     * there is no point in emitting them in that case either.
853     */
854    if (cpuinfo & CPUINFO_BTI) {
855        tcg_out32(s, insn);
856    }
857}
858
859/* Register to register move using ORR (shifted register with no shift). */
860static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
861{
862    tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
863}
864
865/* Register to register move using ADDI (move to/from SP).  */
866static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
867{
868    tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
869}
870
871/* This function is used for the Logical (immediate) instruction group.
872   The value of LIMM must satisfy IS_LIMM.  See the comment above about
873   only supporting simplified logical immediates.  */
874static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
875                             TCGReg rd, TCGReg rn, uint64_t limm)
876{
877    unsigned h, l, r, c;
878
879    tcg_debug_assert(is_limm(limm));
880
881    h = clz64(limm);
882    l = ctz64(limm);
883    if (l == 0) {
884        r = 0;                  /* form 0....01....1 */
885        c = ctz64(~limm) - 1;
886        if (h == 0) {
887            r = clz64(~limm);   /* form 1..10..01..1 */
888            c += r;
889        }
890    } else {
891        r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
892        c = r - h - 1;
893    }
894    if (ext == TCG_TYPE_I32) {
895        r &= 31;
896        c &= 31;
897    }
898
899    tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
900}
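/*
 * A worked example of the decomposition above: limm = 0x0000000000ff0000
 * gives l = 16 and h = 40, hence r = 64 - 16 = 48 and c = 48 - 40 - 1 = 7,
 * i.e. an 8-bit run of ones rotated right by 48, which is exactly the
 * (immr, imms) pair that DecodeBitMasks reconstructs into this value.
 */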
901
902static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
903                             TCGReg rd, int64_t v64)
904{
905    bool q = type == TCG_TYPE_V128;
906    int cmode, imm8, i;
907
908    /* Test all bytes equal first.  */
909    if (vece == MO_8) {
910        imm8 = (uint8_t)v64;
911        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
912        return;
913    }
914
915    /*
916     * Test all bytes 0x00 or 0xff second.  This can match cases that
917     * might otherwise take 2 or 3 insns for MO_16 or MO_32 below.
918     */
919    for (i = imm8 = 0; i < 8; i++) {
920        uint8_t byte = v64 >> (i * 8);
921        if (byte == 0xff) {
922            imm8 |= 1 << i;
923        } else if (byte != 0) {
924            goto fail_bytes;
925        }
926    }
927    tcg_out_insn(s, 3606, MOVI, q, rd, 1, 0xe, imm8);
928    return;
929 fail_bytes:
930
931    /*
932     * Tests for various replications.  For each element width, if we
933     * cannot find an expansion there's no point checking a larger
934     * width because we already know by replication it cannot match.
935     */
936    if (vece == MO_16) {
937        uint16_t v16 = v64;
938
939        if (is_shimm16(v16, &cmode, &imm8)) {
940            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
941            return;
942        }
943        if (is_shimm16(~v16, &cmode, &imm8)) {
944            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
945            return;
946        }
947
948        /*
949         * Otherwise, all remaining constants can be loaded in two insns:
950         * rd = v16 & 0xff, rd |= v16 & 0xff00.
951         */
952        tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
953        tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
954        return;
955    } else if (vece == MO_32) {
956        uint32_t v32 = v64;
957        uint32_t n32 = ~v32;
958
959        if (is_shimm32(v32, &cmode, &imm8) ||
960            is_soimm32(v32, &cmode, &imm8) ||
961            is_fimm32(v32, &cmode, &imm8)) {
962            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
963            return;
964        }
965        if (is_shimm32(n32, &cmode, &imm8) ||
966            is_soimm32(n32, &cmode, &imm8)) {
967            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
968            return;
969        }
970
971        /*
972         * Restrict the set of constants to those we can load with
973         * two instructions.  Others we load from the pool.
974         */
975        i = is_shimm32_pair(v32, &cmode, &imm8);
976        if (i) {
977            tcg_out_insn(s, 3606, MOVI, q, rd, 0, cmode, imm8);
978            tcg_out_insn(s, 3606, ORR, q, rd, 0, i, extract32(v32, i * 4, 8));
979            return;
980        }
981        i = is_shimm32_pair(n32, &cmode, &imm8);
982        if (i) {
983            tcg_out_insn(s, 3606, MVNI, q, rd, 0, cmode, imm8);
984            tcg_out_insn(s, 3606, BIC, q, rd, 0, i, extract32(n32, i * 4, 8));
985            return;
986        }
987    } else if (is_fimm64(v64, &cmode, &imm8)) {
988        tcg_out_insn(s, 3606, MOVI, q, rd, 1, cmode, imm8);
989        return;
990    }
991
992    /*
993     * As a last resort, load from the constant pool.  Sadly there
994     * is no LD1R (literal), so store the full 16-byte vector.
995     */
996    if (type == TCG_TYPE_V128) {
997        new_pool_l2(s, R_AARCH64_CONDBR19, s->code_ptr, 0, v64, v64);
998        tcg_out_insn(s, 3305, LDR_v128, 0, rd);
999    } else {
1000        new_pool_label(s, v64, R_AARCH64_CONDBR19, s->code_ptr, 0);
1001        tcg_out_insn(s, 3305, LDR_v64, 0, rd);
1002    }
1003}
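/*
 * Illustrative outcomes of the search above, for assumed MO_32 elements:
 * 0x0000ab00 becomes a single MOVI (shifted 8-bit immediate), 0xffffff01
 * a single MVNI of 0xfe, 0x00ab00cd a MOVI+ORR pair via is_shimm32_pair,
 * and a 64-bit element with no such structure falls through to the
 * constant pool load above.
 */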
1004
1005static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1006                            TCGReg rd, TCGReg rs)
1007{
1008    int is_q = type - TCG_TYPE_V64;
1009    tcg_out_insn(s, 3605, DUP, is_q, rd, rs, 1 << vece, 0);
1010    return true;
1011}
1012
1013static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1014                             TCGReg r, TCGReg base, intptr_t offset)
1015{
1016    TCGReg temp = TCG_REG_TMP0;
1017
1018    if (offset < -0xffffff || offset > 0xffffff) {
1019        tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
1020        tcg_out_insn(s, 3502, ADD, 1, temp, temp, base);
1021        base = temp;
1022    } else {
1023        AArch64Insn add_insn = I3401_ADDI;
1024
1025        if (offset < 0) {
1026            add_insn = I3401_SUBI;
1027            offset = -offset;
1028        }
1029        if (offset & 0xfff000) {
1030            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff000);
1031            base = temp;
1032        }
1033        if (offset & 0xfff) {
1034            tcg_out_insn_3401(s, add_insn, 1, temp, base, offset & 0xfff);
1035            base = temp;
1036        }
1037    }
1038    tcg_out_insn(s, 3303, LD1R, type == TCG_TYPE_V128, r, base, vece);
1039    return true;
1040}
1041
1042static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
1043                         tcg_target_long value)
1044{
1045    tcg_target_long svalue = value;
1046    tcg_target_long ivalue = ~value;
1047    tcg_target_long t0, t1, t2;
1048    int s0, s1;
1049    AArch64Insn opc;
1050
1051    switch (type) {
1052    case TCG_TYPE_I32:
1053    case TCG_TYPE_I64:
1054        tcg_debug_assert(rd < 32);
1055        break;
1056    default:
1057        g_assert_not_reached();
1058    }
1059
1060    /* For 32-bit values, discard potential garbage in value.  For 64-bit
1061       values within [2**31, 2**32-1], we can create smaller sequences by
1062       interpreting this as a negative 32-bit number, while ensuring that
1063       the high 32 bits are cleared by setting SF=0.  */
1064    if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
1065        svalue = (int32_t)value;
1066        value = (uint32_t)value;
1067        ivalue = (uint32_t)ivalue;
1068        type = TCG_TYPE_I32;
1069    }
1070
1071    /* Speed things up by handling the common case of small positive
1072       and negative values specially.  */
1073    if ((value & ~0xffffull) == 0) {
1074        tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
1075        return;
1076    } else if ((ivalue & ~0xffffull) == 0) {
1077        tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
1078        return;
1079    }
1080
1081    /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
1082       use the sign-extended value.  That lets us match rotated values such
1083       as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
1084    if (is_limm(svalue)) {
1085        tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
1086        return;
1087    }
1088
1089    /* Look for host pointer values within 4G of the PC.  This happens
1090       often when loading pointers to QEMU's own data structures.  */
1091    if (type == TCG_TYPE_I64) {
1092        intptr_t src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
1093        tcg_target_long disp = value - src_rx;
1094        if (disp == sextract64(disp, 0, 21)) {
1095            tcg_out_insn(s, 3406, ADR, rd, disp);
1096            return;
1097        }
1098        disp = (value >> 12) - (src_rx >> 12);
1099        if (disp == sextract64(disp, 0, 21)) {
1100            tcg_out_insn(s, 3406, ADRP, rd, disp);
1101            if (value & 0xfff) {
1102                tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
1103            }
1104            return;
1105        }
1106    }
1107
1108    /* Would it take fewer insns to begin with MOVN?  */
1109    if (ctpop64(value) >= 32) {
1110        t0 = ivalue;
1111        opc = I3405_MOVN;
1112    } else {
1113        t0 = value;
1114        opc = I3405_MOVZ;
1115    }
1116    s0 = ctz64(t0) & (63 & -16);
1117    t1 = t0 & ~(0xffffull << s0);
1118    s1 = ctz64(t1) & (63 & -16);
1119    t2 = t1 & ~(0xffffull << s1);
1120    if (t2 == 0) {
1121        tcg_out_insn_3405(s, opc, type, rd, t0 >> s0, s0);
1122        if (t1 != 0) {
1123            tcg_out_insn(s, 3405, MOVK, type, rd, value >> s1, s1);
1124        }
1125        return;
1126    }
1127
1128    /* For more than 2 insns, dump it into the constant pool.  */
1129    new_pool_label(s, value, R_AARCH64_CONDBR19, s->code_ptr, 0);
1130    tcg_out_insn(s, 3305, LDR, 0, rd);
1131}
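/*
 * Illustrative choices made above, for assumed inputs: 0x0000ffff is a
 * single MOVZ; 0xffffffffffff0000 a single MOVN (its inverse fits in 16
 * bits); 0x00ffff00 a single ORR with a logical immediate; a pointer
 * within +/-4GB of the code buffer uses ADR or ADRP(+ADDI);
 * 0x12340000ffff0000 takes MOVZ+MOVK; anything needing three or more
 * insns is loaded from the pool.
 */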
1132
1133static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1134{
1135    return false;
1136}
1137
1138static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1139                             tcg_target_long imm)
1140{
1141    /* This function is only used for passing structs by reference. */
1142    g_assert_not_reached();
1143}
1144
1145/* Define something more legible for general use.  */
1146#define tcg_out_ldst_r  tcg_out_insn_3310
1147
1148static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
1149                         TCGReg rn, intptr_t offset, int lgsize)
1150{
1151    /* If the offset is naturally aligned and in range, then we can
1152       use the scaled uimm12 encoding */
1153    if (offset >= 0 && !(offset & ((1 << lgsize) - 1))) {
1154        uintptr_t scaled_uimm = offset >> lgsize;
1155        if (scaled_uimm <= 0xfff) {
1156            tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
1157            return;
1158        }
1159    }
1160
1161    /* Small signed offsets can use the unscaled encoding.  */
1162    if (offset >= -256 && offset < 256) {
1163        tcg_out_insn_3312(s, insn, rd, rn, offset);
1164        return;
1165    }
1166
1167    /* Worst-case scenario, move offset to temp register, use reg offset.  */
1168    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
1169    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
1170}
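/*
 * For example (assumed operands), an 8-byte load at offset 0x7ff8 uses
 * the scaled uimm12 form (0x7ff8 >> 3 = 0xfff); offset -16 uses the
 * unscaled simm9 form; offset 0x12345 is moved into TMP0 and emitted
 * as a register-offset access.
 */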
1171
1172static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
1173{
1174    if (ret == arg) {
1175        return true;
1176    }
1177    switch (type) {
1178    case TCG_TYPE_I32:
1179    case TCG_TYPE_I64:
1180        if (ret < 32 && arg < 32) {
1181            tcg_out_movr(s, type, ret, arg);
1182            break;
1183        } else if (ret < 32) {
1184            tcg_out_insn(s, 3605, UMOV, type, ret, arg, 4 << type, 0);
1185            break;
1186        } else if (arg < 32) {
1187            tcg_out_insn(s, 3605, INS, 0, ret, arg, 4 << type, 0);
1188            break;
1189        }
1190        /* FALLTHRU */
1191
1192    case TCG_TYPE_V64:
1193        tcg_debug_assert(ret >= 32 && arg >= 32);
1194        tcg_out_insn(s, 3616, ORR, 0, 0, ret, arg, arg);
1195        break;
1196    case TCG_TYPE_V128:
1197        tcg_debug_assert(ret >= 32 && arg >= 32);
1198        tcg_out_insn(s, 3616, ORR, 1, 0, ret, arg, arg);
1199        break;
1200
1201    default:
1202        g_assert_not_reached();
1203    }
1204    return true;
1205}
1206
1207static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
1208                       TCGReg base, intptr_t ofs)
1209{
1210    AArch64Insn insn;
1211    int lgsz;
1212
1213    switch (type) {
1214    case TCG_TYPE_I32:
1215        insn = (ret < 32 ? I3312_LDRW : I3312_LDRVS);
1216        lgsz = 2;
1217        break;
1218    case TCG_TYPE_I64:
1219        insn = (ret < 32 ? I3312_LDRX : I3312_LDRVD);
1220        lgsz = 3;
1221        break;
1222    case TCG_TYPE_V64:
1223        insn = I3312_LDRVD;
1224        lgsz = 3;
1225        break;
1226    case TCG_TYPE_V128:
1227        insn = I3312_LDRVQ;
1228        lgsz = 4;
1229        break;
1230    default:
1231        g_assert_not_reached();
1232    }
1233    tcg_out_ldst(s, insn, ret, base, ofs, lgsz);
1234}
1235
1236static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
1237                       TCGReg base, intptr_t ofs)
1238{
1239    AArch64Insn insn;
1240    int lgsz;
1241
1242    switch (type) {
1243    case TCG_TYPE_I32:
1244        insn = (src < 32 ? I3312_STRW : I3312_STRVS);
1245        lgsz = 2;
1246        break;
1247    case TCG_TYPE_I64:
1248        insn = (src < 32 ? I3312_STRX : I3312_STRVD);
1249        lgsz = 3;
1250        break;
1251    case TCG_TYPE_V64:
1252        insn = I3312_STRVD;
1253        lgsz = 3;
1254        break;
1255    case TCG_TYPE_V128:
1256        insn = I3312_STRVQ;
1257        lgsz = 4;
1258        break;
1259    default:
1260        g_assert_not_reached();
1261    }
1262    tcg_out_ldst(s, insn, src, base, ofs, lgsz);
1263}
1264
1265static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1266                               TCGReg base, intptr_t ofs)
1267{
1268    if (type <= TCG_TYPE_I64 && val == 0) {
1269        tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
1270        return true;
1271    }
1272    return false;
1273}
1274
1275static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
1276                               TCGReg rn, unsigned int a, unsigned int b)
1277{
1278    tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
1279}
1280
1281static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
1282                                TCGReg rn, unsigned int a, unsigned int b)
1283{
1284    tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
1285}
1286
1287static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
1288                                TCGReg rn, unsigned int a, unsigned int b)
1289{
1290    tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
1291}
1292
1293static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
1294                                TCGReg rn, TCGReg rm, unsigned int a)
1295{
1296    tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
1297}
1298
1299static inline void tcg_out_shl(TCGContext *s, TCGType ext,
1300                               TCGReg rd, TCGReg rn, unsigned int m)
1301{
1302    int bits = ext ? 64 : 32;
1303    int max = bits - 1;
1304    tcg_out_ubfm(s, ext, rd, rn, (bits - m) & max, (max - m) & max);
1305}
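/*
 * For example, a 32-bit shift left by 3 becomes UBFM Wd, Wn, #29, #28
 * ((bits - m) & max = 29, (max - m) & max = 28), which is the canonical
 * encoding of LSL Wd, Wn, #3.
 */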
1306
1307static inline void tcg_out_shr(TCGContext *s, TCGType ext,
1308                               TCGReg rd, TCGReg rn, unsigned int m)
1309{
1310    int max = ext ? 63 : 31;
1311    tcg_out_ubfm(s, ext, rd, rn, m & max, max);
1312}
1313
1314static inline void tcg_out_sar(TCGContext *s, TCGType ext,
1315                               TCGReg rd, TCGReg rn, unsigned int m)
1316{
1317    int max = ext ? 63 : 31;
1318    tcg_out_sbfm(s, ext, rd, rn, m & max, max);
1319}
1320
1321static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
1322                                TCGReg rd, TCGReg rn, unsigned int m)
1323{
1324    int max = ext ? 63 : 31;
1325    tcg_out_extr(s, ext, rd, rn, rn, m & max);
1326}
1327
1328static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
1329                                TCGReg rd, TCGReg rn, unsigned int m)
1330{
1331    int max = ext ? 63 : 31;
1332    tcg_out_extr(s, ext, rd, rn, rn, -m & max);
1333}
1334
1335static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
1336                               TCGReg rn, unsigned lsb, unsigned width)
1337{
1338    unsigned size = ext ? 64 : 32;
1339    unsigned a = (size - lsb) & (size - 1);
1340    unsigned b = width - 1;
1341    tcg_out_bfm(s, ext, rd, rn, a, b);
1342}
1343
1344static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
1345                        tcg_target_long b, bool const_b)
1346{
1347    if (const_b) {
1348        /* Using CMP or CMN aliases.  */
1349        if (b >= 0) {
1350            tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
1351        } else {
1352            tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
1353        }
1354    } else {
1355        /* Using CMP alias SUBS wzr, Wn, Wm */
1356        tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
1357    }
1358}
1359
1360static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1361{
1362    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1363    tcg_debug_assert(offset == sextract64(offset, 0, 26));
1364    tcg_out_insn(s, 3206, B, offset);
1365}
1366
1367static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
1368{
1369    ptrdiff_t offset = tcg_pcrel_diff(s, target) >> 2;
1370    if (offset == sextract64(offset, 0, 26)) {
1371        tcg_out_insn(s, 3206, BL, offset);
1372    } else {
1373        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1374        tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
1375    }
1376}
1377
1378static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
1379                         const TCGHelperInfo *info)
1380{
1381    tcg_out_call_int(s, target);
1382}
1383
1384static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
1385{
1386    if (!l->has_value) {
1387        tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
1388        tcg_out_insn(s, 3206, B, 0);
1389    } else {
1390        tcg_out_goto(s, l->u.value_ptr);
1391    }
1392}
1393
1394static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
1395                           TCGArg b, bool b_const, TCGLabel *l)
1396{
1397    intptr_t offset;
1398    bool need_cmp;
1399
1400    if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
1401        need_cmp = false;
1402    } else {
1403        need_cmp = true;
1404        tcg_out_cmp(s, ext, a, b, b_const);
1405    }
1406
1407    if (!l->has_value) {
1408        tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
1409        offset = tcg_in32(s) >> 5;
1410    } else {
1411        offset = tcg_pcrel_diff(s, l->u.value_ptr) >> 2;
1412        tcg_debug_assert(offset == sextract64(offset, 0, 19));
1413    }
1414
1415    if (need_cmp) {
1416        tcg_out_insn(s, 3202, B_C, c, offset);
1417    } else if (c == TCG_COND_EQ) {
1418        tcg_out_insn(s, 3201, CBZ, ext, a, offset);
1419    } else {
1420        tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
1421    }
1422}
1423
1424static inline void tcg_out_rev(TCGContext *s, int ext, MemOp s_bits,
1425                               TCGReg rd, TCGReg rn)
1426{
1427    /* REV, REV16, REV32 */
1428    tcg_out_insn_3507(s, I3507_REV | (s_bits << 10), ext, rd, rn);
1429}
1430
1431static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
1432                               TCGReg rd, TCGReg rn)
1433{
1434    /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
1435    int bits = (8 << s_bits) - 1;
1436    tcg_out_sbfm(s, ext, rd, rn, 0, bits);
1437}
1438
1439static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1440{
1441    tcg_out_sxt(s, type, MO_8, rd, rn);
1442}
1443
1444static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
1445{
1446    tcg_out_sxt(s, type, MO_16, rd, rn);
1447}
1448
1449static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
1450{
1451    tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
1452}
1453
1454static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1455{
1456    tcg_out_ext32s(s, rd, rn);
1457}
1458
1459static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
1460                               TCGReg rd, TCGReg rn)
1461{
1462    /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
1463    int bits = (8 << s_bits) - 1;
1464    tcg_out_ubfm(s, 0, rd, rn, 0, bits);
1465}
1466
1467static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
1468{
1469    tcg_out_uxt(s, MO_8, rd, rn);
1470}
1471
1472static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
1473{
1474    tcg_out_uxt(s, MO_16, rd, rn);
1475}
1476
1477static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
1478{
1479    tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
1480}
1481
1482static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
1483{
1484    tcg_out_ext32u(s, rd, rn);
1485}
1486
1487static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
1488{
1489    tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
1490}
1491
1492static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
1493                            TCGReg rn, int64_t aimm)
1494{
1495    if (aimm >= 0) {
1496        tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
1497    } else {
1498        tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
1499    }
1500}
1501
1502static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
1503                            TCGReg rh, TCGReg al, TCGReg ah,
1504                            tcg_target_long bl, tcg_target_long bh,
1505                            bool const_bl, bool const_bh, bool sub)
1506{
1507    TCGReg orig_rl = rl;
1508    AArch64Insn insn;
1509
1510    if (rl == ah || (!const_bh && rl == bh)) {
1511        rl = TCG_REG_TMP0;
1512    }
1513
1514    if (const_bl) {
1515        if (bl < 0) {
1516            bl = -bl;
1517            insn = sub ? I3401_ADDSI : I3401_SUBSI;
1518        } else {
1519            insn = sub ? I3401_SUBSI : I3401_ADDSI;
1520        }
1521
1522        if (unlikely(al == TCG_REG_XZR)) {
1523            /* ??? We want to allow al to be zero for the benefit of
1524               negation via subtraction.  However, that leaves open the
1525               possibility of adding 0+const in the low part, and the
1526               immediate add instructions encode XSP not XZR.  Don't try
1527               anything more elaborate here than loading another zero.  */
1528            al = TCG_REG_TMP0;
1529            tcg_out_movi(s, ext, al, 0);
1530        }
1531        tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1532    } else {
1533        tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1534    }
1535
1536    insn = I3503_ADC;
1537    if (const_bh) {
1538        /* Note that the only two constants we support are 0 and -1, and
1539           that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
1540        if ((bh != 0) ^ sub) {
1541            insn = I3503_SBC;
1542        }
1543        bh = TCG_REG_XZR;
1544    } else if (sub) {
1545        insn = I3503_SBC;
1546    }
1547    tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1548
1549    tcg_out_mov(s, ext, orig_rl, rl);
1550}
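/*
 * Example of the carry handling above (operands assumed): a double-word
 * subtraction with const_bh and bh == 0 emits SUBS for the low half and
 * then, because (bh != 0) ^ sub is true, SBC rh, ah, xzr for the high
 * half, folding the borrow in without materializing the constant.
 */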
1551
1552static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1553{
1554    static const uint32_t sync[] = {
1555        [0 ... TCG_MO_ALL]            = DMB_ISH | DMB_LD | DMB_ST,
1556        [TCG_MO_ST_ST]                = DMB_ISH | DMB_ST,
1557        [TCG_MO_LD_LD]                = DMB_ISH | DMB_LD,
1558        [TCG_MO_LD_ST]                = DMB_ISH | DMB_LD,
1559        [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1560    };
1561    tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1562}
1563
1564static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1565                         TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1566{
1567    TCGReg a1 = a0;
1568    if (is_ctz) {
1569        a1 = TCG_REG_TMP0;
1570        tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1571    }
1572    if (const_b && b == (ext ? 64 : 32)) {
1573        tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1574    } else {
1575        AArch64Insn sel = I3506_CSEL;
1576
1577        tcg_out_cmp(s, ext, a0, 0, 1);
1578        tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
1579
1580        if (const_b) {
1581            if (b == -1) {
1582                b = TCG_REG_XZR;
1583                sel = I3506_CSINV;
1584            } else if (b == 0) {
1585                b = TCG_REG_XZR;
1586            } else {
1587                tcg_out_movi(s, ext, d, b);
1588                b = d;
1589            }
1590        }
1591        tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
1592    }
1593}
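/*
 * For example (assumed operands): ctz with the default value equal to
 * the operand width folds to RBIT + CLZ; ctz with default -1 emits
 * RBIT, CMP a0 #0, CLZ into TMP0, then CSINV d, TMP0, xzr, ne, so an
 * all-zeros input yields -1 without a branch.
 */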
1594
1595typedef struct {
1596    TCGReg base;
1597    TCGReg index;
1598    TCGType index_ext;
1599    TCGAtomAlign aa;
1600} HostAddress;
1601
1602bool tcg_target_has_memory_bswap(MemOp memop)
1603{
1604    return false;
1605}
1606
1607static const TCGLdstHelperParam ldst_helper_param = {
1608    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
1609};
1610
1611static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1612{
1613    MemOp opc = get_memop(lb->oi);
1614
1615    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1616        return false;
1617    }
1618
1619    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1620    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1621    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1622    tcg_out_goto(s, lb->raddr);
1623    return true;
1624}
1625
1626static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1627{
1628    MemOp opc = get_memop(lb->oi);
1629
1630    if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1631        return false;
1632    }
1633
1634    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1635    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1636    tcg_out_goto(s, lb->raddr);
1637    return true;
1638}
1639
1640/* We expect to use a 7-bit scaled negative offset from ENV.  */
1641#define MIN_TLB_MASK_TABLE_OFS  -512
1642
1643/*
1644 * For system-mode, perform the TLB load and compare.
1645 * For user-mode, perform any required alignment tests.
1646 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1647 * is required and fill in @h with the host address for the fast path.
1648 */
1649static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1650                                           TCGReg addr_reg, MemOpIdx oi,
1651                                           bool is_ld)
1652{
1653    TCGType addr_type = s->addr_type;
1654    TCGLabelQemuLdst *ldst = NULL;
1655    MemOp opc = get_memop(oi);
1656    MemOp s_bits = opc & MO_SIZE;
1657    unsigned a_mask;
1658
1659    h->aa = atom_and_align_for_opc(s, opc,
1660                                   have_lse2 ? MO_ATOM_WITHIN16
1661                                             : MO_ATOM_IFALIGN,
1662                                   s_bits == MO_128);
1663    a_mask = (1 << h->aa.align) - 1;
1664
1665    if (tcg_use_softmmu) {
1666        unsigned s_mask = (1u << s_bits) - 1;
1667        unsigned mem_index = get_mmuidx(oi);
1668        TCGReg addr_adj;
1669        TCGType mask_type;
1670        uint64_t compare_mask;
1671
1672        ldst = new_ldst_label(s);
1673        ldst->is_ld = is_ld;
1674        ldst->oi = oi;
1675        ldst->addrlo_reg = addr_reg;
1676
1677        mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
1678                     ? TCG_TYPE_I64 : TCG_TYPE_I32);
1679
1680        /* Load cpu->neg.tlb.f[mmu_idx].{mask,table} into {tmp0,tmp1}. */
1681        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
1682        QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
1683        tcg_out_insn(s, 3314, LDP, TCG_REG_TMP0, TCG_REG_TMP1, TCG_AREG0,
1684                     tlb_mask_table_ofs(s, mem_index), 1, 0);
1685
1686        /* Extract the TLB index from the address into TMP0.  */
1687        tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
1688                     TCG_REG_TMP0, TCG_REG_TMP0, addr_reg,
1689                     s->page_bits - CPU_TLB_ENTRY_BITS);
1690
1691        /* Add the tlb_table pointer, forming the CPUTLBEntry address. */
1692        tcg_out_insn(s, 3502, ADD, 1, TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP0);
1693
1694        /* Load the tlb comparator into TMP0, and the fast path addend. */
1695        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1696        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP1,
1697                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1698                         : offsetof(CPUTLBEntry, addr_write));
1699        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
1700                   offsetof(CPUTLBEntry, addend));
1701
1702        /*
1703         * For aligned accesses, we check the first byte and include
1704         * the alignment bits within the address.  For unaligned access,
1705         * we check that we don't cross pages using the address of the
1706         * last byte of the access.
1707         */
1708        if (a_mask >= s_mask) {
1709            addr_adj = addr_reg;
1710        } else {
1711            addr_adj = TCG_REG_TMP2;
1712            tcg_out_insn(s, 3401, ADDI, addr_type,
1713                         addr_adj, addr_reg, s_mask - a_mask);
1714        }
1715        compare_mask = (uint64_t)s->page_mask | a_mask;
1716
1717        /* Store the page mask part of the address into TMP2.  */
1718        tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_TMP2,
1719                         addr_adj, compare_mask);
1720
1721        /* Perform the address comparison. */
1722        tcg_out_cmp(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2, 0);
1723
1724        /* If not equal, we jump to the slow path. */
1725        ldst->label_ptr[0] = s->code_ptr;
1726        tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1727
1728        h->base = TCG_REG_TMP1;
1729        h->index = addr_reg;
1730        h->index_ext = addr_type;
1731    } else {
1732        if (a_mask) {
1733            ldst = new_ldst_label(s);
1734
1735            ldst->is_ld = is_ld;
1736            ldst->oi = oi;
1737            ldst->addrlo_reg = addr_reg;
1738
1739            /* tst addr, #mask */
1740            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
1741
1742            /* b.ne slow_path */
1743            ldst->label_ptr[0] = s->code_ptr;
1744            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1745        }
1746
1747        if (guest_base || addr_type == TCG_TYPE_I32) {
1748            h->base = TCG_REG_GUEST_BASE;
1749            h->index = addr_reg;
1750            h->index_ext = addr_type;
1751        } else {
1752            h->base = addr_reg;
1753            h->index = TCG_REG_XZR;
1754            h->index_ext = TCG_TYPE_I64;
1755        }
1756    }
1757
1758    return ldst;
1759}
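/*
 * A sketch of the softmmu fast path emitted above, in rough assembly
 * (64-bit comparator, aligned access):
 *    ldp  tmp0, tmp1, [env, #tlb_mask_table_ofs]
 *    and  tmp0, tmp0, addr, lsr #(page_bits - CPU_TLB_ENTRY_BITS)
 *    add  tmp1, tmp1, tmp0                    (CPUTLBEntry address)
 *    ldr  tmp0, [tmp1, #addr_read/addr_write] (comparator)
 *    ldr  tmp1, [tmp1, #addend]
 *    and  tmp2, addr, #(page_mask | a_mask)
 *    cmp  tmp0, tmp2
 *    b.ne slow_path
 * leaving h->base = tmp1 (the addend) and h->index = addr.
 */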
1760
1761static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
1762                                   TCGReg data_r, HostAddress h)
1763{
1764    switch (memop & MO_SSIZE) {
1765    case MO_UB:
1766        tcg_out_ldst_r(s, I3312_LDRB, data_r, h.base, h.index_ext, h.index);
1767        break;
1768    case MO_SB:
1769        tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1770                       data_r, h.base, h.index_ext, h.index);
1771        break;
1772    case MO_UW:
1773        tcg_out_ldst_r(s, I3312_LDRH, data_r, h.base, h.index_ext, h.index);
1774        break;
1775    case MO_SW:
1776        tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1777                       data_r, h.base, h.index_ext, h.index);
1778        break;
1779    case MO_UL:
1780        tcg_out_ldst_r(s, I3312_LDRW, data_r, h.base, h.index_ext, h.index);
1781        break;
1782    case MO_SL:
1783        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, h.base, h.index_ext, h.index);
1784        break;
1785    case MO_UQ:
1786        tcg_out_ldst_r(s, I3312_LDRX, data_r, h.base, h.index_ext, h.index);
1787        break;
1788    default:
1789        g_assert_not_reached();
1790    }
1791}
1792
1793static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
1794                                   TCGReg data_r, HostAddress h)
1795{
1796    switch (memop & MO_SIZE) {
1797    case MO_8:
1798        tcg_out_ldst_r(s, I3312_STRB, data_r, h.base, h.index_ext, h.index);
1799        break;
1800    case MO_16:
1801        tcg_out_ldst_r(s, I3312_STRH, data_r, h.base, h.index_ext, h.index);
1802        break;
1803    case MO_32:
1804        tcg_out_ldst_r(s, I3312_STRW, data_r, h.base, h.index_ext, h.index);
1805        break;
1806    case MO_64:
1807        tcg_out_ldst_r(s, I3312_STRX, data_r, h.base, h.index_ext, h.index);
1808        break;
1809    default:
1810        g_assert_not_reached();
1811    }
1812}
1813
1814static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1815                            MemOpIdx oi, TCGType data_type)
1816{
1817    TCGLabelQemuLdst *ldst;
1818    HostAddress h;
1819
1820    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1821    tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
1822
1823    if (ldst) {
1824        ldst->type = data_type;
1825        ldst->datalo_reg = data_reg;
1826        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1827    }
1828}
1829
1830static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1831                            MemOpIdx oi, TCGType data_type)
1832{
1833    TCGLabelQemuLdst *ldst;
1834    HostAddress h;
1835
1836    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1837    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1838
1839    if (ldst) {
1840        ldst->type = data_type;
1841        ldst->datalo_reg = data_reg;
1842        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1843    }
1844}
1845
1846static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1847                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1848{
1849    TCGLabelQemuLdst *ldst;
1850    HostAddress h;
1851    TCGReg base;
1852    bool use_pair;
1853
1854    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
1855
1856    /* Compose the final address, as LDP/STP have no indexing. */
1857    if (h.index == TCG_REG_XZR) {
1858        base = h.base;
1859    } else {
1860        base = TCG_REG_TMP2;
1861        if (h.index_ext == TCG_TYPE_I32) {
1862            /* add base, base, index, uxtw */
1863            tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, base,
1864                         h.base, h.index, MO_32, 0);
1865        } else {
1866            /* add base, base, index */
1867            tcg_out_insn(s, 3502, ADD, 1, base, h.base, h.index);
1868        }
1869    }
1870
1871    use_pair = h.aa.atom < MO_128 || have_lse2;
1872
1873    if (!use_pair) {
1874        tcg_insn_unit *branch = NULL;
1875        TCGReg ll, lh, sl, sh;
1876
1877        /*
1878         * If we have already checked for 16-byte alignment, that's all
1879         * we need. Otherwise we have determined that misaligned atomicity
1880         * may be handled with two 8-byte loads.
1881         */
1882        if (h.aa.align < MO_128) {
1883            /*
1884             * TODO: align should be MO_64, so we need only test bit 3,
1885             * which means we could use TBNZ instead of ANDS+B_C.
1886             */
1887            tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, 15);
1888            branch = s->code_ptr;
1889            tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
1890            use_pair = true;
1891        }
1892
1893        if (is_ld) {
1894            /*
1895             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1896             *    ldxp lo, hi, [base]
1897             *    stxp t0, lo, hi, [base]
1898             *    cbnz t0, .-8
1899             * Require no overlap between data{lo,hi} and base.
1900             */
1901            if (datalo == base || datahi == base) {
1902                tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_TMP2, base);
1903                base = TCG_REG_TMP2;
1904            }
1905            ll = sl = datalo;
1906            lh = sh = datahi;
1907        } else {
1908            /*
1909             * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
1910             * 1: ldxp t0, t1, [base]
1911             *    stxp t0, lo, hi, [base]
1912             *    cbnz t0, 1b
1913             */
1914            tcg_debug_assert(base != TCG_REG_TMP0 && base != TCG_REG_TMP1);
1915            ll = TCG_REG_TMP0;
1916            lh = TCG_REG_TMP1;
1917            sl = datalo;
1918            sh = datahi;
1919        }
1920
1921        tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, ll, lh, base);
1922        tcg_out_insn(s, 3306, STXP, TCG_REG_TMP0, sl, sh, base);
1923        tcg_out_insn(s, 3201, CBNZ, 0, TCG_REG_TMP0, -2);
1924
1925        if (use_pair) {
1926            /* "b .+8", branching across the one insn of use_pair. */
1927            tcg_out_insn(s, 3206, B, 2);
1928            reloc_pc19(branch, tcg_splitwx_to_rx(s->code_ptr));
1929        }
1930    }
1931
1932    if (use_pair) {
1933        if (is_ld) {
1934            tcg_out_insn(s, 3314, LDP, datalo, datahi, base, 0, 1, 0);
1935        } else {
1936            tcg_out_insn(s, 3314, STP, datalo, datahi, base, 0, 1, 0);
1937        }
1938    }
1939
1940    if (ldst) {
1941        ldst->type = TCG_TYPE_I128;
1942        ldst->datalo_reg = datalo;
1943        ldst->datahi_reg = datahi;
1944        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1945    }
1946}
1947
1948static const tcg_insn_unit *tb_ret_addr;
1949
1950static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1951{
1952    const tcg_insn_unit *target;
1953    ptrdiff_t offset;
1954
1955    /* Reuse the zeroing that exists for goto_ptr.  */
1956    if (a0 == 0) {
1957        target = tcg_code_gen_epilogue;
1958    } else {
1959        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1960        target = tb_ret_addr;
1961    }
1962
1963    offset = tcg_pcrel_diff(s, target) >> 2;
1964    if (offset == sextract64(offset, 0, 26)) {
1965        tcg_out_insn(s, 3206, B, offset);
1966    } else {
1967        /*
1968         * Only x16/x17 generate BTI type Jump (2),
1969         * other registers generate BTI type Jump|Call (3).
1970         */
1971        QEMU_BUILD_BUG_ON(TCG_REG_TMP0 != TCG_REG_X16);
1972        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
1973        tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1974    }
1975}
1976
1977static void tcg_out_goto_tb(TCGContext *s, int which)
1978{
1979    /*
1980     * Direct branch, or indirect address load, will be patched
1981     * by tb_target_set_jmp_target.  Assert indirect load offset
1982     * in range early, regardless of direct branch distance.
1983     */
1984    intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which));
1985    tcg_debug_assert(i_off == sextract64(i_off, 0, 21));
1986
1987    set_jmp_insn_offset(s, which);
1988    tcg_out32(s, I3206_B);
1989    tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
1990    set_jmp_reset_offset(s, which);
1991    tcg_out_bti(s, BTI_J);
1992}
1993
1994void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1995                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1996{
1997    uintptr_t d_addr = tb->jmp_target_addr[n];
1998    ptrdiff_t d_offset = d_addr - jmp_rx;
1999    tcg_insn_unit insn;
2000
2001    /* Either directly branch, or indirect branch load. */
2002    if (d_offset == sextract64(d_offset, 0, 28)) {
2003        insn = deposit32(I3206_B, 0, 26, d_offset >> 2);
2004    } else {
2005        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
2006        ptrdiff_t i_offset = i_addr - jmp_rx;
2007
2008        /* Note that we asserted this in range in tcg_out_goto_tb. */
2009        insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
2010    }
2011    qatomic_set((uint32_t *)jmp_rw, insn);
2012    flush_idcache_range(jmp_rx, jmp_rw, 4);
2013}
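/*
 * The goto_tb slot therefore toggles between two forms at run time:
 *    b    tb_target              (when the target is within +/-128MB)
 *    ldr  x16, jmp_target_addr   (otherwise, followed by the br)
 * Only that first instruction is rewritten, atomically, by the
 * qatomic_set above; the "br x16" emitted in tcg_out_goto_tb is
 * always present after it.
 */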
2014
2015static void tcg_out_op(TCGContext *s, TCGOpcode opc,
2016                       const TCGArg args[TCG_MAX_OP_ARGS],
2017                       const int const_args[TCG_MAX_OP_ARGS])
2018{
2019    /* 99% of the time, we can signal the use of extension registers
2020       by looking to see if the opcode handles 64-bit data.  */
2021    TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
2022
2023    /* Hoist the loads of the most common arguments.  */
2024    TCGArg a0 = args[0];
2025    TCGArg a1 = args[1];
2026    TCGArg a2 = args[2];
2027    int c2 = const_args[2];
2028
2029    /* Some operands are defined with "rZ" constraint, a register or
2030       the zero register.  These need not actually test args[I] == 0.  */
2031#define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
2032
2033    switch (opc) {
2034    case INDEX_op_goto_ptr:
2035        tcg_out_insn(s, 3207, BR, a0);
2036        break;
2037
2038    case INDEX_op_br:
2039        tcg_out_goto_label(s, arg_label(a0));
2040        break;
2041
2042    case INDEX_op_ld8u_i32:
2043    case INDEX_op_ld8u_i64:
2044        tcg_out_ldst(s, I3312_LDRB, a0, a1, a2, 0);
2045        break;
2046    case INDEX_op_ld8s_i32:
2047        tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2, 0);
2048        break;
2049    case INDEX_op_ld8s_i64:
2050        tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2, 0);
2051        break;
2052    case INDEX_op_ld16u_i32:
2053    case INDEX_op_ld16u_i64:
2054        tcg_out_ldst(s, I3312_LDRH, a0, a1, a2, 1);
2055        break;
2056    case INDEX_op_ld16s_i32:
2057        tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2, 1);
2058        break;
2059    case INDEX_op_ld16s_i64:
2060        tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2, 1);
2061        break;
2062    case INDEX_op_ld_i32:
2063    case INDEX_op_ld32u_i64:
2064        tcg_out_ldst(s, I3312_LDRW, a0, a1, a2, 2);
2065        break;
2066    case INDEX_op_ld32s_i64:
2067        tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2, 2);
2068        break;
2069    case INDEX_op_ld_i64:
2070        tcg_out_ldst(s, I3312_LDRX, a0, a1, a2, 3);
2071        break;
2072
2073    case INDEX_op_st8_i32:
2074    case INDEX_op_st8_i64:
2075        tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2, 0);
2076        break;
2077    case INDEX_op_st16_i32:
2078    case INDEX_op_st16_i64:
2079        tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2, 1);
2080        break;
2081    case INDEX_op_st_i32:
2082    case INDEX_op_st32_i64:
2083        tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2, 2);
2084        break;
2085    case INDEX_op_st_i64:
2086        tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2, 3);
2087        break;
2088
2089    case INDEX_op_add_i32:
2090        a2 = (int32_t)a2;
2091        /* FALLTHRU */
2092    case INDEX_op_add_i64:
2093        if (c2) {
2094            tcg_out_addsubi(s, ext, a0, a1, a2);
2095        } else {
2096            tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
2097        }
2098        break;
2099
2100    case INDEX_op_sub_i32:
2101        a2 = (int32_t)a2;
2102        /* FALLTHRU */
2103    case INDEX_op_sub_i64:
2104        if (c2) {
2105            tcg_out_addsubi(s, ext, a0, a1, -a2);
2106        } else {
2107            tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
2108        }
2109        break;
2110
2111    case INDEX_op_neg_i64:
2112    case INDEX_op_neg_i32:
2113        tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
2114        break;
2115
2116    case INDEX_op_and_i32:
2117        a2 = (int32_t)a2;
2118        /* FALLTHRU */
2119    case INDEX_op_and_i64:
2120        if (c2) {
2121            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
2122        } else {
2123            tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
2124        }
2125        break;
2126
2127    case INDEX_op_andc_i32:
2128        a2 = (int32_t)a2;
2129        /* FALLTHRU */
2130    case INDEX_op_andc_i64:
2131        if (c2) {
2132            tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
2133        } else {
2134            tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
2135        }
2136        break;
2137
2138    case INDEX_op_or_i32:
2139        a2 = (int32_t)a2;
2140        /* FALLTHRU */
2141    case INDEX_op_or_i64:
2142        if (c2) {
2143            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
2144        } else {
2145            tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
2146        }
2147        break;
2148
2149    case INDEX_op_orc_i32:
2150        a2 = (int32_t)a2;
2151        /* FALLTHRU */
2152    case INDEX_op_orc_i64:
2153        if (c2) {
2154            tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
2155        } else {
2156            tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
2157        }
2158        break;
2159
2160    case INDEX_op_xor_i32:
2161        a2 = (int32_t)a2;
2162        /* FALLTHRU */
2163    case INDEX_op_xor_i64:
2164        if (c2) {
2165            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
2166        } else {
2167            tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
2168        }
2169        break;
2170
2171    case INDEX_op_eqv_i32:
2172        a2 = (int32_t)a2;
2173        /* FALLTHRU */
2174    case INDEX_op_eqv_i64:
2175        if (c2) {
2176            tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
2177        } else {
2178            tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
2179        }
2180        break;
2181
2182    case INDEX_op_not_i64:
2183    case INDEX_op_not_i32:
2184        tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
2185        break;
2186
2187    case INDEX_op_mul_i64:
2188    case INDEX_op_mul_i32:
2189        tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
2190        break;
2191
2192    case INDEX_op_div_i64:
2193    case INDEX_op_div_i32:
2194        tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
2195        break;
2196    case INDEX_op_divu_i64:
2197    case INDEX_op_divu_i32:
2198        tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
2199        break;
2200
2201    case INDEX_op_rem_i64:
2202    case INDEX_op_rem_i32:
2203        tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
2204        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2205        break;
2206    case INDEX_op_remu_i64:
2207    case INDEX_op_remu_i32:
2208        tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
2209        tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
2210        break;
2211
2212    case INDEX_op_shl_i64:
2213    case INDEX_op_shl_i32:
2214        if (c2) {
2215            tcg_out_shl(s, ext, a0, a1, a2);
2216        } else {
2217            tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
2218        }
2219        break;
2220
2221    case INDEX_op_shr_i64:
2222    case INDEX_op_shr_i32:
2223        if (c2) {
2224            tcg_out_shr(s, ext, a0, a1, a2);
2225        } else {
2226            tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
2227        }
2228        break;
2229
2230    case INDEX_op_sar_i64:
2231    case INDEX_op_sar_i32:
2232        if (c2) {
2233            tcg_out_sar(s, ext, a0, a1, a2);
2234        } else {
2235            tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
2236        }
2237        break;
2238
2239    case INDEX_op_rotr_i64:
2240    case INDEX_op_rotr_i32:
2241        if (c2) {
2242            tcg_out_rotr(s, ext, a0, a1, a2);
2243        } else {
2244            tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
2245        }
2246        break;
2247
2248    case INDEX_op_rotl_i64:
2249    case INDEX_op_rotl_i32:
2250        if (c2) {
2251            tcg_out_rotl(s, ext, a0, a1, a2);
2252        } else {
2253            tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
2254            tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
2255        }
2256        break;
2257
2258    case INDEX_op_clz_i64:
2259    case INDEX_op_clz_i32:
2260        tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
2261        break;
2262    case INDEX_op_ctz_i64:
2263    case INDEX_op_ctz_i32:
2264        tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
2265        break;
2266
2267    case INDEX_op_brcond_i32:
2268        a1 = (int32_t)a1;
2269        /* FALLTHRU */
2270    case INDEX_op_brcond_i64:
2271        tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
2272        break;
2273
2274    case INDEX_op_setcond_i32:
2275        a2 = (int32_t)a2;
2276        /* FALLTHRU */
2277    case INDEX_op_setcond_i64:
2278        tcg_out_cmp(s, ext, a1, a2, c2);
2279        /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
2280        tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
2281                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2282        break;
2283
2284    case INDEX_op_negsetcond_i32:
2285        a2 = (int32_t)a2;
2286        /* FALLTHRU */
2287    case INDEX_op_negsetcond_i64:
2288        tcg_out_cmp(s, ext, a1, a2, c2);
2289        /* Use CSETM alias of CSINV Wd, WZR, WZR, invert(cond).  */
2290        tcg_out_insn(s, 3506, CSINV, ext, a0, TCG_REG_XZR,
2291                     TCG_REG_XZR, tcg_invert_cond(args[3]));
2292        break;
2293
2294    case INDEX_op_movcond_i32:
2295        a2 = (int32_t)a2;
2296        /* FALLTHRU */
2297    case INDEX_op_movcond_i64:
2298        tcg_out_cmp(s, ext, a1, a2, c2);
2299        tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
2300        break;
2301
2302    case INDEX_op_qemu_ld_a32_i32:
2303    case INDEX_op_qemu_ld_a64_i32:
2304    case INDEX_op_qemu_ld_a32_i64:
2305    case INDEX_op_qemu_ld_a64_i64:
2306        tcg_out_qemu_ld(s, a0, a1, a2, ext);
2307        break;
2308    case INDEX_op_qemu_st_a32_i32:
2309    case INDEX_op_qemu_st_a64_i32:
2310    case INDEX_op_qemu_st_a32_i64:
2311    case INDEX_op_qemu_st_a64_i64:
2312        tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
2313        break;
2314    case INDEX_op_qemu_ld_a32_i128:
2315    case INDEX_op_qemu_ld_a64_i128:
2316        tcg_out_qemu_ldst_i128(s, a0, a1, a2, args[3], true);
2317        break;
2318    case INDEX_op_qemu_st_a32_i128:
2319    case INDEX_op_qemu_st_a64_i128:
2320        tcg_out_qemu_ldst_i128(s, REG0(0), REG0(1), a2, args[3], false);
2321        break;
2322
2323    case INDEX_op_bswap64_i64:
2324        tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
2325        break;
2326    case INDEX_op_bswap32_i64:
2327        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2328        if (a2 & TCG_BSWAP_OS) {
2329            tcg_out_ext32s(s, a0, a0);
2330        }
2331        break;
2332    case INDEX_op_bswap32_i32:
2333        tcg_out_rev(s, TCG_TYPE_I32, MO_32, a0, a1);
2334        break;
2335    case INDEX_op_bswap16_i64:
2336    case INDEX_op_bswap16_i32:
2337        tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
2338        if (a2 & TCG_BSWAP_OS) {
2339            /* Output must be sign-extended. */
2340            tcg_out_ext16s(s, ext, a0, a0);
2341        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2342            /* Output must be zero-extended, but input isn't. */
2343            tcg_out_ext16u(s, a0, a0);
2344        }
2345        break;
2346
2347    case INDEX_op_deposit_i64:
2348    case INDEX_op_deposit_i32:
2349        tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
2350        break;
2351
2352    case INDEX_op_extract_i64:
2353    case INDEX_op_extract_i32:
2354        tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2355        break;
2356
2357    case INDEX_op_sextract_i64:
2358    case INDEX_op_sextract_i32:
2359        tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
2360        break;
2361
2362    case INDEX_op_extract2_i64:
2363    case INDEX_op_extract2_i32:
2364        tcg_out_extr(s, ext, a0, REG0(2), REG0(1), args[3]);
2365        break;
2366
2367    case INDEX_op_add2_i32:
2368        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2369                        (int32_t)args[4], args[5], const_args[4],
2370                        const_args[5], false);
2371        break;
2372    case INDEX_op_add2_i64:
2373        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2374                        args[5], const_args[4], const_args[5], false);
2375        break;
2376    case INDEX_op_sub2_i32:
2377        tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
2378                        (int32_t)args[4], args[5], const_args[4],
2379                        const_args[5], true);
2380        break;
2381    case INDEX_op_sub2_i64:
2382        tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
2383                        args[5], const_args[4], const_args[5], true);
2384        break;
2385
2386    case INDEX_op_muluh_i64:
2387        tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
2388        break;
2389    case INDEX_op_mulsh_i64:
2390        tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
2391        break;
2392
2393    case INDEX_op_mb:
2394        tcg_out_mb(s, a0);
2395        break;
2396
2397    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2398    case INDEX_op_mov_i64:
2399    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2400    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2401    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2402    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2403    case INDEX_op_ext8s_i64:
2404    case INDEX_op_ext8u_i32:
2405    case INDEX_op_ext8u_i64:
2406    case INDEX_op_ext16s_i64:
2407    case INDEX_op_ext16s_i32:
2408    case INDEX_op_ext16u_i64:
2409    case INDEX_op_ext16u_i32:
2410    case INDEX_op_ext32s_i64:
2411    case INDEX_op_ext32u_i64:
2412    case INDEX_op_ext_i32_i64:
2413    case INDEX_op_extu_i32_i64:
2414    case INDEX_op_extrl_i64_i32:
2415    default:
2416        g_assert_not_reached();
2417    }
2418
2419#undef REG0
2420}
2421
2422static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2423                           unsigned vecl, unsigned vece,
2424                           const TCGArg args[TCG_MAX_OP_ARGS],
2425                           const int const_args[TCG_MAX_OP_ARGS])
2426{
2427    static const AArch64Insn cmp_vec_insn[16] = {
2428        [TCG_COND_EQ] = I3616_CMEQ,
2429        [TCG_COND_GT] = I3616_CMGT,
2430        [TCG_COND_GE] = I3616_CMGE,
2431        [TCG_COND_GTU] = I3616_CMHI,
2432        [TCG_COND_GEU] = I3616_CMHS,
2433    };
2434    static const AArch64Insn cmp_scalar_insn[16] = {
2435        [TCG_COND_EQ] = I3611_CMEQ,
2436        [TCG_COND_GT] = I3611_CMGT,
2437        [TCG_COND_GE] = I3611_CMGE,
2438        [TCG_COND_GTU] = I3611_CMHI,
2439        [TCG_COND_GEU] = I3611_CMHS,
2440    };
2441    static const AArch64Insn cmp0_vec_insn[16] = {
2442        [TCG_COND_EQ] = I3617_CMEQ0,
2443        [TCG_COND_GT] = I3617_CMGT0,
2444        [TCG_COND_GE] = I3617_CMGE0,
2445        [TCG_COND_LT] = I3617_CMLT0,
2446        [TCG_COND_LE] = I3617_CMLE0,
2447    };
2448    static const AArch64Insn cmp0_scalar_insn[16] = {
2449        [TCG_COND_EQ] = I3612_CMEQ0,
2450        [TCG_COND_GT] = I3612_CMGT0,
2451        [TCG_COND_GE] = I3612_CMGE0,
2452        [TCG_COND_LT] = I3612_CMLT0,
2453        [TCG_COND_LE] = I3612_CMLE0,
2454    };
2455
2456    TCGType type = vecl + TCG_TYPE_V64;
2457    unsigned is_q = vecl;
2458    bool is_scalar = !is_q && vece == MO_64;
2459    TCGArg a0, a1, a2, a3;
2460    int cmode, imm8;
2461
2462    a0 = args[0];
2463    a1 = args[1];
2464    a2 = args[2];
2465
2466    switch (opc) {
2467    case INDEX_op_ld_vec:
2468        tcg_out_ld(s, type, a0, a1, a2);
2469        break;
2470    case INDEX_op_st_vec:
2471        tcg_out_st(s, type, a0, a1, a2);
2472        break;
2473    case INDEX_op_dupm_vec:
2474        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2475        break;
2476    case INDEX_op_add_vec:
2477        if (is_scalar) {
2478            tcg_out_insn(s, 3611, ADD, vece, a0, a1, a2);
2479        } else {
2480            tcg_out_insn(s, 3616, ADD, is_q, vece, a0, a1, a2);
2481        }
2482        break;
2483    case INDEX_op_sub_vec:
2484        if (is_scalar) {
2485            tcg_out_insn(s, 3611, SUB, vece, a0, a1, a2);
2486        } else {
2487            tcg_out_insn(s, 3616, SUB, is_q, vece, a0, a1, a2);
2488        }
2489        break;
2490    case INDEX_op_mul_vec:
2491        tcg_out_insn(s, 3616, MUL, is_q, vece, a0, a1, a2);
2492        break;
2493    case INDEX_op_neg_vec:
2494        if (is_scalar) {
2495            tcg_out_insn(s, 3612, NEG, vece, a0, a1);
2496        } else {
2497            tcg_out_insn(s, 3617, NEG, is_q, vece, a0, a1);
2498        }
2499        break;
2500    case INDEX_op_abs_vec:
2501        if (is_scalar) {
2502            tcg_out_insn(s, 3612, ABS, vece, a0, a1);
2503        } else {
2504            tcg_out_insn(s, 3617, ABS, is_q, vece, a0, a1);
2505        }
2506        break;
2507    case INDEX_op_and_vec:
2508        if (const_args[2]) {
2509            is_shimm1632(~a2, &cmode, &imm8);
2510            if (a0 == a1) {
2511                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2512                return;
2513            }
2514            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2515            a2 = a0;
2516        }
2517        tcg_out_insn(s, 3616, AND, is_q, 0, a0, a1, a2);
2518        break;
2519    case INDEX_op_or_vec:
2520        if (const_args[2]) {
2521            is_shimm1632(a2, &cmode, &imm8);
2522            if (a0 == a1) {
2523                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2524                return;
2525            }
2526            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2527            a2 = a0;
2528        }
2529        tcg_out_insn(s, 3616, ORR, is_q, 0, a0, a1, a2);
2530        break;
2531    case INDEX_op_andc_vec:
2532        if (const_args[2]) {
2533            is_shimm1632(a2, &cmode, &imm8);
2534            if (a0 == a1) {
2535                tcg_out_insn(s, 3606, BIC, is_q, a0, 0, cmode, imm8);
2536                return;
2537            }
2538            tcg_out_insn(s, 3606, MOVI, is_q, a0, 0, cmode, imm8);
2539            a2 = a0;
2540        }
2541        tcg_out_insn(s, 3616, BIC, is_q, 0, a0, a1, a2);
2542        break;
2543    case INDEX_op_orc_vec:
2544        if (const_args[2]) {
2545            is_shimm1632(~a2, &cmode, &imm8);
2546            if (a0 == a1) {
2547                tcg_out_insn(s, 3606, ORR, is_q, a0, 0, cmode, imm8);
2548                return;
2549            }
2550            tcg_out_insn(s, 3606, MVNI, is_q, a0, 0, cmode, imm8);
2551            a2 = a0;
2552        }
2553        tcg_out_insn(s, 3616, ORN, is_q, 0, a0, a1, a2);
2554        break;
2555    case INDEX_op_xor_vec:
2556        tcg_out_insn(s, 3616, EOR, is_q, 0, a0, a1, a2);
2557        break;
2558    case INDEX_op_ssadd_vec:
2559        if (is_scalar) {
2560            tcg_out_insn(s, 3611, SQADD, vece, a0, a1, a2);
2561        } else {
2562            tcg_out_insn(s, 3616, SQADD, is_q, vece, a0, a1, a2);
2563        }
2564        break;
2565    case INDEX_op_sssub_vec:
2566        if (is_scalar) {
2567            tcg_out_insn(s, 3611, SQSUB, vece, a0, a1, a2);
2568        } else {
2569            tcg_out_insn(s, 3616, SQSUB, is_q, vece, a0, a1, a2);
2570        }
2571        break;
2572    case INDEX_op_usadd_vec:
2573        if (is_scalar) {
2574            tcg_out_insn(s, 3611, UQADD, vece, a0, a1, a2);
2575        } else {
2576            tcg_out_insn(s, 3616, UQADD, is_q, vece, a0, a1, a2);
2577        }
2578        break;
2579    case INDEX_op_ussub_vec:
2580        if (is_scalar) {
2581            tcg_out_insn(s, 3611, UQSUB, vece, a0, a1, a2);
2582        } else {
2583            tcg_out_insn(s, 3616, UQSUB, is_q, vece, a0, a1, a2);
2584        }
2585        break;
2586    case INDEX_op_smax_vec:
2587        tcg_out_insn(s, 3616, SMAX, is_q, vece, a0, a1, a2);
2588        break;
2589    case INDEX_op_smin_vec:
2590        tcg_out_insn(s, 3616, SMIN, is_q, vece, a0, a1, a2);
2591        break;
2592    case INDEX_op_umax_vec:
2593        tcg_out_insn(s, 3616, UMAX, is_q, vece, a0, a1, a2);
2594        break;
2595    case INDEX_op_umin_vec:
2596        tcg_out_insn(s, 3616, UMIN, is_q, vece, a0, a1, a2);
2597        break;
2598    case INDEX_op_not_vec:
2599        tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
2600        break;
2601    case INDEX_op_shli_vec:
2602        if (is_scalar) {
2603            tcg_out_insn(s, 3609, SHL, a0, a1, a2 + (8 << vece));
2604        } else {
2605            tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
2606        }
2607        break;
2608    case INDEX_op_shri_vec:
2609        if (is_scalar) {
2610            tcg_out_insn(s, 3609, USHR, a0, a1, (16 << vece) - a2);
2611        } else {
2612            tcg_out_insn(s, 3614, USHR, is_q, a0, a1, (16 << vece) - a2);
2613        }
2614        break;
2615    case INDEX_op_sari_vec:
2616        if (is_scalar) {
2617            tcg_out_insn(s, 3609, SSHR, a0, a1, (16 << vece) - a2);
2618        } else {
2619            tcg_out_insn(s, 3614, SSHR, is_q, a0, a1, (16 << vece) - a2);
2620        }
2621        break;
2622    case INDEX_op_aa64_sli_vec:
2623        if (is_scalar) {
2624            tcg_out_insn(s, 3609, SLI, a0, a2, args[3] + (8 << vece));
2625        } else {
2626            tcg_out_insn(s, 3614, SLI, is_q, a0, a2, args[3] + (8 << vece));
2627        }
2628        break;
2629    case INDEX_op_shlv_vec:
2630        if (is_scalar) {
2631            tcg_out_insn(s, 3611, USHL, vece, a0, a1, a2);
2632        } else {
2633            tcg_out_insn(s, 3616, USHL, is_q, vece, a0, a1, a2);
2634        }
2635        break;
2636    case INDEX_op_aa64_sshl_vec:
2637        if (is_scalar) {
2638            tcg_out_insn(s, 3611, SSHL, vece, a0, a1, a2);
2639        } else {
2640            tcg_out_insn(s, 3616, SSHL, is_q, vece, a0, a1, a2);
2641        }
2642        break;
2643    case INDEX_op_cmp_vec:
2644        {
2645            TCGCond cond = args[3];
2646            AArch64Insn insn;
2647
2648            if (cond == TCG_COND_NE) {
2649                if (const_args[2]) {
2650                    if (is_scalar) {
2651                        tcg_out_insn(s, 3611, CMTST, vece, a0, a1, a1);
2652                    } else {
2653                        tcg_out_insn(s, 3616, CMTST, is_q, vece, a0, a1, a1);
2654                    }
2655                } else {
2656                    if (is_scalar) {
2657                        tcg_out_insn(s, 3611, CMEQ, vece, a0, a1, a2);
2658                    } else {
2659                        tcg_out_insn(s, 3616, CMEQ, is_q, vece, a0, a1, a2);
2660                    }
2661                    tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a0);
2662                }
2663            } else {
2664                if (const_args[2]) {
2665                    if (is_scalar) {
2666                        insn = cmp0_scalar_insn[cond];
2667                        if (insn) {
2668                            tcg_out_insn_3612(s, insn, vece, a0, a1);
2669                            break;
2670                        }
2671                    } else {
2672                        insn = cmp0_vec_insn[cond];
2673                        if (insn) {
2674                            tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
2675                            break;
2676                        }
2677                    }
2678                    tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
2679                    a2 = TCG_VEC_TMP0;
2680                }
2681                if (is_scalar) {
2682                    insn = cmp_scalar_insn[cond];
2683                    if (insn == 0) {
2684                        TCGArg t;
2685                        t = a1, a1 = a2, a2 = t;
2686                        cond = tcg_swap_cond(cond);
2687                        insn = cmp_scalar_insn[cond];
2688                        tcg_debug_assert(insn != 0);
2689                    }
2690                    tcg_out_insn_3611(s, insn, vece, a0, a1, a2);
2691                } else {
2692                    insn = cmp_vec_insn[cond];
2693                    if (insn == 0) {
2694                        TCGArg t;
2695                        t = a1, a1 = a2, a2 = t;
2696                        cond = tcg_swap_cond(cond);
2697                        insn = cmp_vec_insn[cond];
2698                        tcg_debug_assert(insn != 0);
2699                    }
2700                    tcg_out_insn_3616(s, insn, is_q, vece, a0, a1, a2);
2701                }
2702            }
2703        }
2704        break;
2705
2706    case INDEX_op_bitsel_vec:
2707        a3 = args[3];
2708        if (a0 == a3) {
2709            tcg_out_insn(s, 3616, BIT, is_q, 0, a0, a2, a1);
2710        } else if (a0 == a2) {
2711            tcg_out_insn(s, 3616, BIF, is_q, 0, a0, a3, a1);
2712        } else {
2713            if (a0 != a1) {
2714                tcg_out_mov(s, type, a0, a1);
2715            }
2716            tcg_out_insn(s, 3616, BSL, is_q, 0, a0, a2, a3);
2717        }
2718        break;
2719
2720    case INDEX_op_mov_vec:  /* Always emitted via tcg_out_mov.  */
2721    case INDEX_op_dup_vec:  /* Always emitted via tcg_out_dup_vec.  */
2722    default:
2723        g_assert_not_reached();
2724    }
2725}
2726
2727int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2728{
2729    switch (opc) {
2730    case INDEX_op_add_vec:
2731    case INDEX_op_sub_vec:
2732    case INDEX_op_and_vec:
2733    case INDEX_op_or_vec:
2734    case INDEX_op_xor_vec:
2735    case INDEX_op_andc_vec:
2736    case INDEX_op_orc_vec:
2737    case INDEX_op_neg_vec:
2738    case INDEX_op_abs_vec:
2739    case INDEX_op_not_vec:
2740    case INDEX_op_cmp_vec:
2741    case INDEX_op_shli_vec:
2742    case INDEX_op_shri_vec:
2743    case INDEX_op_sari_vec:
2744    case INDEX_op_ssadd_vec:
2745    case INDEX_op_sssub_vec:
2746    case INDEX_op_usadd_vec:
2747    case INDEX_op_ussub_vec:
2748    case INDEX_op_shlv_vec:
2749    case INDEX_op_bitsel_vec:
2750        return 1;
2751    case INDEX_op_rotli_vec:
2752    case INDEX_op_shrv_vec:
2753    case INDEX_op_sarv_vec:
2754    case INDEX_op_rotlv_vec:
2755    case INDEX_op_rotrv_vec:
2756        return -1;
2757    case INDEX_op_mul_vec:
2758    case INDEX_op_smax_vec:
2759    case INDEX_op_smin_vec:
2760    case INDEX_op_umax_vec:
2761    case INDEX_op_umin_vec:
2762        return vece < MO_64;
2763
2764    default:
2765        return 0;
2766    }
2767}
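/*
 * Returning -1 requests expansion via tcg_expand_vec_op below; e.g.
 * variable right shifts and rotates are rewritten there in terms of
 * the native left-shift-by-register (ushl/sshl with negated counts).
 */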
2768
2769void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2770                       TCGArg a0, ...)
2771{
2772    va_list va;
2773    TCGv_vec v0, v1, v2, t1, t2, c1;
2774    TCGArg a2;
2775
2776    va_start(va, a0);
2777    v0 = temp_tcgv_vec(arg_temp(a0));
2778    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2779    a2 = va_arg(va, TCGArg);
2780    va_end(va);
2781
2782    switch (opc) {
2783    case INDEX_op_rotli_vec:
2784        t1 = tcg_temp_new_vec(type);
2785        tcg_gen_shri_vec(vece, t1, v1, -a2 & ((8 << vece) - 1));
2786        vec_gen_4(INDEX_op_aa64_sli_vec, type, vece,
2787                  tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(v1), a2);
2788        tcg_temp_free_vec(t1);
2789        break;
2790
2791    case INDEX_op_shrv_vec:
2792    case INDEX_op_sarv_vec:
2793        /* Right shifts are negative left shifts for AArch64.  */
2794        v2 = temp_tcgv_vec(arg_temp(a2));
2795        t1 = tcg_temp_new_vec(type);
2796        tcg_gen_neg_vec(vece, t1, v2);
2797        opc = (opc == INDEX_op_shrv_vec
2798               ? INDEX_op_shlv_vec : INDEX_op_aa64_sshl_vec);
2799        vec_gen_3(opc, type, vece, tcgv_vec_arg(v0),
2800                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2801        tcg_temp_free_vec(t1);
2802        break;
2803
2804    case INDEX_op_rotlv_vec:
2805        v2 = temp_tcgv_vec(arg_temp(a2));
2806        t1 = tcg_temp_new_vec(type);
2807        c1 = tcg_constant_vec(type, vece, 8 << vece);
2808        tcg_gen_sub_vec(vece, t1, v2, c1);
2809        /* Right shifts are negative left shifts for AArch64.  */
2810        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2811                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2812        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(v0),
2813                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
2814        tcg_gen_or_vec(vece, v0, v0, t1);
2815        tcg_temp_free_vec(t1);
2816        break;
2817
2818    case INDEX_op_rotrv_vec:
2819        v2 = temp_tcgv_vec(arg_temp(a2));
2820        t1 = tcg_temp_new_vec(type);
2821        t2 = tcg_temp_new_vec(type);
2822        c1 = tcg_constant_vec(type, vece, 8 << vece);
2823        tcg_gen_neg_vec(vece, t1, v2);
2824        tcg_gen_sub_vec(vece, t2, c1, v2);
2825        /* Right shifts are negative left shifts for AArch64.  */
2826        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t1),
2827                  tcgv_vec_arg(v1), tcgv_vec_arg(t1));
2828        vec_gen_3(INDEX_op_shlv_vec, type, vece, tcgv_vec_arg(t2),
2829                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
2830        tcg_gen_or_vec(vece, v0, t1, t2);
2831        tcg_temp_free_vec(t1);
2832        tcg_temp_free_vec(t2);
2833        break;
2834
2835    default:
2836        g_assert_not_reached();
2837    }
2838}
2839
2840static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2841{
2842    switch (op) {
2843    case INDEX_op_goto_ptr:
2844        return C_O0_I1(r);
2845
2846    case INDEX_op_ld8u_i32:
2847    case INDEX_op_ld8s_i32:
2848    case INDEX_op_ld16u_i32:
2849    case INDEX_op_ld16s_i32:
2850    case INDEX_op_ld_i32:
2851    case INDEX_op_ld8u_i64:
2852    case INDEX_op_ld8s_i64:
2853    case INDEX_op_ld16u_i64:
2854    case INDEX_op_ld16s_i64:
2855    case INDEX_op_ld32u_i64:
2856    case INDEX_op_ld32s_i64:
2857    case INDEX_op_ld_i64:
2858    case INDEX_op_neg_i32:
2859    case INDEX_op_neg_i64:
2860    case INDEX_op_not_i32:
2861    case INDEX_op_not_i64:
2862    case INDEX_op_bswap16_i32:
2863    case INDEX_op_bswap32_i32:
2864    case INDEX_op_bswap16_i64:
2865    case INDEX_op_bswap32_i64:
2866    case INDEX_op_bswap64_i64:
2867    case INDEX_op_ext8s_i32:
2868    case INDEX_op_ext16s_i32:
2869    case INDEX_op_ext8u_i32:
2870    case INDEX_op_ext16u_i32:
2871    case INDEX_op_ext8s_i64:
2872    case INDEX_op_ext16s_i64:
2873    case INDEX_op_ext32s_i64:
2874    case INDEX_op_ext8u_i64:
2875    case INDEX_op_ext16u_i64:
2876    case INDEX_op_ext32u_i64:
2877    case INDEX_op_ext_i32_i64:
2878    case INDEX_op_extu_i32_i64:
2879    case INDEX_op_extract_i32:
2880    case INDEX_op_extract_i64:
2881    case INDEX_op_sextract_i32:
2882    case INDEX_op_sextract_i64:
2883        return C_O1_I1(r, r);
2884
2885    case INDEX_op_st8_i32:
2886    case INDEX_op_st16_i32:
2887    case INDEX_op_st_i32:
2888    case INDEX_op_st8_i64:
2889    case INDEX_op_st16_i64:
2890    case INDEX_op_st32_i64:
2891    case INDEX_op_st_i64:
2892        return C_O0_I2(rZ, r);
2893
2894    case INDEX_op_add_i32:
2895    case INDEX_op_add_i64:
2896    case INDEX_op_sub_i32:
2897    case INDEX_op_sub_i64:
2898    case INDEX_op_setcond_i32:
2899    case INDEX_op_setcond_i64:
2900    case INDEX_op_negsetcond_i32:
2901    case INDEX_op_negsetcond_i64:
2902        return C_O1_I2(r, r, rA);
2903
2904    case INDEX_op_mul_i32:
2905    case INDEX_op_mul_i64:
2906    case INDEX_op_div_i32:
2907    case INDEX_op_div_i64:
2908    case INDEX_op_divu_i32:
2909    case INDEX_op_divu_i64:
2910    case INDEX_op_rem_i32:
2911    case INDEX_op_rem_i64:
2912    case INDEX_op_remu_i32:
2913    case INDEX_op_remu_i64:
2914    case INDEX_op_muluh_i64:
2915    case INDEX_op_mulsh_i64:
2916        return C_O1_I2(r, r, r);
2917
2918    case INDEX_op_and_i32:
2919    case INDEX_op_and_i64:
2920    case INDEX_op_or_i32:
2921    case INDEX_op_or_i64:
2922    case INDEX_op_xor_i32:
2923    case INDEX_op_xor_i64:
2924    case INDEX_op_andc_i32:
2925    case INDEX_op_andc_i64:
2926    case INDEX_op_orc_i32:
2927    case INDEX_op_orc_i64:
2928    case INDEX_op_eqv_i32:
2929    case INDEX_op_eqv_i64:
2930        return C_O1_I2(r, r, rL);
2931
2932    case INDEX_op_shl_i32:
2933    case INDEX_op_shr_i32:
2934    case INDEX_op_sar_i32:
2935    case INDEX_op_rotl_i32:
2936    case INDEX_op_rotr_i32:
2937    case INDEX_op_shl_i64:
2938    case INDEX_op_shr_i64:
2939    case INDEX_op_sar_i64:
2940    case INDEX_op_rotl_i64:
2941    case INDEX_op_rotr_i64:
2942        return C_O1_I2(r, r, ri);
2943
2944    case INDEX_op_clz_i32:
2945    case INDEX_op_ctz_i32:
2946    case INDEX_op_clz_i64:
2947    case INDEX_op_ctz_i64:
2948        return C_O1_I2(r, r, rAL);
2949
2950    case INDEX_op_brcond_i32:
2951    case INDEX_op_brcond_i64:
2952        return C_O0_I2(r, rA);
2953
2954    case INDEX_op_movcond_i32:
2955    case INDEX_op_movcond_i64:
2956        return C_O1_I4(r, r, rA, rZ, rZ);
2957
2958    case INDEX_op_qemu_ld_a32_i32:
2959    case INDEX_op_qemu_ld_a64_i32:
2960    case INDEX_op_qemu_ld_a32_i64:
2961    case INDEX_op_qemu_ld_a64_i64:
2962        return C_O1_I1(r, r);
2963    case INDEX_op_qemu_ld_a32_i128:
2964    case INDEX_op_qemu_ld_a64_i128:
2965        return C_O2_I1(r, r, r);
2966    case INDEX_op_qemu_st_a32_i32:
2967    case INDEX_op_qemu_st_a64_i32:
2968    case INDEX_op_qemu_st_a32_i64:
2969    case INDEX_op_qemu_st_a64_i64:
2970        return C_O0_I2(rZ, r);
2971    case INDEX_op_qemu_st_a32_i128:
2972    case INDEX_op_qemu_st_a64_i128:
2973        return C_O0_I3(rZ, rZ, r);
2974
2975    case INDEX_op_deposit_i32:
2976    case INDEX_op_deposit_i64:
2977        return C_O1_I2(r, 0, rZ);
2978
2979    case INDEX_op_extract2_i32:
2980    case INDEX_op_extract2_i64:
2981        return C_O1_I2(r, rZ, rZ);
2982
2983    case INDEX_op_add2_i32:
2984    case INDEX_op_add2_i64:
2985    case INDEX_op_sub2_i32:
2986    case INDEX_op_sub2_i64:
2987        return C_O2_I4(r, r, rZ, rZ, rA, rMZ);
2988
2989    case INDEX_op_add_vec:
2990    case INDEX_op_sub_vec:
2991    case INDEX_op_mul_vec:
2992    case INDEX_op_xor_vec:
2993    case INDEX_op_ssadd_vec:
2994    case INDEX_op_sssub_vec:
2995    case INDEX_op_usadd_vec:
2996    case INDEX_op_ussub_vec:
2997    case INDEX_op_smax_vec:
2998    case INDEX_op_smin_vec:
2999    case INDEX_op_umax_vec:
3000    case INDEX_op_umin_vec:
3001    case INDEX_op_shlv_vec:
3002    case INDEX_op_shrv_vec:
3003    case INDEX_op_sarv_vec:
3004    case INDEX_op_aa64_sshl_vec:
3005        return C_O1_I2(w, w, w);
3006    case INDEX_op_not_vec:
3007    case INDEX_op_neg_vec:
3008    case INDEX_op_abs_vec:
3009    case INDEX_op_shli_vec:
3010    case INDEX_op_shri_vec:
3011    case INDEX_op_sari_vec:
3012        return C_O1_I1(w, w);
3013    case INDEX_op_ld_vec:
3014    case INDEX_op_dupm_vec:
3015        return C_O1_I1(w, r);
3016    case INDEX_op_st_vec:
3017        return C_O0_I2(w, r);
3018    case INDEX_op_dup_vec:
3019        return C_O1_I1(w, wr);
3020    case INDEX_op_or_vec:
3021    case INDEX_op_andc_vec:
3022        return C_O1_I2(w, w, wO);
3023    case INDEX_op_and_vec:
3024    case INDEX_op_orc_vec:
3025        return C_O1_I2(w, w, wN);
3026    case INDEX_op_cmp_vec:
3027        return C_O1_I2(w, w, wZ);
3028    case INDEX_op_bitsel_vec:
3029        return C_O1_I3(w, w, w, w);
3030    case INDEX_op_aa64_sli_vec:
3031        return C_O1_I2(w, 0, w);
3032
3033    default:
3034        g_assert_not_reached();
3035    }
3036}
3037
3038static void tcg_target_init(TCGContext *s)
3039{
3040    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
3041    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
3042    tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3043    tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3044
3045    tcg_target_call_clobber_regs = -1ull;
3046    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X19);
3047    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X20);
3048    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X21);
3049    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X22);
3050    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X23);
3051    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X24);
3052    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X25);
3053    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X26);
3054    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X27);
3055    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X28);
3056    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_X29);
3057    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V8);
3058    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V9);
3059    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V10);
3060    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V11);
3061    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V12);
3062    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V13);
3063    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V14);
3064    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V15);
3065
3066    s->reserved_regs = 0;
3067    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3068    tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
3069    tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
3070    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3071    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3072    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3073    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3074}
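/*
 * In the masks above, bits 0..31 select the general registers and
 * bits 32..63 the vector registers, which is why the vector types
 * use the high 32 bits of the register set.
 */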
3075
3076/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
3077#define PUSH_SIZE  ((30 - 19 + 1) * 8)
3078
3079#define FRAME_SIZE \
3080    ((PUSH_SIZE \
3081      + TCG_STATIC_CALL_ARGS_SIZE \
3082      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
3083      + TCG_TARGET_STACK_ALIGN - 1) \
3084     & ~(TCG_TARGET_STACK_ALIGN - 1))
3085
3086/* We're expecting a 2-byte uleb128 encoded value.  */
3087QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3088
3089/* We're expecting to use a single ADDI insn.  */
3090QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
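/*
 * As a rough worked example: PUSH_SIZE is (30 - 19 + 1) * 8 = 96 bytes
 * for the six register pairs saved by the prologue, and FRAME_SIZE
 * additionally rounds the static call-args area plus the temp buffer
 * up to the stack alignment, comfortably within both limits asserted
 * above.
 */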
3091
3092static void tcg_target_qemu_prologue(TCGContext *s)
3093{
3094    TCGReg r;
3095
3096    tcg_out_bti(s, BTI_C);
3097
3098    /* Push (FP, LR) and allocate space for all saved registers.  */
3099    tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
3100                 TCG_REG_SP, -PUSH_SIZE, 1, 1);
3101
3102    /* Set up frame pointer for canonical unwinding.  */
3103    tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
3104
3105    /* Store callee-preserved regs x19..x28.  */
3106    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3107        int ofs = (r - TCG_REG_X19 + 2) * 8;
3108        tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3109    }
3110
3111    /* Make stack space for TCG locals.  */
3112    tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3113                 FRAME_SIZE - PUSH_SIZE);
3114
3115    /* Inform TCG about how to find TCG locals with register, offset, size.  */
3116    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
3117                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3118
3119    if (!tcg_use_softmmu) {
3120        /*
3121         * Note that XZR cannot be encoded in the address base register slot,
3122         * as that actually encodes SP.  Depending on the guest, we may need
3123         * to zero-extend the guest address via the address index register slot,
3124         * therefore we need to load even a zero guest base into a register.
3125         */
3126        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
3127        tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
3128    }
3129
3130    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3131    tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
3132
3133    /*
3134     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3135     * and fall through to the rest of the epilogue.
3136     */
3137    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3138    tcg_out_bti(s, BTI_J);
3139    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
3140
3141    /* TB epilogue */
3142    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3143    tcg_out_bti(s, BTI_J);
3144
3145    /* Remove TCG locals stack space.  */
3146    tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
3147                 FRAME_SIZE - PUSH_SIZE);
3148
3149    /* Restore registers x19..x28.  */
3150    for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
3151        int ofs = (r - TCG_REG_X19 + 2) * 8;
3152        tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
3153    }
3154
3155    /* Pop (FP, LR), restore SP to previous frame.  */
3156    tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
3157                 TCG_REG_SP, PUSH_SIZE, 0, 1);
3158    tcg_out_insn(s, 3207, RET, TCG_REG_LR);
3159}
3160
3161static void tcg_out_tb_start(TCGContext *s)
3162{
3163    tcg_out_bti(s, BTI_J);
3164}
3165
3166static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3167{
3168    int i;
3169    for (i = 0; i < count; ++i) {
3170        p[i] = NOP;
3171    }
3172}
3173
3174typedef struct {
3175    DebugFrameHeader h;
3176    uint8_t fde_def_cfa[4];
3177    uint8_t fde_reg_ofs[24];
3178} DebugFrame;
3179
3180#define ELF_HOST_MACHINE EM_AARCH64
3181
3182static const DebugFrame debug_frame = {
3183    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3184    .h.cie.id = -1,
3185    .h.cie.version = 1,
3186    .h.cie.code_align = 1,
3187    .h.cie.data_align = 0x78,             /* sleb128 -8 */
3188    .h.cie.return_column = TCG_REG_LR,
3189
3190    /* Total FDE size does not include the "len" member.  */
3191    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3192
3193    .fde_def_cfa = {
3194        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3195        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3196        (FRAME_SIZE >> 7)
3197    },
3198    .fde_reg_ofs = {
3199        0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
3200        0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
3201        0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
3202        0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
3203        0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
3204        0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
3205        0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
3206        0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
3207        0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
3208        0x80 + 19, 10,                  /* DW_CFA_offset, x19, -80 */
3209        0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
3210        0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
3211    }
3212};
3213
3214void tcg_register_jit(const void *buf, size_t buf_size)
3215{
3216    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3217}
3218