xref: /openbmc/qemu/tcg/riscv/tcg-target.c.inc (revision 12d1a768bdfea6e27a3a829228840d72507613a1)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2018 SiFive, Inc
5 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
6 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
7 * Copyright (c) 2008 Fabrice Bellard
8 *
9 * Based on i386/tcg-target.c and mips/tcg-target.c
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 */
29
30/* Used for function call generation. */
31#define TCG_REG_CALL_STACK              TCG_REG_SP
32#define TCG_TARGET_STACK_ALIGN          16
33#define TCG_TARGET_CALL_STACK_OFFSET    0
34#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
35#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
36#define TCG_TARGET_CALL_ARG_I128        TCG_CALL_ARG_NORMAL
37#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL
38
39#ifdef CONFIG_DEBUG_TCG
40static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
41    "zero", "ra",  "sp",  "gp",  "tp",  "t0",  "t1",  "t2",
42    "s0",   "s1",  "a0",  "a1",  "a2",  "a3",  "a4",  "a5",
43    "a6",   "a7",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
44    "s8",   "s9",  "s10", "s11", "t3",  "t4",  "t5",  "t6",
45    "v0",   "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
46    "v8",   "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
47    "v16",  "v17", "v18", "v19", "v20", "v21", "v22", "v23",
48    "v24",  "v25", "v26", "v27", "v28", "v29", "v30", "v31",
49};
50#endif
51
52static const int tcg_target_reg_alloc_order[] = {
53    /* Call saved registers */
54    /* TCG_REG_S0 reserved for TCG_AREG0 */
55    TCG_REG_S1,
56    TCG_REG_S2,
57    TCG_REG_S3,
58    TCG_REG_S4,
59    TCG_REG_S5,
60    TCG_REG_S6,
61    TCG_REG_S7,
62    TCG_REG_S8,
63    TCG_REG_S9,
64    TCG_REG_S10,
65    TCG_REG_S11,
66
67    /* Call clobbered registers */
68    TCG_REG_T0,
69    TCG_REG_T1,
70    TCG_REG_T2,
71    TCG_REG_T3,
72    TCG_REG_T4,
73    TCG_REG_T5,
74    TCG_REG_T6,
75
76    /* Argument registers */
77    TCG_REG_A0,
78    TCG_REG_A1,
79    TCG_REG_A2,
80    TCG_REG_A3,
81    TCG_REG_A4,
82    TCG_REG_A5,
83    TCG_REG_A6,
84    TCG_REG_A7,
85
86    /* Vector registers; TCG_REG_V0 is reserved for the mask. */
87    TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,  TCG_REG_V4,
88    TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,  TCG_REG_V8,
89    TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
90    TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
91    TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
92    TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
93    TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
94    TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
95};
96
97static const int tcg_target_call_iarg_regs[] = {
98    TCG_REG_A0,
99    TCG_REG_A1,
100    TCG_REG_A2,
101    TCG_REG_A3,
102    TCG_REG_A4,
103    TCG_REG_A5,
104    TCG_REG_A6,
105    TCG_REG_A7,
106};
107
108static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
109{
110    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
111    tcg_debug_assert(slot >= 0 && slot <= 1);
112    return TCG_REG_A0 + slot;
113}
114
115#define TCG_CT_CONST_S12     0x100
116#define TCG_CT_CONST_N12     0x200
117#define TCG_CT_CONST_M12     0x400
118#define TCG_CT_CONST_J12     0x800
119#define TCG_CT_CONST_S5     0x1000
120#define TCG_CT_CONST_CMP_VI 0x2000
121
122#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
123#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
124#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
125#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000
126
127#define sextreg  sextract64
128
129/*
130 * RISC-V Base ISA opcodes (IM)
131 */
132
133#define V_OPIVV (0x0 << 12)
134#define V_OPFVV (0x1 << 12)
135#define V_OPMVV (0x2 << 12)
136#define V_OPIVI (0x3 << 12)
137#define V_OPIVX (0x4 << 12)
138#define V_OPFVF (0x5 << 12)
139#define V_OPMVX (0x6 << 12)
140#define V_OPCFG (0x7 << 12)
141
142/* NF <= 7 && NF >= 0 */
143#define V_NF(x) (x << 29)
144#define V_UNIT_STRIDE (0x0 << 20)
145#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
146
147typedef enum {
148    VLMUL_M1 = 0, /* LMUL=1 */
149    VLMUL_M2,     /* LMUL=2 */
150    VLMUL_M4,     /* LMUL=4 */
151    VLMUL_M8,     /* LMUL=8 */
152    VLMUL_RESERVED,
153    VLMUL_MF8,    /* LMUL=1/8 */
154    VLMUL_MF4,    /* LMUL=1/4 */
155    VLMUL_MF2,    /* LMUL=1/2 */
156} RISCVVlmul;
157
158typedef enum {
159    OPC_ADD = 0x33,
160    OPC_ADDI = 0x13,
161    OPC_AND = 0x7033,
162    OPC_ANDI = 0x7013,
163    OPC_AUIPC = 0x17,
164    OPC_BEQ = 0x63,
165    OPC_BEXTI = 0x48005013,
166    OPC_BGE = 0x5063,
167    OPC_BGEU = 0x7063,
168    OPC_BLT = 0x4063,
169    OPC_BLTU = 0x6063,
170    OPC_BNE = 0x1063,
171    OPC_DIV = 0x2004033,
172    OPC_DIVU = 0x2005033,
173    OPC_JAL = 0x6f,
174    OPC_JALR = 0x67,
175    OPC_LB = 0x3,
176    OPC_LBU = 0x4003,
177    OPC_LD = 0x3003,
178    OPC_LH = 0x1003,
179    OPC_LHU = 0x5003,
180    OPC_LUI = 0x37,
181    OPC_LW = 0x2003,
182    OPC_LWU = 0x6003,
183    OPC_MUL = 0x2000033,
184    OPC_MULH = 0x2001033,
185    OPC_MULHSU = 0x2002033,
186    OPC_MULHU = 0x2003033,
187    OPC_OR = 0x6033,
188    OPC_ORI = 0x6013,
189    OPC_REM = 0x2006033,
190    OPC_REMU = 0x2007033,
191    OPC_SB = 0x23,
192    OPC_SD = 0x3023,
193    OPC_SH = 0x1023,
194    OPC_SLL = 0x1033,
195    OPC_SLLI = 0x1013,
196    OPC_SLT = 0x2033,
197    OPC_SLTI = 0x2013,
198    OPC_SLTIU = 0x3013,
199    OPC_SLTU = 0x3033,
200    OPC_SRA = 0x40005033,
201    OPC_SRAI = 0x40005013,
202    OPC_SRL = 0x5033,
203    OPC_SRLI = 0x5013,
204    OPC_SUB = 0x40000033,
205    OPC_SW = 0x2023,
206    OPC_XOR = 0x4033,
207    OPC_XORI = 0x4013,
208
209    OPC_ADDIW = 0x1b,
210    OPC_ADDW = 0x3b,
211    OPC_DIVUW = 0x200503b,
212    OPC_DIVW = 0x200403b,
213    OPC_MULW = 0x200003b,
214    OPC_REMUW = 0x200703b,
215    OPC_REMW = 0x200603b,
216    OPC_SLLIW = 0x101b,
217    OPC_SLLW = 0x103b,
218    OPC_SRAIW = 0x4000501b,
219    OPC_SRAW = 0x4000503b,
220    OPC_SRLIW = 0x501b,
221    OPC_SRLW = 0x503b,
222    OPC_SUBW = 0x4000003b,
223
224    OPC_FENCE = 0x0000000f,
225    OPC_NOP   = OPC_ADDI,   /* nop = addi x0,x0,0 */
226
227    /* Zba: Bit manipulation extension, address generation */
228    OPC_ADD_UW = 0x0800003b,
229
230    /* Zbb: Bit manipulation extension, basic bit manipulation */
231    OPC_ANDN   = 0x40007033,
232    OPC_CLZ    = 0x60001013,
233    OPC_CLZW   = 0x6000101b,
234    OPC_CPOP   = 0x60201013,
235    OPC_CPOPW  = 0x6020101b,
236    OPC_CTZ    = 0x60101013,
237    OPC_CTZW   = 0x6010101b,
238    OPC_ORN    = 0x40006033,
239    OPC_REV8   = 0x6b805013,
240    OPC_ROL    = 0x60001033,
241    OPC_ROLW   = 0x6000103b,
242    OPC_ROR    = 0x60005033,
243    OPC_RORW   = 0x6000503b,
244    OPC_RORI   = 0x60005013,
245    OPC_RORIW  = 0x6000501b,
246    OPC_SEXT_B = 0x60401013,
247    OPC_SEXT_H = 0x60501013,
248    OPC_XNOR   = 0x40004033,
249    OPC_ZEXT_H = 0x0800403b,
250
251    /* Zicond: integer conditional operations */
252    OPC_CZERO_EQZ = 0x0e005033,
253    OPC_CZERO_NEZ = 0x0e007033,
254
255    /* V: Vector extension 1.0 */
256    OPC_VSETVLI  = 0x57 | V_OPCFG,
257    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
258    OPC_VSETVL   = 0x80000057 | V_OPCFG,
259
260    OPC_VLE8_V  = 0x7 | V_UNIT_STRIDE,
261    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
262    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
263    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
264    OPC_VSE8_V  = 0x27 | V_UNIT_STRIDE,
265    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
266    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
267    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,
268
269    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
270    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
271    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
272    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
273
274    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
275    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
276    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
277    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
278
279    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
280    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,
281
282    OPC_VADD_VV = 0x57 | V_OPIVV,
283    OPC_VADD_VI = 0x57 | V_OPIVI,
284    OPC_VSUB_VV = 0x8000057 | V_OPIVV,
285    OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
286    OPC_VAND_VV = 0x24000057 | V_OPIVV,
287    OPC_VAND_VI = 0x24000057 | V_OPIVI,
288    OPC_VOR_VV = 0x28000057 | V_OPIVV,
289    OPC_VOR_VI = 0x28000057 | V_OPIVI,
290    OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
291    OPC_VXOR_VI = 0x2c000057 | V_OPIVI,
292
293    OPC_VMUL_VV = 0x94000057 | V_OPMVV,
294    OPC_VSADD_VV = 0x84000057 | V_OPIVV,
295    OPC_VSADD_VI = 0x84000057 | V_OPIVI,
296    OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
297    OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
298    OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
299    OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
300    OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
301    OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,
302
303    OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
304    OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
305    OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
306    OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
307    OPC_VMIN_VV = 0x14000057 | V_OPIVV,
308    OPC_VMIN_VI = 0x14000057 | V_OPIVI,
309    OPC_VMINU_VV = 0x10000057 | V_OPIVV,
310    OPC_VMINU_VI = 0x10000057 | V_OPIVI,
311
312    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
313    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
314    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
315    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
316    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
317    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,
318
319    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
320    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
321    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
322    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
323    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
324    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
325    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
326    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,
327
328    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
329    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
330    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
331    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
332    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
333    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,
334
335    OPC_VSLL_VV = 0x94000057 | V_OPIVV,
336    OPC_VSLL_VI = 0x94000057 | V_OPIVI,
337    OPC_VSLL_VX = 0x94000057 | V_OPIVX,
338    OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
339    OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
340    OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
341    OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
342    OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
343    OPC_VSRA_VX = 0xa4000057 | V_OPIVX,
344
345    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
346    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
347    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
348
349    OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
350} RISCVInsn;
351
352static const struct {
353    RISCVInsn op;
354    bool swap;
355} tcg_cmpcond_to_rvv_vv[] = {
356    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
357    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
358    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
359    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
360    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
361    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
362    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
363    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
364    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
365    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
366};
367
368static const struct {
369    RISCVInsn op;
370    int min;
371    int max;
372    bool adjust;
373}  tcg_cmpcond_to_rvv_vi[] = {
374    [TCG_COND_EQ]  = { OPC_VMSEQ_VI,  -16, 15, false },
375    [TCG_COND_NE]  = { OPC_VMSNE_VI,  -16, 15, false },
376    [TCG_COND_GT]  = { OPC_VMSGT_VI,  -16, 15, false },
377    [TCG_COND_LE]  = { OPC_VMSLE_VI,  -16, 15, false },
378    [TCG_COND_LT]  = { OPC_VMSLE_VI,  -15, 16, true  },
379    [TCG_COND_GE]  = { OPC_VMSGT_VI,  -15, 16, true  },
380    [TCG_COND_LEU] = { OPC_VMSLEU_VI,   0, 15, false },
381    [TCG_COND_GTU] = { OPC_VMSGTU_VI,   0, 15, false },
382    [TCG_COND_LTU] = { OPC_VMSLEU_VI,   1, 16, true  },
383    [TCG_COND_GEU] = { OPC_VMSGTU_VI,   1, 16, true  },
384};
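
/*
 * Illustrative examples of how the min/max/adjust fields above are used
 * (tcg_out_cmpsel below subtracts 'adjust' from the constant operand):
 *
 *   x <u 7   ->  vmsleu.vi v0, vx, 6     (x <u 7  is  x <=u 6)
 *   x <  -15 ->  vmsle.vi  v0, vx, -16   (x < -15 is  x <= -16)
 *   x >= 16  ->  vmsgt.vi  v0, vx, 15    (x >= 16 is  x >  15)
 *
 * The shifted [min, max] ranges keep the adjusted constant within the
 * 5-bit signed immediate, which is what TCG_CT_CONST_CMP_VI checks.
 */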
385
386/* test if a constant matches the constraint */
387static bool tcg_target_const_match(int64_t val, int ct,
388                                   TCGType type, TCGCond cond, int vece)
389{
390    if (ct & TCG_CT_CONST) {
391        return 1;
392    }
393    if (type >= TCG_TYPE_V64) {
394        /* Val is replicated by VECE; extract the highest element. */
395        val >>= (-8 << vece) & 63;
396    }
397    /*
398     * Sign extended from 12 bits: [-0x800, 0x7ff].
399     * Used for most arithmetic, as this matches the ISA's 12-bit immediate field.
400     */
401    if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
402        return 1;
403    }
404    /*
405     * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
406     * Used for subtraction, where a constant must be handled by ADDI.
407     */
408    if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
409        return 1;
410    }
411    /*
412     * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
413     * Used by addsub2 and movcond, which may need the negative value,
414     * and requires the modified constant to be representable.
415     */
416    if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
417        return 1;
418    }
419    /*
420     * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
421     * Used to map ANDN back to ANDI, etc.
422     */
423    if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
424        return 1;
425    }
426    /*
427     * Sign extended from 5 bits: [-0x10, 0x0f].
428     * Used for vector-immediate.
429     */
430    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
431        return 1;
432    }
433    /*
434     * Used for vector compare OPIVI instructions.
435     */
436    if ((ct & TCG_CT_CONST_CMP_VI) &&
437        val >= tcg_cmpcond_to_rvv_vi[cond].min &&
438        val <= tcg_cmpcond_to_rvv_vi[cond].max) {
439        return true;
440     }
441    return 0;
442}
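
/*
 * Worked example for the vector path above: a dup of 0x05 at MO_8 arrives
 * here as the replicated constant 0x0505050505050505.  The shift by
 * (-8 << vece) & 63 == 56 recovers the element value 5, which satisfies
 * TCG_CT_CONST_S5 and so can be encoded directly in an OPIVI immediate.
 */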
443
444/*
445 * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
446 */
447
448/* Type-R */
449
450static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
451{
452    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
453}
454
455/* Type-I */
456
457static int32_t encode_imm12(uint32_t imm)
458{
459    return (imm & 0xfff) << 20;
460}
461
462static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
463{
464    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
465}
466
467/* Type-S */
468
469static int32_t encode_simm12(uint32_t imm)
470{
471    int32_t ret = 0;
472
473    ret |= (imm & 0xFE0) << 20;
474    ret |= (imm & 0x1F) << 7;
475
476    return ret;
477}
478
479static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
480{
481    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
482}
483
484/* Type-SB */
485
486static int32_t encode_sbimm12(uint32_t imm)
487{
488    int32_t ret = 0;
489
490    ret |= (imm & 0x1000) << 19;
491    ret |= (imm & 0x7e0) << 20;
492    ret |= (imm & 0x1e) << 7;
493    ret |= (imm & 0x800) >> 4;
494
495    return ret;
496}
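
/*
 * For illustration, the scatter above applied to imm = -2 yields
 * 0xfe000f80: imm[12] lands in bit 31, imm[10:5] in bits 30:25,
 * imm[4:1] in bits 11:8 and imm[11] in bit 7, matching the B-type
 * branch immediate layout.
 */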
497
498static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
499{
500    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
501}
502
503/* Type-U */
504
505static int32_t encode_uimm20(uint32_t imm)
506{
507    return imm & 0xfffff000;
508}
509
510static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
511{
512    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
513}
514
515/* Type-UJ */
516
517static int32_t encode_ujimm20(uint32_t imm)
518{
519    int32_t ret = 0;
520
521    ret |= (imm & 0x0007fe) << (21 - 1);
522    ret |= (imm & 0x000800) << (20 - 11);
523    ret |= (imm & 0x0ff000) << (12 - 12);
524    ret |= (imm & 0x100000) << (31 - 20);
525
526    return ret;
527}
528
529static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
530{
531    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
532}
533
534
535/* Type-OPIVI */
536
537static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
538                         TCGReg vs2, bool vm)
539{
540    return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
541           (vs2 & 0x1f) << 20 | (vm << 25);
542}
543
544/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
545
546static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
547                        TCGReg s2, bool vm)
548{
549    return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
550           (s2 & 0x1f) << 20 | (vm << 25);
551}
552
553/* Vector vtype */
554
555static uint32_t encode_vtype(bool vta, bool vma,
556                            MemOp vsew, RISCVVlmul vlmul)
557{
558    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
559}
560
561static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
562                           TCGArg rs1, uint32_t vtype)
563{
564    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
565}
566
567static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
568                            uint32_t uimm, uint32_t vtype)
569{
570    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
571}
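
/*
 * For reference, encode_vtype(true, true, MO_64, VLMUL_M1) yields 0xd8
 * (vma in bit 7, vta in bit 6, vsew = 3 in bits 5:3, vlmul = 0 in
 * bits 2:0), i.e. an "e64, m1, ta, ma" configuration.
 */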
572
573/*
574 * RISC-V instruction emitters
575 */
576
577static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
578                            TCGReg rd, TCGReg rs1, TCGReg rs2)
579{
580    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
581}
582
583static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
584                            TCGReg rd, TCGReg rs1, TCGArg imm)
585{
586    tcg_out32(s, encode_i(opc, rd, rs1, imm));
587}
588
589static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
590                              TCGReg rs1, TCGReg rs2, uint32_t imm)
591{
592    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
593}
594
595static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
596                               TCGReg rs1, TCGReg rs2, uint32_t imm)
597{
598    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
599}
600
601static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
602                              TCGReg rd, uint32_t imm)
603{
604    tcg_out32(s, encode_u(opc, rd, imm));
605}
606
607static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
608                             TCGReg rd, uint32_t imm)
609{
610    tcg_out32(s, encode_uj(opc, rd, imm));
611}
612
613static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
614{
615    int i;
616    for (i = 0; i < count; ++i) {
617        p[i] = OPC_NOP;
618    }
619}
620
621/*
622 * Relocations
623 */
624
625static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
626{
627    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
628    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
629
630    tcg_debug_assert((offset & 1) == 0);
631    if (offset == sextreg(offset, 0, 12)) {
632        *src_rw |= encode_sbimm12(offset);
633        return true;
634    }
635
636    return false;
637}
638
639static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
640{
641    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
642    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
643
644    tcg_debug_assert((offset & 1) == 0);
645    if (offset == sextreg(offset, 0, 20)) {
646        *src_rw |= encode_ujimm20(offset);
647        return true;
648    }
649
650    return false;
651}
652
653static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
654{
655    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
656    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
657    int32_t lo = sextreg(offset, 0, 12);
658    int32_t hi = offset - lo;
659
660    if (offset == hi + lo) {
661        src_rw[0] |= encode_uimm20(hi);
662        src_rw[1] |= encode_imm12(lo);
663        return true;
664    }
665
666    return false;
667}
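
/*
 * The hi/lo split above matches what the auipc + addi/jalr/ld pairs
 * emitted elsewhere expect.  For example, offset 0x1800 splits as
 * lo = sextreg(0x1800, 0, 12) = -0x800 and hi = 0x2000: the auipc adds
 * 0x2000 and the 12-bit immediate subtracts 0x800.  Computing hi as
 * offset - lo, rather than offset & ~0xfff, is what absorbs the sign
 * of the low part.
 */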
668
669static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
670                        intptr_t value, intptr_t addend)
671{
672    tcg_debug_assert(addend == 0);
673    switch (type) {
674    case R_RISCV_BRANCH:
675        return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
676    case R_RISCV_JAL:
677        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
678    case R_RISCV_CALL:
679        return reloc_call(code_ptr, (tcg_insn_unit *)value);
680    default:
681        g_assert_not_reached();
682    }
683}
684
685/*
686 * RISC-V vector instruction emitters
687 */
688
689/*
690 * Vector registers use the same lower 5 bits as GPR registers,
691 * and vm=0 (vm = false) means vector masking ENABLED.
692 * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
693 * second operand.
694 */
695static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
696                           TCGReg vd, TCGReg vs2, TCGReg vs1)
697{
698    tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
699}
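
/*
 * As an example of the operand order noted above, a call such as
 * tcg_out_opc_vv(s, OPC_VSUB_VV, vd, va, vb) places va in the vs2 field
 * and vb in the vs1 field, i.e. it emits "vsub.vv vd, va, vb" and
 * computes va - vb.
 */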
700
701static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
702                           TCGReg vd, TCGReg vs2, TCGReg rs1)
703{
704    tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
705}
706
707static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
708                           TCGReg vd, TCGReg vs2, int32_t imm)
709{
710    tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
711}
712
713static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
714                              TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
715{
716    if (c_vi1) {
717        tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
718    } else {
719        tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
720    }
721}
722
723static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
724                                 TCGReg vs2, int32_t imm)
725{
726    tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
727}
728
729static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
730                                 TCGReg vs2, TCGReg vs1)
731{
732    tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
733}
734
735typedef struct VsetCache {
736    uint32_t movi_insn;
737    uint32_t vset_insn;
738} VsetCache;
739
740static VsetCache riscv_vset_cache[3][4];
741
742static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
743{
744    const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
745
746    s->riscv_cur_type = type;
747    s->riscv_cur_vsew = vsew;
748
749    if (p->movi_insn) {
750        tcg_out32(s, p->movi_insn);
751    }
752    tcg_out32(s, p->vset_insn);
753}
754
755static MemOp set_vtype_len(TCGContext *s, TCGType type)
756{
757    if (type != s->riscv_cur_type) {
758        set_vtype(s, type, MO_64);
759    }
760    return s->riscv_cur_vsew;
761}
762
763static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
764{
765    if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
766        set_vtype(s, type, vsew);
767    }
768}
769
770/*
771 * TCG intrinsics
772 */
773
774static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
775{
776    if (ret == arg) {
777        return true;
778    }
779    switch (type) {
780    case TCG_TYPE_I32:
781    case TCG_TYPE_I64:
782        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
783        break;
784    case TCG_TYPE_V64:
785    case TCG_TYPE_V128:
786    case TCG_TYPE_V256:
787        {
788            int lmul = type - riscv_lg2_vlenb;
789            int nf = 1 << MAX(lmul, 0);
790            tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
791        }
792        break;
793    default:
794        g_assert_not_reached();
795    }
796    return true;
797}
798
799static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
800                         tcg_target_long val)
801{
802    tcg_target_long lo, hi, tmp;
803    int shift, ret;
804
805    if (type == TCG_TYPE_I32) {
806        val = (int32_t)val;
807    }
808
809    lo = sextreg(val, 0, 12);
810    if (val == lo) {
811        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
812        return;
813    }
814
815    hi = val - lo;
816    if (val == (int32_t)val) {
817        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
818        if (lo != 0) {
819            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
820        }
821        return;
822    }
823
824    tmp = tcg_pcrel_diff(s, (void *)val);
825    if (tmp == (int32_t)tmp) {
826        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
827        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
828        ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
829        tcg_debug_assert(ret == true);
830        return;
831    }
832
833    /* Look for a single 20-bit section.  */
834    shift = ctz64(val);
835    tmp = val >> shift;
836    if (tmp == sextreg(tmp, 0, 20)) {
837        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
838        if (shift > 12) {
839            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
840        } else {
841            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
842        }
843        return;
844    }
845
846    /* Look for a few high zero bits, with lots of bits set in the middle.  */
847    shift = clz64(val);
848    tmp = val << shift;
849    if (tmp == sextreg(tmp, 12, 20) << 12) {
850        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
851        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
852        return;
853    } else if (tmp == sextreg(tmp, 0, 12)) {
854        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
855        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
856        return;
857    }
858
859    /* Drop into the constant pool.  */
860    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
861    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
862    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
863}
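
/*
 * A few concrete sequences from the cases above, for orientation
 * (assuming the value is not reachable pc-relative, which is tried
 * before the shifted patterns):
 *
 *   0x555              addi  rd, zero, 0x555
 *   0x12345678         lui   rd, 0x12345;  addiw rd, rd, 0x678
 *   0x7ff << 40        lui   rd, 0x7ff;    slli  rd, rd, 28
 *
 * Anything matching none of the patterns goes to the constant pool and
 * is loaded with the final auipc + ld pair.
 */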
864
865static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
866{
867    return false;
868}
869
870static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
871                             tcg_target_long imm)
872{
873    /* This function is only used for passing structs by reference. */
874    g_assert_not_reached();
875}
876
877static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
878{
879    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
880}
881
882static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
883{
884    if (cpuinfo & CPUINFO_ZBB) {
885        tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO);
886    } else {
887        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
888        tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
889    }
890}
891
892static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
893{
894    if (cpuinfo & CPUINFO_ZBA) {
895        tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO);
896    } else {
897        tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
898        tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
899    }
900}
901
902static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
903{
904    if (cpuinfo & CPUINFO_ZBB) {
905        tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0);
906    } else {
907        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
908        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
909    }
910}
911
912static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
913{
914    if (cpuinfo & CPUINFO_ZBB) {
915        tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0);
916    } else {
917        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
918        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
919    }
920}
921
922static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
923{
924    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
925}
926
927static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
928{
929    if (ret != arg) {
930        tcg_out_ext32s(s, ret, arg);
931    }
932}
933
934static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
935{
936    tcg_out_ext32u(s, ret, arg);
937}
938
939static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
940{
941    tcg_out_ext32s(s, ret, arg);
942}
943
944static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
945                         TCGReg addr, intptr_t offset)
946{
947    intptr_t imm12 = sextreg(offset, 0, 12);
948
949    if (offset != imm12) {
950        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
951
952        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
953            imm12 = sextreg(diff, 0, 12);
954            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
955        } else {
956            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
957            if (addr != TCG_REG_ZERO) {
958                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
959            }
960        }
961        addr = TCG_REG_TMP2;
962    }
963
964    switch (opc) {
965    case OPC_SB:
966    case OPC_SH:
967    case OPC_SW:
968    case OPC_SD:
969        tcg_out_opc_store(s, opc, addr, data, imm12);
970        break;
971    case OPC_LB:
972    case OPC_LBU:
973    case OPC_LH:
974    case OPC_LHU:
975    case OPC_LW:
976    case OPC_LWU:
977    case OPC_LD:
978        tcg_out_opc_imm(s, opc, data, addr, imm12);
979        break;
980    default:
981        g_assert_not_reached();
982    }
983}
984
985static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
986                             TCGReg addr, intptr_t offset)
987{
988    tcg_debug_assert(data >= TCG_REG_V0);
989    tcg_debug_assert(addr < TCG_REG_V0);
990
991    if (offset) {
992        tcg_debug_assert(addr != TCG_REG_ZERO);
993        if (offset == sextreg(offset, 0, 12)) {
994            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
995        } else {
996            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
997            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
998        }
999        addr = TCG_REG_TMP0;
1000    }
1001    tcg_out32(s, encode_v(opc, data, addr, 0, true));
1002}
1003
1004static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
1005                       TCGReg arg1, intptr_t arg2)
1006{
1007    RISCVInsn insn;
1008
1009    switch (type) {
1010    case TCG_TYPE_I32:
1011        tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
1012        break;
1013    case TCG_TYPE_I64:
1014        tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
1015        break;
1016    case TCG_TYPE_V64:
1017    case TCG_TYPE_V128:
1018    case TCG_TYPE_V256:
1019        if (type >= riscv_lg2_vlenb) {
1020            static const RISCVInsn whole_reg_ld[] = {
1021                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
1022            };
1023            unsigned idx = type - riscv_lg2_vlenb;
1024
1025            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
1026            insn = whole_reg_ld[idx];
1027        } else {
1028            static const RISCVInsn unit_stride_ld[] = {
1029                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
1030            };
1031            MemOp prev_vsew = set_vtype_len(s, type);
1032
1033            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
1034            insn = unit_stride_ld[prev_vsew];
1035        }
1036        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1037        break;
1038    default:
1039        g_assert_not_reached();
1040    }
1041}
1042
1043static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1044                       TCGReg arg1, intptr_t arg2)
1045{
1046    RISCVInsn insn;
1047
1048    switch (type) {
1049    case TCG_TYPE_I32:
1050        tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
1051        break;
1052    case TCG_TYPE_I64:
1053        tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
1054        break;
1055    case TCG_TYPE_V64:
1056    case TCG_TYPE_V128:
1057    case TCG_TYPE_V256:
1058        if (type >= riscv_lg2_vlenb) {
1059            static const RISCVInsn whole_reg_st[] = {
1060                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
1061            };
1062            unsigned idx = type - riscv_lg2_vlenb;
1063
1064            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
1065            insn = whole_reg_st[idx];
1066        } else {
1067            static const RISCVInsn unit_stride_st[] = {
1068                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
1069            };
1070            MemOp prev_vsew = set_vtype_len(s, type);
1071
1072            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
1073            insn = unit_stride_st[prev_vsew];
1074        }
1075        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1076        break;
1077    default:
1078        g_assert_not_reached();
1079    }
1080}
1081
1082static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1083                        TCGReg base, intptr_t ofs)
1084{
1085    if (val == 0) {
1086        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
1087        return true;
1088    }
1089    return false;
1090}
1091
1092static void tcg_out_addsub2(TCGContext *s,
1093                            TCGReg rl, TCGReg rh,
1094                            TCGReg al, TCGReg ah,
1095                            TCGArg bl, TCGArg bh,
1096                            bool cbl, bool cbh, bool is_sub, bool is32bit)
1097{
1098    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
1099    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
1100    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
1101    TCGReg th = TCG_REG_TMP1;
1102
1103    /* If we have a negative constant such that negating it would
1104       make the high part zero, we can (usually) eliminate one insn.  */
1105    if (cbl && cbh && bh == -1 && bl != 0) {
1106        bl = -bl;
1107        bh = 0;
1108        is_sub = !is_sub;
1109    }
1110
1111    /* By operating on the high part first, we get to use the final
1112       carry operation to move back from the temporary.  */
1113    if (!cbh) {
1114        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
1115    } else if (bh != 0 || ah == rl) {
1116        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
1117    } else {
1118        th = ah;
1119    }
1120
1121    /* Note that tcg optimization should eliminate the bl == 0 case.  */
1122    if (is_sub) {
1123        if (cbl) {
1124            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
1125            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
1126        } else {
1127            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
1128            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
1129        }
1130        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
1131    } else {
1132        if (cbl) {
1133            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
1134            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
1135        } else if (al == bl) {
1136            /*
1137             * If the input regs overlap, this is a simple doubling
1138             * and carry-out is the input msb.  This special case is
1139             * required when the output reg overlaps the input,
1140             * but we might as well use it always.
1141             */
1142            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
1143            tcg_out_opc_reg(s, opc_add, rl, al, al);
1144        } else {
1145            tcg_out_opc_reg(s, opc_add, rl, al, bl);
1146            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
1147                            rl, (rl == bl ? al : bl));
1148        }
1149        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
1150    }
1151}
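
/*
 * Informal sketch of the register/register addition above, assuming no
 * operand overlap:
 *
 *   add  th, ah, bh          high parts first, into the temp
 *   add  rl, al, bl
 *   sltu tmp0, rl, bl        carry out of the low-part addition
 *   add  rh, th, tmp0
 *
 * The subtraction path is symmetric, computing the borrow with
 * "sltu tmp0, al, bl" before the low-part subtract.
 */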
1152
1153static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1154                                   TCGReg dst, TCGReg src)
1155{
1156    set_vtype_len_sew(s, type, vece);
1157    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
1158    return true;
1159}
1160
1161static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1162                                    TCGReg dst, TCGReg base, intptr_t offset)
1163{
1164    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
1165    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1166}
1167
1168static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1169                                    TCGReg dst, int64_t arg)
1170{
1171    /* Arg is replicated by VECE; extract the highest element. */
1172    arg >>= (-8 << vece) & 63;
1173
1174    if (arg >= -16 && arg < 16) {
1175        if (arg == 0 || arg == -1) {
1176            set_vtype_len(s, type);
1177        } else {
1178            set_vtype_len_sew(s, type, vece);
1179        }
1180        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
1181        return;
1182    }
1183    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
1184    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1185}
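
/*
 * For example, a dup of 0x01 at MO_8 reaches this point as
 * 0x0101010101010101, is reduced to 1 by the shift, and becomes a single
 * "vmv.v.i vd, 1" with SEW set to 8.  A dup of 0 or -1 also uses
 * vmv.v.i but does not force a particular SEW, since all-zeros and
 * all-ones patterns are the same for every element size.
 */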
1186
1187static const struct {
1188    RISCVInsn op;
1189    bool swap;
1190} tcg_brcond_to_riscv[] = {
1191    [TCG_COND_EQ] =  { OPC_BEQ,  false },
1192    [TCG_COND_NE] =  { OPC_BNE,  false },
1193    [TCG_COND_LT] =  { OPC_BLT,  false },
1194    [TCG_COND_GE] =  { OPC_BGE,  false },
1195    [TCG_COND_LE] =  { OPC_BGE,  true  },
1196    [TCG_COND_GT] =  { OPC_BLT,  true  },
1197    [TCG_COND_LTU] = { OPC_BLTU, false },
1198    [TCG_COND_GEU] = { OPC_BGEU, false },
1199    [TCG_COND_LEU] = { OPC_BGEU, true  },
1200    [TCG_COND_GTU] = { OPC_BLTU, true  }
1201};
1202
1203static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
1204                           TCGReg arg2, TCGLabel *l)
1205{
1206    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
1207
1208    tcg_debug_assert(op != 0);
1209
1210    if (tcg_brcond_to_riscv[cond].swap) {
1211        TCGReg t = arg1;
1212        arg1 = arg2;
1213        arg2 = t;
1214    }
1215
1216    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
1217    tcg_out_opc_branch(s, op, arg1, arg2, 0);
1218}
1219
1220#define SETCOND_INV    TCG_TARGET_NB_REGS
1221#define SETCOND_NEZ    (SETCOND_INV << 1)
1222#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
1223
1224static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
1225                               TCGReg arg1, tcg_target_long arg2, bool c2)
1226{
1227    int flags = 0;
1228
1229    switch (cond) {
1230    case TCG_COND_EQ:    /* -> NE  */
1231    case TCG_COND_GE:    /* -> LT  */
1232    case TCG_COND_GEU:   /* -> LTU */
1233    case TCG_COND_GT:    /* -> LE  */
1234    case TCG_COND_GTU:   /* -> LEU */
1235        cond = tcg_invert_cond(cond);
1236        flags ^= SETCOND_INV;
1237        break;
1238    default:
1239        break;
1240    }
1241
1242    switch (cond) {
1243    case TCG_COND_LE:
1244    case TCG_COND_LEU:
1245        /*
1246         * If we have a constant input, the most efficient way to implement
1247         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
1248         * We don't need to care for this for LE because the constant input
1249         * is constrained to signed 12-bit, and 0x800 is representable in the
1250         * temporary register.
1251         */
1252        if (c2) {
1253            if (cond == TCG_COND_LEU) {
1254                /* unsigned <= -1 is true */
1255                if (arg2 == -1) {
1256                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
1257                    return ret;
1258                }
1259                cond = TCG_COND_LTU;
1260            } else {
1261                cond = TCG_COND_LT;
1262            }
1263            tcg_debug_assert(arg2 <= 0x7ff);
1264            if (++arg2 == 0x800) {
1265                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
1266                arg2 = TCG_REG_TMP0;
1267                c2 = false;
1268            }
1269        } else {
1270            TCGReg tmp = arg2;
1271            arg2 = arg1;
1272            arg1 = tmp;
1273            cond = tcg_swap_cond(cond);    /* LE -> GE */
1274            cond = tcg_invert_cond(cond);  /* GE -> LT */
1275            flags ^= SETCOND_INV;
1276        }
1277        break;
1278    default:
1279        break;
1280    }
1281
1282    switch (cond) {
1283    case TCG_COND_NE:
1284        flags |= SETCOND_NEZ;
1285        if (!c2) {
1286            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
1287        } else if (arg2 == 0) {
1288            ret = arg1;
1289        } else {
1290            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
1291        }
1292        break;
1293
1294    case TCG_COND_LT:
1295        if (c2) {
1296            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
1297        } else {
1298            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
1299        }
1300        break;
1301
1302    case TCG_COND_LTU:
1303        if (c2) {
1304            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
1305        } else {
1306            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
1307        }
1308        break;
1309
1310    default:
1311        g_assert_not_reached();
1312    }
1313
1314    return ret | flags;
1315}
1316
1317static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
1318                            TCGReg arg1, tcg_target_long arg2, bool c2)
1319{
1320    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1321
1322    if (tmpflags != ret) {
1323        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
1324
1325        switch (tmpflags & SETCOND_FLAGS) {
1326        case SETCOND_INV:
1327            /* Intermediate result is boolean: simply invert. */
1328            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
1329            break;
1330        case SETCOND_NEZ:
1331            /* Intermediate result is zero/non-zero: test != 0. */
1332            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1333            break;
1334        case SETCOND_NEZ | SETCOND_INV:
1335            /* Intermediate result is zero/non-zero: test == 0. */
1336            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
1337            break;
1338        default:
1339            g_assert_not_reached();
1340        }
1341    }
1342}
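
/*
 * Example of the two-step scheme: setcond ret = (a == b) first goes
 * through tcg_out_setcond_int, which inverts EQ to NE and emits
 * "xor ret, a, b", returning SETCOND_NEZ | SETCOND_INV; the fixup here
 * then emits "sltiu ret, ret, 1", so ret is 1 exactly when the xor
 * result was zero.
 */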
1343
1344static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
1345                               TCGReg arg1, tcg_target_long arg2, bool c2)
1346{
1347    int tmpflags;
1348    TCGReg tmp;
1349
1350    /* For LT/GE comparison against 0, replicate the sign bit. */
1351    if (c2 && arg2 == 0) {
1352        switch (cond) {
1353        case TCG_COND_GE:
1354            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
1355            arg1 = ret;
1356            /* fall through */
1357        case TCG_COND_LT:
1358            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
1359            return;
1360        default:
1361            break;
1362        }
1363    }
1364
1365    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1366    tmp = tmpflags & ~SETCOND_FLAGS;
1367
1368    /* If intermediate result is zero/non-zero: test != 0. */
1369    if (tmpflags & SETCOND_NEZ) {
1370        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1371        tmp = ret;
1372    }
1373
1374    /* Produce the 0/-1 result. */
1375    if (tmpflags & SETCOND_INV) {
1376        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
1377    } else {
1378        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
1379    }
1380}
1381
1382static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
1383                                   int val1, bool c_val1,
1384                                   int val2, bool c_val2)
1385{
1386    if (val1 == 0) {
1387        if (c_val2) {
1388            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
1389            val2 = TCG_REG_TMP1;
1390        }
1391        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
1392        return;
1393    }
1394
1395    if (val2 == 0) {
1396        if (c_val1) {
1397            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
1398            val1 = TCG_REG_TMP1;
1399        }
1400        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
1401        return;
1402    }
1403
1404    if (c_val2) {
1405        if (c_val1) {
1406            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
1407        } else {
1408            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
1409        }
1410        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
1411        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
1412        return;
1413    }
1414
1415    if (c_val1) {
1416        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
1417        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
1418        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
1419        return;
1420    }
1421
1422    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
1423    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
1424    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
1425}
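
/*
 * In the general register/register case the above boils down to:
 *
 *   czero.nez tmp1, val2, test     tmp1 = test != 0 ? 0 : val2
 *   czero.eqz tmp0, val1, test     tmp0 = test != 0 ? val1 : 0
 *   or        ret,  tmp0, tmp1
 *
 * i.e. ret = test != 0 ? val1 : val2; the earlier branches handle the
 * zero and constant cases with fewer instructions.
 */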
1426
1427static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
1428                                TCGReg cmp1, TCGReg cmp2,
1429                                int val, bool c_val)
1430{
1431    RISCVInsn op;
1432    int disp = 8;
1433
1434    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
1435    op = tcg_brcond_to_riscv[cond].op;
1436    tcg_debug_assert(op != 0);
1437
1438    if (tcg_brcond_to_riscv[cond].swap) {
1439        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
1440    } else {
1441        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
1442    }
1443    if (c_val) {
1444        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
1445    } else {
1446        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
1447    }
1448}
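
/*
 * The displacement of 8 makes the branch skip exactly the one
 * instruction that follows it.  For example,
 * tcg_out_movcond_br1(s, TCG_COND_EQ, ret, a, b, 0, true) emits
 *
 *   beq  a, b, +8
 *   addi ret, zero, 0
 *
 * leaving ret untouched when the condition holds and loading the
 * replacement value otherwise.
 */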
1449
1450static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
1451                                TCGReg cmp1, TCGReg cmp2,
1452                                int val1, bool c_val1,
1453                                int val2, bool c_val2)
1454{
1455    TCGReg tmp;
1456
1457    /* TCG optimizer reorders to prefer ret matching val2. */
1458    if (!c_val2 && ret == val2) {
1459        cond = tcg_invert_cond(cond);
1460        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
1461        return;
1462    }
1463
1464    if (!c_val1 && ret == val1) {
1465        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
1466        return;
1467    }
1468
1469    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
1470    if (c_val1) {
1471        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
1472    } else {
1473        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
1474    }
1475    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
1476    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
1477}
1478
1479static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
1480                            TCGReg cmp1, int cmp2, bool c_cmp2,
1481                            TCGReg val1, bool c_val1,
1482                            TCGReg val2, bool c_val2)
1483{
1484    int tmpflags;
1485    TCGReg t;
1486
1487    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
1488        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
1489                            val1, c_val1, val2, c_val2);
1490        return;
1491    }
1492
1493    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
1494    t = tmpflags & ~SETCOND_FLAGS;
1495
1496    if (cpuinfo & CPUINFO_ZICOND) {
1497        if (tmpflags & SETCOND_INV) {
1498            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
1499        } else {
1500            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
1501        }
1502    } else {
1503        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
1504        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
1505                            val1, c_val1, val2, c_val2);
1506    }
1507}
1508
1509static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
1510                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
1511{
1512    tcg_out_opc_imm(s, insn, ret, src1, 0);
1513
1514    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
1515        /*
1516         * The requested zero result does not match the insn, so adjust.
1517         * Note that constraints put 'ret' in a new register, so the
1518         * computation above did not clobber either 'src1' or 'src2'.
1519         */
1520        tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true,
1521                        src2, c_src2, ret, false);
1522    }
1523}
1524
1525static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
1526                           TCGCond cond, TCGReg ret,
1527                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
1528                           TCGReg val1, bool c_val1,
1529                           TCGReg val2, bool c_val2)
1530{
1531    set_vtype_len_sew(s, type, vece);
1532
1533    /* Use only vmerge_vim if possible, by inverting the test. */
1534    if (c_val2 && !c_val1) {
1535        TCGArg temp = val1;
1536        cond = tcg_invert_cond(cond);
1537        val1 = val2;
1538        val2 = temp;
1539        c_val1 = true;
1540        c_val2 = false;
1541    }
1542
1543    /* Perform the comparison into V0 mask. */
1544    if (c_cmp2) {
1545        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
1546                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
1547    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
1548        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1549                       TCG_REG_V0, cmp2, cmp1);
1550    } else {
1551        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1552                       TCG_REG_V0, cmp1, cmp2);
1553    }
1554    if (c_val1) {
1555        if (c_val2) {
1556            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
1557            val2 = ret;
1558        }
1559        /* vd[i] = v0.mask[i] ? imm : vs2[i] */
1560        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
1561    } else {
1562        /* vd[i] = v0.mask[i] ? vs1[i] : vs2[i] */
1563        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
1564    }
1565}
1566
1567static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
1568                             TCGReg dst, TCGReg src, unsigned imm)
1569{
1570    if (imm < 32) {
1571        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
1572    } else {
1573        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
1574        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
1575    }
1576}
1577
1578static void init_setting_vtype(TCGContext *s)
1579{
1580    s->riscv_cur_type = TCG_TYPE_COUNT;
1581}
1582
1583static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
1584{
1585    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
1586    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
1587    int ret;
1588
1589    init_setting_vtype(s);
1590
1591    tcg_debug_assert((offset & 1) == 0);
1592    if (offset == sextreg(offset, 0, 20)) {
1593        /* short jump: offset fits in 20 signed bits (about +-512 KiB) */
1594        tcg_out_opc_jump(s, OPC_JAL, link, offset);
1595    } else if (offset == (int32_t)offset) {
1596        /* long jump: offset fits in 32 signed bits (about +-2 GiB) */
1597        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
1598        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
1599        ret = reloc_call(s->code_ptr - 2, arg);
1600        tcg_debug_assert(ret == true);
1601    } else {
1602        /* far jump: 64-bit */
1603        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
1604        tcg_target_long base = (tcg_target_long)arg - imm;
1605        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
1606        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
1607    }
1608}
1609
1610static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
1611                         const TCGHelperInfo *info)
1612{
1613    tcg_out_call_int(s, arg, false);
1614}
1615
1616static void tcg_out_mb(TCGContext *s, TCGArg a0)
1617{
1618    tcg_insn_unit insn = OPC_FENCE;
1619
1620    if (a0 & TCG_MO_LD_LD) {
1621        insn |= 0x02200000;
1622    }
1623    if (a0 & TCG_MO_ST_LD) {
1624        insn |= 0x01200000;
1625    }
1626    if (a0 & TCG_MO_LD_ST) {
1627        insn |= 0x02100000;
1628    }
1629    if (a0 & TCG_MO_ST_ST) {
1630        insn |= 0x01100000;
1631    }
1632    tcg_out32(s, insn);
1633}
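
/*
 * The constants above set the pred/succ bits of the FENCE encoding
 * (pred in bits 27:24, succ in bits 23:20).  For example, TCG_MO_LD_LD
 * becomes "fence r, r", TCG_MO_ST_LD becomes "fence w, r", and a full
 * TCG_MO_ALL barrier ORs all four masks into 0x0330000f, i.e.
 * "fence rw, rw".
 */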
1634
1635/*
1636 * Load/store and TLB
1637 */
1638
1639static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1640{
1641    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1642    bool ok = reloc_jimm20(s->code_ptr - 1, target);
1643    tcg_debug_assert(ok);
1644}
1645
1646bool tcg_target_has_memory_bswap(MemOp memop)
1647{
1648    return false;
1649}
1650
1651/* We have three temps; we might as well expose them. */
1652static const TCGLdstHelperParam ldst_helper_param = {
1653    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
1654};
1655
1656static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1657{
1658    MemOp opc = get_memop(l->oi);
1659
1660    /* resolve label address */
1661    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1662        return false;
1663    }
1664
1665    /* call load helper */
1666    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
1667    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
1668    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
1669
1670    tcg_out_goto(s, l->raddr);
1671    return true;
1672}
1673
1674static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1675{
1676    MemOp opc = get_memop(l->oi);
1677
1678    /* resolve label address */
1679    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1680        return false;
1681    }
1682
1683    /* call store helper */
1684    tcg_out_st_helper_args(s, l, &ldst_helper_param);
1685    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
1686
1687    tcg_out_goto(s, l->raddr);
1688    return true;
1689}
1690
1691/* We expect to use a 12-bit negative offset from ENV.  */
1692#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)
1693
1694/*
1695 * For system-mode, perform the TLB load and compare.
1696 * For user-mode, perform any required alignment tests.
1697 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1698 * is required and fill in @pbase with the host address for the fast path.
1699 */
1700static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
1701                                           TCGReg addr_reg, MemOpIdx oi,
1702                                           bool is_ld)
1703{
1704    TCGType addr_type = s->addr_type;
1705    TCGLabelQemuLdst *ldst = NULL;
1706    MemOp opc = get_memop(oi);
1707    TCGAtomAlign aa;
1708    unsigned a_mask;
1709
1710    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
1711    a_mask = (1u << aa.align) - 1;
1712
1713    if (tcg_use_softmmu) {
1714        unsigned s_bits = opc & MO_SIZE;
1715        unsigned s_mask = (1u << s_bits) - 1;
1716        int mem_index = get_mmuidx(oi);
1717        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
1718        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
1719        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
1720        int compare_mask;
1721        TCGReg addr_adj;
1722
1723        ldst = new_ldst_label(s);
1724        ldst->is_ld = is_ld;
1725        ldst->oi = oi;
1726        ldst->addr_reg = addr_reg;
1727
1728        init_setting_vtype(s);
1729
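        /*
         * TLB lookup: TMP0 holds the mask and TMP1 the table pointer for
         * this mmu_idx; TMP2 = table + ((addr >> (page_bits -
         * CPU_TLB_ENTRY_BITS)) & mask) is the CPUTLBEntry to compare.
         */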
1730        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
1731        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
1732
1733        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
1734                        s->page_bits - CPU_TLB_ENTRY_BITS);
1735        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
1736        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
1737
1738        /*
1739         * For aligned accesses, we check the first byte and include the
1740         * alignment bits within the address.  For unaligned accesses, we
1741         * check that we don't cross pages by using the address of the last
1742         * byte of the access.
1743         */
1744        addr_adj = addr_reg;
1745        if (a_mask < s_mask) {
1746            addr_adj = TCG_REG_TMP0;
1747            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
1748                            addr_adj, addr_reg, s_mask - a_mask);
1749        }
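        /*
         * ANDI only accepts a sign-extended 12-bit immediate; when the
         * combined mask does not fit, materialize it and use a register AND.
         */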
1750        compare_mask = s->page_mask | a_mask;
1751        if (compare_mask == sextreg(compare_mask, 0, 12)) {
1752            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
1753        } else {
1754            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
1755            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
1756        }
1757
1758        /* Load the tlb comparator and the addend.  */
1759        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1760        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
1761                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1762                         : offsetof(CPUTLBEntry, addr_write));
1763        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
1764                   offsetof(CPUTLBEntry, addend));
1765
1766        /* Compare masked address with the TLB entry. */
1767        ldst->label_ptr[0] = s->code_ptr;
1768        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
1769
1770        /* TLB Hit - translate address using addend.  */
1771        if (addr_type != TCG_TYPE_I32) {
1772            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
1773        } else if (cpuinfo & CPUINFO_ZBA) {
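            /*
             * add.uw zero-extends the 32-bit guest address while adding
             * the TLB addend in a single instruction.
             */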
1774            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
1775                            addr_reg, TCG_REG_TMP2);
1776        } else {
1777            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
1778            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
1779                            TCG_REG_TMP0, TCG_REG_TMP2);
1780        }
1781        *pbase = TCG_REG_TMP0;
1782    } else {
1783        TCGReg base;
1784
1785        if (a_mask) {
1786            ldst = new_ldst_label(s);
1787            ldst->is_ld = is_ld;
1788            ldst->oi = oi;
1789            ldst->addr_reg = addr_reg;
1790
1791            init_setting_vtype(s);
1792
1793            /* We expect an alignment mask of at most 7, so andi always suffices. */
1794            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
1795            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
1796
1797            ldst->label_ptr[0] = s->code_ptr;
1798            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
1799        }
1800
1801        if (guest_base != 0) {
1802            base = TCG_REG_TMP0;
1803            if (addr_type != TCG_TYPE_I32) {
1804                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
1805                                TCG_GUEST_BASE_REG);
1806            } else if (cpuinfo & CPUINFO_ZBA) {
1807                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
1808                                TCG_GUEST_BASE_REG);
1809            } else {
1810                tcg_out_ext32u(s, base, addr_reg);
1811                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
1812            }
1813        } else if (addr_type != TCG_TYPE_I32) {
1814            base = addr_reg;
1815        } else {
1816            base = TCG_REG_TMP0;
1817            tcg_out_ext32u(s, base, addr_reg);
1818        }
1819        *pbase = base;
1820    }
1821
1822    return ldst;
1823}
1824
1825static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
1826                                   TCGReg base, MemOp opc, TCGType type)
1827{
1828    /* Byte swapping is left to middle-end expansion. */
1829    tcg_debug_assert((opc & MO_BSWAP) == 0);
1830
1831    switch (opc & (MO_SSIZE)) {
1832    case MO_UB:
1833        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
1834        break;
1835    case MO_SB:
1836        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
1837        break;
1838    case MO_UW:
1839        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
1840        break;
1841    case MO_SW:
1842        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
1843        break;
1844    case MO_UL:
1845        if (type == TCG_TYPE_I64) {
1846            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
1847            break;
1848        }
1849        /* FALLTHRU */
1850    case MO_SL:
1851        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
1852        break;
1853    case MO_UQ:
1854        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
1855        break;
1856    default:
1857        g_assert_not_reached();
1858    }
1859}
1860
1861static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1862                            MemOpIdx oi, TCGType data_type)
1863{
1864    TCGLabelQemuLdst *ldst;
1865    TCGReg base;
1866
1867    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
1868    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
1869
1870    if (ldst) {
1871        ldst->type = data_type;
1872        ldst->datalo_reg = data_reg;
1873        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1874    }
1875}
1876
1877static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
1878                                   TCGReg base, MemOp opc)
1879{
1880    /* Byte swapping is left to middle-end expansion. */
1881    tcg_debug_assert((opc & MO_BSWAP) == 0);
1882
1883    switch (opc & (MO_SSIZE)) {
1884    case MO_8:
1885        tcg_out_opc_store(s, OPC_SB, base, val, 0);
1886        break;
1887    case MO_16:
1888        tcg_out_opc_store(s, OPC_SH, base, val, 0);
1889        break;
1890    case MO_32:
1891        tcg_out_opc_store(s, OPC_SW, base, val, 0);
1892        break;
1893    case MO_64:
1894        tcg_out_opc_store(s, OPC_SD, base, val, 0);
1895        break;
1896    default:
1897        g_assert_not_reached();
1898    }
1899}
1900
1901static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1902                            MemOpIdx oi, TCGType data_type)
1903{
1904    TCGLabelQemuLdst *ldst;
1905    TCGReg base;
1906
1907    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
1908    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
1909
1910    if (ldst) {
1911        ldst->type = data_type;
1912        ldst->datalo_reg = data_reg;
1913        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1914    }
1915}
1916
1917static const tcg_insn_unit *tb_ret_addr;
1918
1919static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1920{
1921    /* Reuse the zeroing that exists for goto_ptr.  */
1922    if (a0 == 0) {
1923        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
1924    } else {
1925        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
1926        tcg_out_call_int(s, tb_ret_addr, true);
1927    }
1928}
1929
1930static void tcg_out_goto_tb(TCGContext *s, int which)
1931{
1932    /* Direct branch will be patched by tb_target_set_jmp_target. */
1933    set_jmp_insn_offset(s, which);
1934    tcg_out32(s, OPC_JAL);
1935
1936    /* When branch is out of range, fall through to indirect. */
1937    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
1938               get_jmp_target_addr(s, which));
1939    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
1940    set_jmp_reset_offset(s, which);
1941}
1942
1943void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1944                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1945{
1946    uintptr_t addr = tb->jmp_target_addr[n];
1947    ptrdiff_t offset = addr - jmp_rx;
1948    tcg_insn_unit insn;
1949
1950    /* Either directly branch, or fall through to indirect branch. */
1951    if (offset == sextreg(offset, 0, 20)) {
1952        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
1953    } else {
1954        insn = OPC_NOP;
1955    }
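    /*
     * The patch is a single aligned 4-byte store, so threads concurrently
     * executing this TB observe either the old or the new instruction.
     */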
1956    qatomic_set((uint32_t *)jmp_rw, insn);
1957    flush_idcache_range(jmp_rx, jmp_rw, 4);
1958}
1959
1960static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
1961                       const TCGArg args[TCG_MAX_OP_ARGS],
1962                       const int const_args[TCG_MAX_OP_ARGS])
1963{
1964    TCGArg a0 = args[0];
1965    TCGArg a1 = args[1];
1966    TCGArg a2 = args[2];
1967    int c2 = const_args[2];
1968
1969    switch (opc) {
1970    case INDEX_op_goto_ptr:
1971        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
1972        break;
1973
1974    case INDEX_op_br:
1975        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
1976        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1977        break;
1978
1979    case INDEX_op_ld8u_i32:
1980    case INDEX_op_ld8u_i64:
1981        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
1982        break;
1983    case INDEX_op_ld8s_i32:
1984    case INDEX_op_ld8s_i64:
1985        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
1986        break;
1987    case INDEX_op_ld16u_i32:
1988    case INDEX_op_ld16u_i64:
1989        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
1990        break;
1991    case INDEX_op_ld16s_i32:
1992    case INDEX_op_ld16s_i64:
1993        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
1994        break;
1995    case INDEX_op_ld32u_i64:
1996        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
1997        break;
1998    case INDEX_op_ld_i32:
1999    case INDEX_op_ld32s_i64:
2000        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
2001        break;
2002    case INDEX_op_ld_i64:
2003        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
2004        break;
2005
2006    case INDEX_op_st8_i32:
2007    case INDEX_op_st8_i64:
2008        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
2009        break;
2010    case INDEX_op_st16_i32:
2011    case INDEX_op_st16_i64:
2012        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
2013        break;
2014    case INDEX_op_st_i32:
2015    case INDEX_op_st32_i64:
2016        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
2017        break;
2018    case INDEX_op_st_i64:
2019        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
2020        break;
2021
2022    case INDEX_op_add_i32:
2023        if (c2) {
2024            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
2025        } else {
2026            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
2027        }
2028        break;
2029    case INDEX_op_add_i64:
2030        if (c2) {
2031            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
2032        } else {
2033            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
2034        }
2035        break;
2036
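    /*
     * Constant subtrahends are emitted as addi/addiw of the negated value;
     * the sub constraints are expected to admit only immediates whose
     * negation still fits the 12-bit field.
     */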
2037    case INDEX_op_sub_i32:
2038        if (c2) {
2039            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
2040        } else {
2041            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
2042        }
2043        break;
2044    case INDEX_op_sub_i64:
2045        if (c2) {
2046            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
2047        } else {
2048            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
2049        }
2050        break;
2051
2052    case INDEX_op_and_i32:
2053    case INDEX_op_and_i64:
2054        if (c2) {
2055            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
2056        } else {
2057            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
2058        }
2059        break;
2060
2061    case INDEX_op_or_i32:
2062    case INDEX_op_or_i64:
2063        if (c2) {
2064            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
2065        } else {
2066            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
2067        }
2068        break;
2069
2070    case INDEX_op_xor_i32:
2071    case INDEX_op_xor_i64:
2072        if (c2) {
2073            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
2074        } else {
2075            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
2076        }
2077        break;
2078
2079    case INDEX_op_andc_i32:
2080    case INDEX_op_andc_i64:
2081        if (c2) {
2082            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2);
2083        } else {
2084            tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
2085        }
2086        break;
2087    case INDEX_op_orc_i32:
2088    case INDEX_op_orc_i64:
2089        if (c2) {
2090            tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2);
2091        } else {
2092            tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
2093        }
2094        break;
2095    case INDEX_op_eqv_i32:
2096    case INDEX_op_eqv_i64:
2097        if (c2) {
2098            tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2);
2099        } else {
2100            tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
2101        }
2102        break;
2103
2104    case INDEX_op_not_i32:
2105    case INDEX_op_not_i64:
2106        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
2107        break;
2108
2109    case INDEX_op_neg_i32:
2110        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
2111        break;
2112    case INDEX_op_neg_i64:
2113        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
2114        break;
2115
2116    case INDEX_op_mul_i32:
2117        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
2118        break;
2119    case INDEX_op_mul_i64:
2120        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
2121        break;
2122
2123    case INDEX_op_div_i32:
2124        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
2125        break;
2126    case INDEX_op_div_i64:
2127        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
2128        break;
2129
2130    case INDEX_op_divu_i32:
2131        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
2132        break;
2133    case INDEX_op_divu_i64:
2134        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
2135        break;
2136
2137    case INDEX_op_rem_i32:
2138        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
2139        break;
2140    case INDEX_op_rem_i64:
2141        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
2142        break;
2143
2144    case INDEX_op_remu_i32:
2145        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
2146        break;
2147    case INDEX_op_remu_i64:
2148        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
2149        break;
2150
2151    case INDEX_op_shl_i32:
2152        if (c2) {
2153            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
2154        } else {
2155            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
2156        }
2157        break;
2158    case INDEX_op_shl_i64:
2159        if (c2) {
2160            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
2161        } else {
2162            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
2163        }
2164        break;
2165
2166    case INDEX_op_shr_i32:
2167        if (c2) {
2168            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
2169        } else {
2170            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
2171        }
2172        break;
2173    case INDEX_op_shr_i64:
2174        if (c2) {
2175            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
2176        } else {
2177            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
2178        }
2179        break;
2180
2181    case INDEX_op_sar_i32:
2182        if (c2) {
2183            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
2184        } else {
2185            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
2186        }
2187        break;
2188    case INDEX_op_sar_i64:
2189        if (c2) {
2190            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
2191        } else {
2192            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
2193        }
2194        break;
2195
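    /*
     * Zbb has no rotate-left-immediate; a left rotate by C is emitted as
     * a right rotate by (width - C), i.e. rori/roriw with the negated count.
     */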
2196    case INDEX_op_rotl_i32:
2197        if (c2) {
2198            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f);
2199        } else {
2200            tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2);
2201        }
2202        break;
2203    case INDEX_op_rotl_i64:
2204        if (c2) {
2205            tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f);
2206        } else {
2207            tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2);
2208        }
2209        break;
2210
2211    case INDEX_op_rotr_i32:
2212        if (c2) {
2213            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f);
2214        } else {
2215            tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2);
2216        }
2217        break;
2218    case INDEX_op_rotr_i64:
2219        if (c2) {
2220            tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f);
2221        } else {
2222            tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2);
2223        }
2224        break;
2225
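    /*
     * Zbb's rev8 reverses all eight bytes of the register, so 16- and
     * 32-bit swaps land in the high bits and must be shifted back down:
     * SRLI zero-extends the result (TCG_BSWAP_OZ), SRAI sign-extends it.
     */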
2226    case INDEX_op_bswap64_i64:
2227        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2228        break;
2229    case INDEX_op_bswap32_i32:
2230        a2 = 0;
2231        /* fall through */
2232    case INDEX_op_bswap32_i64:
2233        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2234        if (a2 & TCG_BSWAP_OZ) {
2235            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
2236        } else {
2237            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
2238        }
2239        break;
2240    case INDEX_op_bswap16_i64:
2241    case INDEX_op_bswap16_i32:
2242        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2243        if (a2 & TCG_BSWAP_OZ) {
2244            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
2245        } else {
2246            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
2247        }
2248        break;
2249
2250    case INDEX_op_ctpop_i32:
2251        tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
2252        break;
2253    case INDEX_op_ctpop_i64:
2254        tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
2255        break;
2256
2257    case INDEX_op_clz_i32:
2258        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2);
2259        break;
2260    case INDEX_op_clz_i64:
2261        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2);
2262        break;
2263    case INDEX_op_ctz_i32:
2264        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2);
2265        break;
2266    case INDEX_op_ctz_i64:
2267        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2);
2268        break;
2269
2270    case INDEX_op_add2_i32:
2271        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2272                        const_args[4], const_args[5], false, true);
2273        break;
2274    case INDEX_op_add2_i64:
2275        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2276                        const_args[4], const_args[5], false, false);
2277        break;
2278    case INDEX_op_sub2_i32:
2279        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2280                        const_args[4], const_args[5], true, true);
2281        break;
2282    case INDEX_op_sub2_i64:
2283        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2284                        const_args[4], const_args[5], true, false);
2285        break;
2286
2287    case INDEX_op_brcond_i32:
2288    case INDEX_op_brcond_i64:
2289        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
2290        break;
2291
2292    case INDEX_op_setcond_i32:
2293    case INDEX_op_setcond_i64:
2294        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
2295        break;
2296
2297    case INDEX_op_negsetcond_i32:
2298    case INDEX_op_negsetcond_i64:
2299        tcg_out_negsetcond(s, args[3], a0, a1, a2, c2);
2300        break;
2301
2302    case INDEX_op_movcond_i32:
2303    case INDEX_op_movcond_i64:
2304        tcg_out_movcond(s, args[5], a0, a1, a2, c2,
2305                        args[3], const_args[3], args[4], const_args[4]);
2306        break;
2307
2308    case INDEX_op_qemu_ld_i32:
2309        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
2310        break;
2311    case INDEX_op_qemu_ld_i64:
2312        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
2313        break;
2314    case INDEX_op_qemu_st_i32:
2315        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
2316        break;
2317    case INDEX_op_qemu_st_i64:
2318        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
2319        break;
2320
2321    case INDEX_op_extrh_i64_i32:
2322        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
2323        break;
2324
2325    case INDEX_op_mulsh_i32:
2326    case INDEX_op_mulsh_i64:
2327        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
2328        break;
2329
2330    case INDEX_op_muluh_i32:
2331    case INDEX_op_muluh_i64:
2332        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
2333        break;
2334
2335    case INDEX_op_mb:
2336        tcg_out_mb(s, a0);
2337        break;
2338
2339    case INDEX_op_extract_i64:
2340        if (a2 + args[3] == 32) {
2341            if (a2 == 0) {
2342                tcg_out_ext32u(s, a0, a1);
2343            } else {
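                /*
                 * srliw sign-extends its 32-bit result, but with a2 > 0
                 * zeros are shifted in, so bit 31 is clear and the value
                 * remains zero-extended as extract requires.
                 */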
2344                tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
2345            }
2346            break;
2347        }
2348        /* FALLTHRU */
2349    case INDEX_op_extract_i32:
2350        switch (args[3]) {
2351        case 1:
2352            tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2);
2353            break;
2354        case 16:
2355            tcg_debug_assert(a2 == 0);
2356            tcg_out_ext16u(s, a0, a1);
2357            break;
2358        default:
2359            g_assert_not_reached();
2360        }
2361        break;
2362
2363    case INDEX_op_sextract_i64:
2364        if (a2 + args[3] == 32) {
2365            if (a2 == 0) {
2366                tcg_out_ext32s(s, a0, a1);
2367            } else {
2368                tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
2369            }
2370            break;
2371        }
2372        /* FALLTHRU */
2373    case INDEX_op_sextract_i32:
2374        if (a2 == 0 && args[3] == 8) {
2375            tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
2376        } else if (a2 == 0 && args[3] == 16) {
2377            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
2378        } else {
2379            g_assert_not_reached();
2380        }
2381        break;
2382
2383    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2384    case INDEX_op_mov_i64:
2385    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2386    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2387    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2388    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2389    case INDEX_op_ext8s_i64:
2390    case INDEX_op_ext8u_i32:
2391    case INDEX_op_ext8u_i64:
2392    case INDEX_op_ext16s_i32:
2393    case INDEX_op_ext16s_i64:
2394    case INDEX_op_ext16u_i32:
2395    case INDEX_op_ext16u_i64:
2396    case INDEX_op_ext32s_i64:
2397    case INDEX_op_ext32u_i64:
2398    case INDEX_op_ext_i32_i64:
2399    case INDEX_op_extu_i32_i64:
2400    case INDEX_op_extrl_i64_i32:
2401    default:
2402        g_assert_not_reached();
2403    }
2404}
2405
2406static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2407                           unsigned vecl, unsigned vece,
2408                           const TCGArg args[TCG_MAX_OP_ARGS],
2409                           const int const_args[TCG_MAX_OP_ARGS])
2410{
2411    TCGType type = vecl + TCG_TYPE_V64;
2412    TCGArg a0, a1, a2;
2413    int c2;
2414
2415    a0 = args[0];
2416    a1 = args[1];
2417    a2 = args[2];
2418    c2 = const_args[2];
2419
2420    switch (opc) {
2421    case INDEX_op_dupm_vec:
2422        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2423        break;
2424    case INDEX_op_ld_vec:
2425        tcg_out_ld(s, type, a0, a1, a2);
2426        break;
2427    case INDEX_op_st_vec:
2428        tcg_out_st(s, type, a0, a1, a2);
2429        break;
2430    case INDEX_op_add_vec:
2431        set_vtype_len_sew(s, type, vece);
2432        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
2433        break;
2434    case INDEX_op_sub_vec:
2435        set_vtype_len_sew(s, type, vece);
2436        if (const_args[1]) {
2437            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
2438        } else {
2439            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
2440        }
2441        break;
2442    case INDEX_op_and_vec:
2443        set_vtype_len(s, type);
2444        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
2445        break;
2446    case INDEX_op_or_vec:
2447        set_vtype_len(s, type);
2448        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
2449        break;
2450    case INDEX_op_xor_vec:
2451        set_vtype_len(s, type);
2452        tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
2453        break;
2454    case INDEX_op_not_vec:
2455        set_vtype_len(s, type);
2456        tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
2457        break;
2458    case INDEX_op_neg_vec:
2459        set_vtype_len_sew(s, type, vece);
2460        tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
2461        break;
2462    case INDEX_op_mul_vec:
2463        set_vtype_len_sew(s, type, vece);
2464        tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
2465        break;
2466    case INDEX_op_ssadd_vec:
2467        set_vtype_len_sew(s, type, vece);
2468        tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
2469        break;
2470    case INDEX_op_sssub_vec:
2471        set_vtype_len_sew(s, type, vece);
2472        tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
2473        break;
2474    case INDEX_op_usadd_vec:
2475        set_vtype_len_sew(s, type, vece);
2476        tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
2477        break;
2478    case INDEX_op_ussub_vec:
2479        set_vtype_len_sew(s, type, vece);
2480        tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
2481        break;
2482    case INDEX_op_smax_vec:
2483        set_vtype_len_sew(s, type, vece);
2484        tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
2485        break;
2486    case INDEX_op_smin_vec:
2487        set_vtype_len_sew(s, type, vece);
2488        tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
2489        break;
2490    case INDEX_op_umax_vec:
2491        set_vtype_len_sew(s, type, vece);
2492        tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
2493        break;
2494    case INDEX_op_umin_vec:
2495        set_vtype_len_sew(s, type, vece);
2496        tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
2497        break;
2498    case INDEX_op_shls_vec:
2499        set_vtype_len_sew(s, type, vece);
2500        tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
2501        break;
2502    case INDEX_op_shrs_vec:
2503        set_vtype_len_sew(s, type, vece);
2504        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
2505        break;
2506    case INDEX_op_sars_vec:
2507        set_vtype_len_sew(s, type, vece);
2508        tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
2509        break;
2510    case INDEX_op_shlv_vec:
2511        set_vtype_len_sew(s, type, vece);
2512        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2513        break;
2514    case INDEX_op_shrv_vec:
2515        set_vtype_len_sew(s, type, vece);
2516        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2517        break;
2518    case INDEX_op_sarv_vec:
2519        set_vtype_len_sew(s, type, vece);
2520        tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
2521        break;
2522    case INDEX_op_shli_vec:
2523        set_vtype_len_sew(s, type, vece);
2524        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
2525        break;
2526    case INDEX_op_shri_vec:
2527        set_vtype_len_sew(s, type, vece);
2528        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
2529        break;
2530    case INDEX_op_sari_vec:
2531        set_vtype_len_sew(s, type, vece);
2532        tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
2533        break;
2534    case INDEX_op_rotli_vec:
2535        set_vtype_len_sew(s, type, vece);
2536        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2537        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
2538                        -a2 & ((8 << vece) - 1));
2539        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2540        break;
2541    case INDEX_op_rotls_vec:
2542        set_vtype_len_sew(s, type, vece);
2543        tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2544        tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
2545        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
2546        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2547        break;
2548    case INDEX_op_rotlv_vec:
2549        set_vtype_len_sew(s, type, vece);
2550        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2551        tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2552        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2553        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2554        break;
2555    case INDEX_op_rotrv_vec:
2556        set_vtype_len_sew(s, type, vece);
2557        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2558        tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2559        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2560        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2561        break;
2562    case INDEX_op_cmp_vec:
2563        tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
2564                       -1, true, 0, true);
2565        break;
2566    case INDEX_op_cmpsel_vec:
2567        tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
2568                       args[3], const_args[3], args[4], const_args[4]);
2569        break;
2570    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov.  */
2571    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec.  */
2572    default:
2573        g_assert_not_reached();
2574    }
2575}
2576
2577void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2578                       TCGArg a0, ...)
2579{
2580    g_assert_not_reached();
2581}
2582
2583int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2584{
2585    switch (opc) {
2586    case INDEX_op_add_vec:
2587    case INDEX_op_sub_vec:
2588    case INDEX_op_and_vec:
2589    case INDEX_op_or_vec:
2590    case INDEX_op_xor_vec:
2591    case INDEX_op_not_vec:
2592    case INDEX_op_neg_vec:
2593    case INDEX_op_mul_vec:
2594    case INDEX_op_ssadd_vec:
2595    case INDEX_op_sssub_vec:
2596    case INDEX_op_usadd_vec:
2597    case INDEX_op_ussub_vec:
2598    case INDEX_op_smax_vec:
2599    case INDEX_op_smin_vec:
2600    case INDEX_op_umax_vec:
2601    case INDEX_op_umin_vec:
2602    case INDEX_op_shls_vec:
2603    case INDEX_op_shrs_vec:
2604    case INDEX_op_sars_vec:
2605    case INDEX_op_shlv_vec:
2606    case INDEX_op_shrv_vec:
2607    case INDEX_op_sarv_vec:
2608    case INDEX_op_shri_vec:
2609    case INDEX_op_shli_vec:
2610    case INDEX_op_sari_vec:
2611    case INDEX_op_rotls_vec:
2612    case INDEX_op_rotlv_vec:
2613    case INDEX_op_rotrv_vec:
2614    case INDEX_op_rotli_vec:
2615    case INDEX_op_cmp_vec:
2616    case INDEX_op_cmpsel_vec:
2617        return 1;
2618    default:
2619        return 0;
2620    }
2621}
2622
2623static TCGConstraintSetIndex
2624tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
2625{
2626    switch (op) {
2627    case INDEX_op_goto_ptr:
2628        return C_O0_I1(r);
2629
2630    case INDEX_op_ld8u_i32:
2631    case INDEX_op_ld8s_i32:
2632    case INDEX_op_ld16u_i32:
2633    case INDEX_op_ld16s_i32:
2634    case INDEX_op_ld_i32:
2635    case INDEX_op_not_i32:
2636    case INDEX_op_neg_i32:
2637    case INDEX_op_ld8u_i64:
2638    case INDEX_op_ld8s_i64:
2639    case INDEX_op_ld16u_i64:
2640    case INDEX_op_ld16s_i64:
2641    case INDEX_op_ld32s_i64:
2642    case INDEX_op_ld32u_i64:
2643    case INDEX_op_ld_i64:
2644    case INDEX_op_not_i64:
2645    case INDEX_op_neg_i64:
2646    case INDEX_op_ext8u_i32:
2647    case INDEX_op_ext8u_i64:
2648    case INDEX_op_ext16u_i32:
2649    case INDEX_op_ext16u_i64:
2650    case INDEX_op_ext32u_i64:
2651    case INDEX_op_extu_i32_i64:
2652    case INDEX_op_ext8s_i32:
2653    case INDEX_op_ext8s_i64:
2654    case INDEX_op_ext16s_i32:
2655    case INDEX_op_ext16s_i64:
2656    case INDEX_op_ext32s_i64:
2657    case INDEX_op_extrl_i64_i32:
2658    case INDEX_op_extrh_i64_i32:
2659    case INDEX_op_ext_i32_i64:
2660    case INDEX_op_extract_i32:
2661    case INDEX_op_extract_i64:
2662    case INDEX_op_sextract_i32:
2663    case INDEX_op_sextract_i64:
2664    case INDEX_op_bswap16_i32:
2665    case INDEX_op_bswap32_i32:
2666    case INDEX_op_bswap16_i64:
2667    case INDEX_op_bswap32_i64:
2668    case INDEX_op_bswap64_i64:
2669    case INDEX_op_ctpop_i32:
2670    case INDEX_op_ctpop_i64:
2671        return C_O1_I1(r, r);
2672
2673    case INDEX_op_st8_i32:
2674    case INDEX_op_st16_i32:
2675    case INDEX_op_st_i32:
2676    case INDEX_op_st8_i64:
2677    case INDEX_op_st16_i64:
2678    case INDEX_op_st32_i64:
2679    case INDEX_op_st_i64:
2680        return C_O0_I2(rz, r);
2681
2682    case INDEX_op_add_i32:
2683    case INDEX_op_and_i32:
2684    case INDEX_op_or_i32:
2685    case INDEX_op_xor_i32:
2686    case INDEX_op_add_i64:
2687    case INDEX_op_and_i64:
2688    case INDEX_op_or_i64:
2689    case INDEX_op_xor_i64:
2690    case INDEX_op_setcond_i32:
2691    case INDEX_op_setcond_i64:
2692    case INDEX_op_negsetcond_i32:
2693    case INDEX_op_negsetcond_i64:
2694        return C_O1_I2(r, r, rI);
2695
2696    case INDEX_op_andc_i32:
2697    case INDEX_op_andc_i64:
2698    case INDEX_op_orc_i32:
2699    case INDEX_op_orc_i64:
2700    case INDEX_op_eqv_i32:
2701    case INDEX_op_eqv_i64:
2702        return C_O1_I2(r, r, rJ);
2703
2704    case INDEX_op_sub_i32:
2705    case INDEX_op_sub_i64:
2706        return C_O1_I2(r, rz, rN);
2707
2708    case INDEX_op_mul_i32:
2709    case INDEX_op_mulsh_i32:
2710    case INDEX_op_muluh_i32:
2711    case INDEX_op_div_i32:
2712    case INDEX_op_divu_i32:
2713    case INDEX_op_rem_i32:
2714    case INDEX_op_remu_i32:
2715    case INDEX_op_mul_i64:
2716    case INDEX_op_mulsh_i64:
2717    case INDEX_op_muluh_i64:
2718    case INDEX_op_div_i64:
2719    case INDEX_op_divu_i64:
2720    case INDEX_op_rem_i64:
2721    case INDEX_op_remu_i64:
2722        return C_O1_I2(r, rz, rz);
2723
2724    case INDEX_op_shl_i32:
2725    case INDEX_op_shr_i32:
2726    case INDEX_op_sar_i32:
2727    case INDEX_op_rotl_i32:
2728    case INDEX_op_rotr_i32:
2729    case INDEX_op_shl_i64:
2730    case INDEX_op_shr_i64:
2731    case INDEX_op_sar_i64:
2732    case INDEX_op_rotl_i64:
2733    case INDEX_op_rotr_i64:
2734        return C_O1_I2(r, r, ri);
2735
2736    case INDEX_op_clz_i32:
2737    case INDEX_op_clz_i64:
2738    case INDEX_op_ctz_i32:
2739    case INDEX_op_ctz_i64:
2740        return C_N1_I2(r, r, rM);
2741
2742    case INDEX_op_brcond_i32:
2743    case INDEX_op_brcond_i64:
2744        return C_O0_I2(rz, rz);
2745
2746    case INDEX_op_movcond_i32:
2747    case INDEX_op_movcond_i64:
2748        return C_O1_I4(r, r, rI, rM, rM);
2749
2750    case INDEX_op_add2_i32:
2751    case INDEX_op_add2_i64:
2752    case INDEX_op_sub2_i32:
2753    case INDEX_op_sub2_i64:
2754        return C_O2_I4(r, r, rz, rz, rM, rM);
2755
2756    case INDEX_op_qemu_ld_i32:
2757    case INDEX_op_qemu_ld_i64:
2758        return C_O1_I1(r, r);
2759    case INDEX_op_qemu_st_i32:
2760    case INDEX_op_qemu_st_i64:
2761        return C_O0_I2(rz, r);
2762
2763    case INDEX_op_st_vec:
2764        return C_O0_I2(v, r);
2765    case INDEX_op_dup_vec:
2766    case INDEX_op_dupm_vec:
2767    case INDEX_op_ld_vec:
2768        return C_O1_I1(v, r);
2769    case INDEX_op_neg_vec:
2770    case INDEX_op_not_vec:
2771    case INDEX_op_shli_vec:
2772    case INDEX_op_shri_vec:
2773    case INDEX_op_sari_vec:
2774    case INDEX_op_rotli_vec:
2775        return C_O1_I1(v, v);
2776    case INDEX_op_add_vec:
2777    case INDEX_op_and_vec:
2778    case INDEX_op_or_vec:
2779    case INDEX_op_xor_vec:
2780    case INDEX_op_ssadd_vec:
2781    case INDEX_op_sssub_vec:
2782    case INDEX_op_usadd_vec:
2783    case INDEX_op_ussub_vec:
2784    case INDEX_op_smax_vec:
2785    case INDEX_op_smin_vec:
2786    case INDEX_op_umax_vec:
2787    case INDEX_op_umin_vec:
2788        return C_O1_I2(v, v, vK);
2789    case INDEX_op_sub_vec:
2790        return C_O1_I2(v, vK, v);
2791    case INDEX_op_mul_vec:
2792    case INDEX_op_shlv_vec:
2793    case INDEX_op_shrv_vec:
2794    case INDEX_op_sarv_vec:
2795    case INDEX_op_rotlv_vec:
2796    case INDEX_op_rotrv_vec:
2797        return C_O1_I2(v, v, v);
2798    case INDEX_op_shls_vec:
2799    case INDEX_op_shrs_vec:
2800    case INDEX_op_sars_vec:
2801    case INDEX_op_rotls_vec:
2802        return C_O1_I2(v, v, r);
2803    case INDEX_op_cmp_vec:
2804        return C_O1_I2(v, v, vL);
2805    case INDEX_op_cmpsel_vec:
2806        return C_O1_I4(v, v, vL, vK, vK);
2807    default:
2808        return C_NotImplemented;
2809    }
2810}
2811
2812static const int tcg_target_callee_save_regs[] = {
2813    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
2814    TCG_REG_S1,
2815    TCG_REG_S2,
2816    TCG_REG_S3,
2817    TCG_REG_S4,
2818    TCG_REG_S5,
2819    TCG_REG_S6,
2820    TCG_REG_S7,
2821    TCG_REG_S8,
2822    TCG_REG_S9,
2823    TCG_REG_S10,
2824    TCG_REG_S11,
2825    TCG_REG_RA,       /* should be last for ABI compliance */
2826};
2827
2828/* Stack frame parameters.  */
2829#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
2830#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
2831#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2832#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
2833                     + TCG_TARGET_STACK_ALIGN - 1) \
2834                    & -TCG_TARGET_STACK_ALIGN)
2835#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
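
/*
 * Frame layout, from sp upward: TCG_STATIC_CALL_ARGS_SIZE bytes for
 * outgoing helper-call arguments, TEMP_SIZE bytes for the TCG spill
 * buffer, then the callee-saved registers starting at SAVE_OFS.
 */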
2836
2837/* We're expecting to be able to use an immediate for frame allocation.  */
2838QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2839
2840/* Generate global QEMU prologue and epilogue code */
2841static void tcg_target_qemu_prologue(TCGContext *s)
2842{
2843    int i;
2844
2845    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2846
2847    /* TB prologue */
2848    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2849    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2850        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2851                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2852    }
2853
2854    if (!tcg_use_softmmu && guest_base) {
2855        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
2856        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2857    }
2858
2859    /* Call generated code */
2860    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2861    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
2862
2863    /* Return path for goto_ptr. Set return value to 0 */
2864    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2865    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
2866
2867    /* TB epilogue */
2868    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2869    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2870        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2871                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2872    }
2873
2874    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
2875    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
2876}
2877
2878static void tcg_out_tb_start(TCGContext *s)
2879{
2880    init_setting_vtype(s);
2881}
2882
2883static bool vtype_check(unsigned vtype)
2884{
2885    unsigned long tmp;
2886
2887    /* vsetvl tmp, zero, vtype */
2888    asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
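    /*
     * An unsupported vtype sets vill and forces vl to zero, so reading
     * back zero means the requested configuration is unavailable.
     */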
2889    return tmp != 0;
2890}
2891
2892static void probe_frac_lmul_1(TCGType type, MemOp vsew)
2893{
2894    VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
2895    unsigned avl = tcg_type_size(type) >> vsew;
2896    int lmul = type - riscv_lg2_vlenb;
2897    unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
2898    bool lmul_eq_avl = true;
2899
2900    /* Guaranteed by Zve64x. */
2901    assert(lmul < 3);
2902
2903    /*
2904     * For LMUL < -3, the host vector size is so large that TYPE
2905     * is smaller than the minimum 1/8 fraction.
2906     *
2907     * For other fractional LMUL settings, implementations must
2908     * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
2909     * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
2910     * but e64 may not be supported. In other words, the hardware only
2911     * guarantees SEW_MIN <= SEW <= LMUL * ELEN.  Check.
2912     */
2913    if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
2914        vtype = encode_vtype(true, true, vsew, VLMUL_M1);
2915        lmul_eq_avl = false;
2916    }
2917
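    /* vsetivli encodes AVL as a 5-bit immediate, hence the avl < 32 cutoff. */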
2918    if (avl < 32) {
2919        p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
2920    } else if (lmul_eq_avl) {
2921        /* rd != 0 and rs1 == 0 uses vlmax */
2922        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
2923    } else {
2924        p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
2925        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
2926    }
2927}
2928
2929static void probe_frac_lmul(void)
2930{
2931    /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
2932    QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
2933
2934    for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
2935        for (MemOp e = MO_8; e <= MO_64; e++) {
2936            probe_frac_lmul_1(t, e);
2937        }
2938    }
2939}
2940
2941static void tcg_target_init(TCGContext *s)
2942{
2943    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
2944    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
2945
2946    tcg_target_call_clobber_regs = -1;
2947    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
2948    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
2949    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
2950    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
2951    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
2952    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
2953    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
2954    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
2955    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
2956    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
2957    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
2958    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
2959
2960    s->reserved_regs = 0;
2961    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
2962    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
2963    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
2964    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
2965    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2966    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
2967    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
2968
2969    if (cpuinfo & CPUINFO_ZVE64X) {
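        /*
         * Types wider than VLEN are handled with LMUL > 1 register groups;
         * RVV requires a group's base register to be a multiple of LMUL,
         * which is what the ALL_[DQ]VECTOR_REG_GROUPS masks encode.
         */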
2970        switch (riscv_lg2_vlenb) {
2971        case TCG_TYPE_V64:
2972            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2973            tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
2974            tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
2975            s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2976            break;
2977        case TCG_TYPE_V128:
2978            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2979            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2980            tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
2981            s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2982            break;
2983        default:
2984            /* Guaranteed by Zve64x. */
2985            tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
2986            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2987            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2988            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
2989            break;
2990        }
2991        tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
2992        probe_frac_lmul();
2993    }
2994}
2995
2996typedef struct {
2997    DebugFrameHeader h;
2998    uint8_t fde_def_cfa[4];
2999    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
3000} DebugFrame;
3001
3002#define ELF_HOST_MACHINE EM_RISCV
3003
3004static const DebugFrame debug_frame = {
3005    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
3006    .h.cie.id = -1,
3007    .h.cie.version = 1,
3008    .h.cie.code_align = 1,
3009    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
3010    .h.cie.return_column = TCG_REG_RA,
3011
3012    /* Total FDE size does not include the "len" member.  */
3013    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3014
3015    .fde_def_cfa = {
3016        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3017        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3018        (FRAME_SIZE >> 7)
3019    },
3020    .fde_reg_ofs = {
3021        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
3022        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
3023        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
3024        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
3025        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
3026        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
3027        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
3028        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
3029        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
3030        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
3031        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
3032        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
3033    }
3034};
3035
3036void tcg_register_jit(const void *buf, size_t buf_size)
3037{
3038    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3039}
3040