/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 SiFive, Inc
 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Based on i386/tcg-target.c and mips/tcg-target.c
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Used for function call generation. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I128        TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "zero", "ra",  "sp",  "gp",  "tp",  "t0",  "t1",  "t2",
    "s0",   "s1",  "a0",  "a1",  "a2",  "a3",  "a4",  "a5",
    "a6",   "a7",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
    "s8",   "s9",  "s10", "s11", "t3",  "t4",  "t5",  "t6",
    "v0",   "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
    "v8",   "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
    "v16",  "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v24",  "v25", "v26", "v27", "v28", "v29", "v30", "v31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers */
    /* TCG_REG_S0 reserved for TCG_AREG0 */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_S10,
    TCG_REG_S11,

    /* Call clobbered registers */
    TCG_REG_T0,
    TCG_REG_T1,
    TCG_REG_T2,
    TCG_REG_T3,
    TCG_REG_T4,
    TCG_REG_T5,
    TCG_REG_T6,

    /* Argument registers */
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,

    /* Vector registers; TCG_REG_V0 is reserved for the mask. */
    TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,  TCG_REG_V4,
    TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,  TCG_REG_V8,
    TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
    TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
    TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
    TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
    TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
    TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_A0 + slot;
}

#define TCG_CT_CONST_ZERO    0x100
#define TCG_CT_CONST_S12     0x200
#define TCG_CT_CONST_N12     0x400
#define TCG_CT_CONST_M12     0x800
#define TCG_CT_CONST_J12    0x1000
#define TCG_CT_CONST_S5     0x2000
#define TCG_CT_CONST_CMP_VI 0x4000

#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000
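/*
 * In the 64-bit allocation masks, bits 0-31 cover the general registers and
 * bits 32-63 the vector registers.  The two patterns above therefore keep
 * only every second (v0, v2, v4, ...) or every fourth (v0, v4, v8, ...)
 * vector register, matching the base-register alignment required of
 * LMUL=2 and LMUL=4 register groups.
 */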

#define sextreg  sextract64

/*
 * RISC-V Base ISA opcodes (IM)
 */

#define V_OPIVV (0x0 << 12)
#define V_OPFVV (0x1 << 12)
#define V_OPMVV (0x2 << 12)
#define V_OPIVI (0x3 << 12)
#define V_OPIVX (0x4 << 12)
#define V_OPFVF (0x5 << 12)
#define V_OPMVX (0x6 << 12)
#define V_OPCFG (0x7 << 12)

/* NF <= 7 && NF >= 0 */
#define V_NF(x) (x << 29)
#define V_UNIT_STRIDE (0x0 << 20)
#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
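/*
 * In the vector load/store encodings, V_NF() fills the nf field in bits
 * 31:29 (the whole-register OPC_VL*RE64_V / OPC_VS*R_V definitions below
 * use nf = number of registers - 1), while the two values above select
 * the lumop/sumop field in bits 24:20: 0x0 is a plain unit-stride element
 * access, 0x8 the whole-register form.
 */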

typedef enum {
    VLMUL_M1 = 0, /* LMUL=1 */
    VLMUL_M2,     /* LMUL=2 */
    VLMUL_M4,     /* LMUL=4 */
    VLMUL_M8,     /* LMUL=8 */
    VLMUL_RESERVED,
    VLMUL_MF8,    /* LMUL=1/8 */
    VLMUL_MF4,    /* LMUL=1/4 */
    VLMUL_MF2,    /* LMUL=1/2 */
} RISCVVlmul;

typedef enum {
    OPC_ADD = 0x33,
    OPC_ADDI = 0x13,
    OPC_AND = 0x7033,
    OPC_ANDI = 0x7013,
    OPC_AUIPC = 0x17,
    OPC_BEQ = 0x63,
    OPC_BEXTI = 0x48005013,
    OPC_BGE = 0x5063,
    OPC_BGEU = 0x7063,
    OPC_BLT = 0x4063,
    OPC_BLTU = 0x6063,
    OPC_BNE = 0x1063,
    OPC_DIV = 0x2004033,
    OPC_DIVU = 0x2005033,
    OPC_JAL = 0x6f,
    OPC_JALR = 0x67,
    OPC_LB = 0x3,
    OPC_LBU = 0x4003,
    OPC_LD = 0x3003,
    OPC_LH = 0x1003,
    OPC_LHU = 0x5003,
    OPC_LUI = 0x37,
    OPC_LW = 0x2003,
    OPC_LWU = 0x6003,
    OPC_MUL = 0x2000033,
    OPC_MULH = 0x2001033,
    OPC_MULHSU = 0x2002033,
    OPC_MULHU = 0x2003033,
    OPC_OR = 0x6033,
    OPC_ORI = 0x6013,
    OPC_REM = 0x2006033,
    OPC_REMU = 0x2007033,
    OPC_SB = 0x23,
    OPC_SD = 0x3023,
    OPC_SH = 0x1023,
    OPC_SLL = 0x1033,
    OPC_SLLI = 0x1013,
    OPC_SLT = 0x2033,
    OPC_SLTI = 0x2013,
    OPC_SLTIU = 0x3013,
    OPC_SLTU = 0x3033,
    OPC_SRA = 0x40005033,
    OPC_SRAI = 0x40005013,
    OPC_SRL = 0x5033,
    OPC_SRLI = 0x5013,
    OPC_SUB = 0x40000033,
    OPC_SW = 0x2023,
    OPC_XOR = 0x4033,
    OPC_XORI = 0x4013,

    OPC_ADDIW = 0x1b,
    OPC_ADDW = 0x3b,
    OPC_DIVUW = 0x200503b,
    OPC_DIVW = 0x200403b,
    OPC_MULW = 0x200003b,
    OPC_REMUW = 0x200703b,
    OPC_REMW = 0x200603b,
    OPC_SLLIW = 0x101b,
    OPC_SLLW = 0x103b,
    OPC_SRAIW = 0x4000501b,
    OPC_SRAW = 0x4000503b,
    OPC_SRLIW = 0x501b,
    OPC_SRLW = 0x503b,
    OPC_SUBW = 0x4000003b,

    OPC_FENCE = 0x0000000f,
    OPC_NOP   = OPC_ADDI,   /* nop = addi r0,r0,0 */

    /* Zba: Bit manipulation extension, address generation */
    OPC_ADD_UW = 0x0800003b,

    /* Zbb: Bit manipulation extension, basic bit manipulation */
    OPC_ANDN   = 0x40007033,
    OPC_CLZ    = 0x60001013,
    OPC_CLZW   = 0x6000101b,
    OPC_CPOP   = 0x60201013,
    OPC_CPOPW  = 0x6020101b,
    OPC_CTZ    = 0x60101013,
    OPC_CTZW   = 0x6010101b,
    OPC_ORN    = 0x40006033,
    OPC_REV8   = 0x6b805013,
    OPC_ROL    = 0x60001033,
    OPC_ROLW   = 0x6000103b,
    OPC_ROR    = 0x60005033,
    OPC_RORW   = 0x6000503b,
    OPC_RORI   = 0x60005013,
    OPC_RORIW  = 0x6000501b,
    OPC_SEXT_B = 0x60401013,
    OPC_SEXT_H = 0x60501013,
    OPC_XNOR   = 0x40004033,
    OPC_ZEXT_H = 0x0800403b,

    /* Zicond: integer conditional operations */
    OPC_CZERO_EQZ = 0x0e005033,
    OPC_CZERO_NEZ = 0x0e007033,

    /* V: Vector extension 1.0 */
    OPC_VSETVLI  = 0x57 | V_OPCFG,
    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
    OPC_VSETVL   = 0x80000057 | V_OPCFG,

    OPC_VLE8_V  = 0x7 | V_UNIT_STRIDE,
    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
    OPC_VSE8_V  = 0x27 | V_UNIT_STRIDE,
    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,

    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,

    OPC_VADD_VV = 0x57 | V_OPIVV,
    OPC_VADD_VI = 0x57 | V_OPIVI,
    OPC_VSUB_VV = 0x8000057 | V_OPIVV,
    OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
    OPC_VAND_VV = 0x24000057 | V_OPIVV,
    OPC_VAND_VI = 0x24000057 | V_OPIVI,
    OPC_VOR_VV = 0x28000057 | V_OPIVV,
    OPC_VOR_VI = 0x28000057 | V_OPIVI,
    OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
    OPC_VXOR_VI = 0x2c000057 | V_OPIVI,

    OPC_VMUL_VV = 0x94000057 | V_OPMVV,
    OPC_VSADD_VV = 0x84000057 | V_OPIVV,
    OPC_VSADD_VI = 0x84000057 | V_OPIVI,
    OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
    OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
    OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
    OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
    OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
    OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,

    OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
    OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
    OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
    OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
    OPC_VMIN_VV = 0x14000057 | V_OPIVV,
    OPC_VMIN_VI = 0x14000057 | V_OPIVI,
    OPC_VMINU_VV = 0x10000057 | V_OPIVV,
    OPC_VMINU_VI = 0x10000057 | V_OPIVI,

    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,

    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,

    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,

    OPC_VSLL_VV = 0x94000057 | V_OPIVV,
    OPC_VSLL_VI = 0x94000057 | V_OPIVI,
    OPC_VSLL_VX = 0x94000057 | V_OPIVX,
    OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
    OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
    OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
    OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
    OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
    OPC_VSRA_VX = 0xa4000057 | V_OPIVX,

    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,

    OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;

static const struct {
    RISCVInsn op;
    bool swap;
} tcg_cmpcond_to_rvv_vv[] = {
    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
};

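/*
 * The OPIVI compare forms only exist for EQ/NE/LE(U)/GT(U).  LT/GE and
 * LTU/GEU are synthesized from the neighbouring LE/GT forms with the
 * immediate biased by one (x < c <=> x <= c - 1); 'adjust' records that
 * bias, and min/max give the range of source immediates accepted for each
 * condition (see tcg_target_const_match and tcg_out_cmpsel below).
 */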
static const struct {
    RISCVInsn op;
    int min;
    int max;
    bool adjust;
}  tcg_cmpcond_to_rvv_vi[] = {
    [TCG_COND_EQ]  = { OPC_VMSEQ_VI,  -16, 15, false },
    [TCG_COND_NE]  = { OPC_VMSNE_VI,  -16, 15, false },
    [TCG_COND_GT]  = { OPC_VMSGT_VI,  -16, 15, false },
    [TCG_COND_LE]  = { OPC_VMSLE_VI,  -16, 15, false },
    [TCG_COND_LT]  = { OPC_VMSLE_VI,  -15, 16, true  },
    [TCG_COND_GE]  = { OPC_VMSGT_VI,  -15, 16, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VI,   0, 15, false },
    [TCG_COND_GTU] = { OPC_VMSGTU_VI,   0, 15, false },
    [TCG_COND_LTU] = { OPC_VMSLEU_VI,   1, 16, true  },
    [TCG_COND_GEU] = { OPC_VMSGTU_VI,   1, 16, true  },
};

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return 1;
    }
    if (type >= TCG_TYPE_V64) {
        /* Val is replicated by VECE; extract the highest element. */
        val >>= (-8 << vece) & 63;
    }
    /*
     * Sign extended from 12 bits: [-0x800, 0x7ff].
     * Used for most arithmetic, as this is the ISA's immediate field.
     */
    if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
        return 1;
    }
    /*
     * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
     * Used for subtraction, where a constant must be handled by ADDI.
     */
    if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
        return 1;
    }
    /*
     * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
     * Used by addsub2 and movcond, which may need the negative value,
     * and requires the modified constant to be representable.
     */
    if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
        return 1;
    }
    /*
     * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
     * Used to map ANDN back to ANDI, etc.
     */
    if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
        return 1;
    }
    /*
     * Sign extended from 5 bits: [-0x10, 0x0f].
     * Used for vector-immediate.
     */
    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
        return 1;
    }
    /*
     * Used for vector compare OPIVI instructions.
     */
    if ((ct & TCG_CT_CONST_CMP_VI) &&
        val >= tcg_cmpcond_to_rvv_vi[cond].min &&
        val <= tcg_cmpcond_to_rvv_vi[cond].max) {
        return true;
    }
    return 0;
}

/*
 * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
 */

/* Type-R */

static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
}

/* Type-I */

static int32_t encode_imm12(uint32_t imm)
{
    return (imm & 0xfff) << 20;
}

static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
}

/* Type-S */

static int32_t encode_simm12(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0xFE0) << 20;
    ret |= (imm & 0x1F) << 7;

    return ret;
}

static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
}

/* Type-SB */

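/*
 * B-type (branch) immediate placement: imm[12] goes to bit 31, imm[10:5]
 * to bits 30:25, imm[4:1] to bits 11:8 and imm[11] to bit 7; bit 0 of the
 * offset is always zero and is not encoded.
 */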
static int32_t encode_sbimm12(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0x1000) << 19;
    ret |= (imm & 0x7e0) << 20;
    ret |= (imm & 0x1e) << 7;
    ret |= (imm & 0x800) >> 4;

    return ret;
}

static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
}

/* Type-U */

static int32_t encode_uimm20(uint32_t imm)
{
    return imm & 0xfffff000;
}

static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
}

/* Type-UJ */

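/*
 * J-type (jal) immediate placement: imm[20] goes to bit 31, imm[10:1] to
 * bits 30:21, imm[11] to bit 20 and imm[19:12] to bits 19:12; bit 0 of the
 * offset is always zero and is not encoded.
 */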
static int32_t encode_ujimm20(uint32_t imm)
{
    int32_t ret = 0;

    ret |= (imm & 0x0007fe) << (21 - 1);
    ret |= (imm & 0x000800) << (20 - 11);
    ret |= (imm & 0x0ff000) << (12 - 12);
    ret |= (imm & 0x100000) << (31 - 20);

    return ret;
}

static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
{
    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
}


/* Type-OPIVI */

static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
                         TCGReg vs2, bool vm)
{
    return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
           (vs2 & 0x1f) << 20 | (vm << 25);
}

/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */

static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
                        TCGReg s2, bool vm)
{
    return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
           (s2 & 0x1f) << 20 | (vm << 25);
}

/* Vector vtype */

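/*
 * vtype layout as assembled here: vlmul in bits 2:0, vsew in bits 5:3,
 * vta (tail agnostic) in bit 6 and vma (mask agnostic) in bit 7.
 */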
static uint32_t encode_vtype(bool vta, bool vma,
                            MemOp vsew, RISCVVlmul vlmul)
{
    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
}

static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
                           TCGArg rs1, uint32_t vtype)
{
    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
}

static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
                            uint32_t uimm, uint32_t vtype)
{
    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
}

/*
 * RISC-V instruction emitters
 */

static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
                            TCGReg rd, TCGReg rs1, TCGReg rs2)
{
    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
}

static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
                            TCGReg rd, TCGReg rs1, TCGArg imm)
{
    tcg_out32(s, encode_i(opc, rd, rs1, imm));
}

static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
                              TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
}

static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
                               TCGReg rs1, TCGReg rs2, uint32_t imm)
{
    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
}

static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
                              TCGReg rd, uint32_t imm)
{
    tcg_out32(s, encode_u(opc, rd, imm));
}

static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
                             TCGReg rd, uint32_t imm)
{
    tcg_out32(s, encode_uj(opc, rd, imm));
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
    int i;
    for (i = 0; i < count; ++i) {
        p[i] = OPC_NOP;
    }
}

/*
 * Relocations
 */

static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 12)) {
        *src_rw |= encode_sbimm12(offset);
        return true;
    }

    return false;
}

static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 20)) {
        *src_rw |= encode_ujimm20(offset);
        return true;
    }

    return false;
}

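/*
 * Patch an auipc followed by an addi/jalr/ld.  The low part is the
 * sign-extended bottom 12 bits of the offset and the high part is the
 * remainder, so that auipc's upper immediate plus the sign-extended 12-bit
 * immediate of the second instruction reconstructs the full offset.
 */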
static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
    int32_t lo = sextreg(offset, 0, 12);
    int32_t hi = offset - lo;

    if (offset == hi + lo) {
        src_rw[0] |= encode_uimm20(hi);
        src_rw[1] |= encode_imm12(lo);
        return true;
    }

    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_RISCV_BRANCH:
        return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
    case R_RISCV_JAL:
        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
    case R_RISCV_CALL:
        return reloc_call(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

/*
 * RISC-V vector instruction emitters
 */

/*
 * Vector registers use the same lower 5 bits as GPR registers,
 * and vm=0 (vm = false) means vector masking ENABLED.
 * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
 * second operand.
 */
static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg vs1)
{
    tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
}

static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, TCGReg rs1)
{
    tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
}

static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
                           TCGReg vd, TCGReg vs2, int32_t imm)
{
    tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
}

static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
                              TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
{
    if (c_vi1) {
        tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
    } else {
        tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
    }
}

static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
                                 TCGReg vs2, int32_t imm)
{
    tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
}

static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
                                 TCGReg vs2, TCGReg vs1)
{
    tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
}

typedef struct VsetCache {
    uint32_t movi_insn;
    uint32_t vset_insn;
} VsetCache;

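/*
 * Cache of pre-encoded instructions for changing vtype, indexed by
 * [type - TCG_TYPE_V64][vsew] in set_vtype() below: an optional setup
 * (movi) instruction followed by the vsetvli/vsetivli itself.
 */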
static VsetCache riscv_vset_cache[3][4];

static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
{
    const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];

    s->riscv_cur_type = type;
    s->riscv_cur_vsew = vsew;

    if (p->movi_insn) {
        tcg_out32(s, p->movi_insn);
    }
    tcg_out32(s, p->vset_insn);
}

static MemOp set_vtype_len(TCGContext *s, TCGType type)
{
    if (type != s->riscv_cur_type) {
        set_vtype(s, type, MO_64);
    }
    return s->riscv_cur_vsew;
}

static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
{
    if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
        set_vtype(s, type, vsew);
    }
}

/*
 * TCG intrinsics
 */

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        {
            int lmul = type - riscv_lg2_vlenb;
            int nf = 1 << MAX(lmul, 0);
            tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
        }
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

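/*
 * Materialize a constant.  In order of preference: a single ADDI for
 * 12-bit values, LUI (+ ADDIW) for 32-bit values, AUIPC + ADDI when the
 * value is within +/-2GB of the code, a shifted 20-bit or 12-bit pattern,
 * and finally a 64-bit load from the constant pool.
 */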
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long val)
{
    tcg_target_long lo, hi, tmp;
    int shift, ret;

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    lo = sextreg(val, 0, 12);
    if (val == lo) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
        return;
    }

    hi = val - lo;
    if (val == (int32_t)val) {
        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
        if (lo != 0) {
            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
        }
        return;
    }

    tmp = tcg_pcrel_diff(s, (void *)val);
    if (tmp == (int32_t)tmp) {
        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
        ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
        tcg_debug_assert(ret == true);
        return;
    }

    /* Look for a single 20-bit section.  */
    shift = ctz64(val);
    tmp = val >> shift;
    if (tmp == sextreg(tmp, 0, 20)) {
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
        if (shift > 12) {
            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
        }
        return;
    }

    /* Look for a few high zero bits, with lots of bits set in the middle.  */
    shift = clz64(val);
    tmp = val << shift;
    if (tmp == sextreg(tmp, 12, 20) << 12) {
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    } else if (tmp == sextreg(tmp, 0, 12)) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    }

    /* Drop into the constant pool.  */
    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
        tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
    }
}

static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBA) {
        tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO);
    } else {
        tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
        tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
    }
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
    }
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (cpuinfo & CPUINFO_ZBB) {
        tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0);
    } else {
        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
    }
}

static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_ext32s(s, ret, arg);
    }
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32u(s, ret, arg);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32s(s, ret, arg);
}

static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                         TCGReg addr, intptr_t offset)
{
    intptr_t imm12 = sextreg(offset, 0, 12);

    if (offset != imm12) {
        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);

        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
            imm12 = sextreg(diff, 0, 12);
            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
            if (addr != TCG_REG_ZERO) {
                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
            }
        }
        addr = TCG_REG_TMP2;
    }

    switch (opc) {
    case OPC_SB:
    case OPC_SH:
    case OPC_SW:
    case OPC_SD:
        tcg_out_opc_store(s, opc, addr, data, imm12);
        break;
    case OPC_LB:
    case OPC_LBU:
    case OPC_LH:
    case OPC_LHU:
    case OPC_LW:
    case OPC_LWU:
    case OPC_LD:
        tcg_out_opc_imm(s, opc, data, addr, imm12);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        addr = TCG_REG_TMP0;
    }
    tcg_out32(s, encode_v(opc, data, addr, 0, true));
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    RISCVInsn insn;

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;

            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
            insn = whole_reg_ld[idx];
        } else {
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    RISCVInsn insn;

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_st[] = {
                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
            };
            unsigned idx = type - riscv_lg2_vlenb;

            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
            insn = whole_reg_st[idx];
        } else {
            static const RISCVInsn unit_stride_st[] = {
                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
            insn = unit_stride_st[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
        break;
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
        return true;
    }
    return false;
}

static void tcg_out_addsub2(TCGContext *s,
                            TCGReg rl, TCGReg rh,
                            TCGReg al, TCGReg ah,
                            TCGArg bl, TCGArg bh,
                            bool cbl, bool cbh, bool is_sub, bool is32bit)
{
    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
    TCGReg th = TCG_REG_TMP1;

    /* If we have a negative constant such that negating it would
       make the high part zero, we can (usually) eliminate one insn.  */
    if (cbl && cbh && bh == -1 && bl != 0) {
        bl = -bl;
        bh = 0;
        is_sub = !is_sub;
    }

    /* By operating on the high part first, we get to use the final
       carry operation to move back from the temporary.  */
    if (!cbh) {
        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
    } else if (bh != 0 || ah == rl) {
        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
    } else {
        th = ah;
    }

    /* Note that tcg optimization should eliminate the bl == 0 case.  */
    if (is_sub) {
        if (cbl) {
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
        }
        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
    } else {
        if (cbl) {
            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
        } else if (al == bl) {
            /*
             * If the input regs overlap, this is a simple doubling
             * and carry-out is the input msb.  This special case is
             * required when the output reg overlaps the input,
             * but we might as well use it always.
             */
            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
            tcg_out_opc_reg(s, opc_add, rl, al, al);
        } else {
            tcg_out_opc_reg(s, opc_add, rl, al, bl);
            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
                            rl, (rl == bl ? al : bl));
        }
        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    set_vtype_len_sew(s, type, vece);
    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
    return true;
}

static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    /* Arg is replicated by VECE; extract the highest element. */
    arg >>= (-8 << vece) & 63;

    if (arg >= -16 && arg < 16) {
        if (arg == 0 || arg == -1) {
            set_vtype_len(s, type);
        } else {
            set_vtype_len_sew(s, type, vece);
        }
        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
        return;
    }
    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
}

static const struct {
    RISCVInsn op;
    bool swap;
} tcg_brcond_to_riscv[] = {
    [TCG_COND_EQ] =  { OPC_BEQ,  false },
    [TCG_COND_NE] =  { OPC_BNE,  false },
    [TCG_COND_LT] =  { OPC_BLT,  false },
    [TCG_COND_GE] =  { OPC_BGE,  false },
    [TCG_COND_LE] =  { OPC_BGE,  true  },
    [TCG_COND_GT] =  { OPC_BLT,  true  },
    [TCG_COND_LTU] = { OPC_BLTU, false },
    [TCG_COND_GEU] = { OPC_BGEU, false },
    [TCG_COND_LEU] = { OPC_BGEU, true  },
    [TCG_COND_GTU] = { OPC_BLTU, true  }
};

static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                           TCGReg arg2, TCGLabel *l)
{
    RISCVInsn op = tcg_brcond_to_riscv[cond].op;

    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        TCGReg t = arg1;
        arg1 = arg2;
        arg2 = t;
    }

    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
    tcg_out_opc_branch(s, op, arg1, arg2, 0);
}

#define SETCOND_INV    TCG_TARGET_NB_REGS
#define SETCOND_NEZ    (SETCOND_INV << 1)
#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
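/*
 * tcg_out_setcond_int returns the register holding an intermediate result,
 * possibly or'ed with the flags above: SETCOND_NEZ means the value still
 * needs a "!= 0" test to become a boolean, and SETCOND_INV means the
 * computed condition is the inverse of the requested one.
 */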

static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int flags = 0;

    switch (cond) {
    case TCG_COND_EQ:    /* -> NE  */
    case TCG_COND_GE:    /* -> LT  */
    case TCG_COND_GEU:   /* -> LTU */
    case TCG_COND_GT:    /* -> LE  */
    case TCG_COND_GTU:   /* -> LEU */
        cond = tcg_invert_cond(cond);
        flags ^= SETCOND_INV;
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /*
         * If we have a constant input, the most efficient way to implement
         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
         * We don't need to care for this for LE because the constant input
         * is constrained to signed 12-bit, and 0x800 is representable in the
         * temporary register.
         */
        if (c2) {
            if (cond == TCG_COND_LEU) {
                /* unsigned <= -1 is true */
                if (arg2 == -1) {
                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
                    return ret;
                }
                cond = TCG_COND_LTU;
            } else {
                cond = TCG_COND_LT;
            }
            tcg_debug_assert(arg2 <= 0x7ff);
            if (++arg2 == 0x800) {
                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
                arg2 = TCG_REG_TMP0;
                c2 = false;
            }
        } else {
            TCGReg tmp = arg2;
            arg2 = arg1;
            arg1 = tmp;
            cond = tcg_swap_cond(cond);    /* LE -> GE */
            cond = tcg_invert_cond(cond);  /* GE -> LT */
            flags ^= SETCOND_INV;
        }
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_NE:
        flags |= SETCOND_NEZ;
        if (!c2) {
            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
        } else if (arg2 == 0) {
            ret = arg1;
        } else {
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LT:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
        }
        break;

    case TCG_COND_LTU:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
        } else {
            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
        }
        break;

    default:
        g_assert_not_reached();
    }

    return ret | flags;
}

static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);

    if (tmpflags != ret) {
        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;

        switch (tmpflags & SETCOND_FLAGS) {
        case SETCOND_INV:
            /* Intermediate result is boolean: simply invert. */
            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
            break;
        case SETCOND_NEZ:
            /* Intermediate result is zero/non-zero: test != 0. */
            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
            break;
        case SETCOND_NEZ | SETCOND_INV:
            /* Intermediate result is zero/non-zero: test == 0. */
            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags;
    TCGReg tmp;

    /* For LT/GE comparison against 0, replicate the sign bit. */
    if (c2 && arg2 == 0) {
        switch (cond) {
        case TCG_COND_GE:
            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
            arg1 = ret;
            /* fall through */
        case TCG_COND_LT:
            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
            return;
        default:
            break;
        }
    }

    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
    tmp = tmpflags & ~SETCOND_FLAGS;

    /* If intermediate result is zero/non-zero: test != 0. */
    if (tmpflags & SETCOND_NEZ) {
        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
        tmp = ret;
    }

    /* Produce the 0/-1 result. */
    if (tmpflags & SETCOND_INV) {
        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
    } else {
        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
    }
}

static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
                                   int val1, bool c_val1,
                                   int val2, bool c_val2)
{
    if (val1 == 0) {
        if (c_val2) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
            val2 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
        return;
    }

    if (val2 == 0) {
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
            val1 = TCG_REG_TMP1;
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
        return;
    }

    if (c_val2) {
        if (c_val1) {
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
        } else {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
        }
        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
        return;
    }

    if (c_val1) {
        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
        return;
    }

    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
}

static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val, bool c_val)
{
    RISCVInsn op;
    int disp = 8;

    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
    op = tcg_brcond_to_riscv[cond].op;
    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_riscv[cond].swap) {
        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
    } else {
        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
    }
    if (c_val) {
        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
    } else {
        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
    }
}

static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
                                TCGReg cmp1, TCGReg cmp2,
                                int val1, bool c_val1,
                                int val2, bool c_val2)
{
    TCGReg tmp;

    /* TCG optimizer reorders to prefer ret matching val2. */
    if (!c_val2 && ret == val2) {
        cond = tcg_invert_cond(cond);
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
        return;
    }

    if (!c_val1 && ret == val1) {
        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
        return;
    }

    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
    if (c_val1) {
        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
    } else {
        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
    }
    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
}

static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg cmp1, int cmp2, bool c_cmp2,
                            TCGReg val1, bool c_val1,
                            TCGReg val2, bool c_val2)
{
    int tmpflags;
    TCGReg t;

    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
                            val1, c_val1, val2, c_val2);
        return;
    }

    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
    t = tmpflags & ~SETCOND_FLAGS;

    if (cpuinfo & CPUINFO_ZICOND) {
        if (tmpflags & SETCOND_INV) {
            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
        } else {
            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
        }
    } else {
        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
                            val1, c_val1, val2, c_val2);
    }
}

static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
{
    tcg_out_opc_imm(s, insn, ret, src1, 0);

    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
        /*
         * The requested zero result does not match the insn, so adjust.
         * Note that constraints put 'ret' in a new register, so the
         * computation above did not clobber either 'src1' or 'src2'.
         */
        tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true,
                        src2, c_src2, ret, false);
    }
}

static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
                           TCGCond cond, TCGReg ret,
                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
                           TCGReg val1, bool c_val1,
                           TCGReg val2, bool c_val2)
{
    set_vtype_len_sew(s, type, vece);

    /* Use only vmerge_vim if possible, by inverting the test. */
    if (c_val2 && !c_val1) {
        TCGArg temp = val1;
        cond = tcg_invert_cond(cond);
        val1 = val2;
        val2 = temp;
        c_val1 = true;
        c_val2 = false;
    }

    /* Perform the comparison into V0 mask. */
    if (c_cmp2) {
        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp2, cmp1);
    } else {
        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
                       TCG_REG_V0, cmp1, cmp2);
    }
    if (c_val1) {
        if (c_val2) {
            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
            val2 = ret;
        }
        /* vd[i] == v0.mask[i] ? imm : vs2[i] */
        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
    } else {
        /* vd[i] == v0.mask[i] ? vs1[i] : vs2[i] */
        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
    }
}

static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
                             TCGReg dst, TCGReg src, unsigned imm)
{
    if (imm < 32) {
        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
    } else {
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
    }
}

static void init_setting_vtype(TCGContext *s)
{
    s->riscv_cur_type = TCG_TYPE_COUNT;
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
    int ret;

    init_setting_vtype(s);

    tcg_debug_assert((offset & 1) == 0);
    if (offset == sextreg(offset, 0, 20)) {
        /* short jump: -2097150 to 2097152 */
        tcg_out_opc_jump(s, OPC_JAL, link, offset);
    } else if (offset == (int32_t)offset) {
        /* long jump: -2147483646 to 2147483648 */
        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
        ret = reloc_call(s->code_ptr - 2, arg);
        tcg_debug_assert(ret == true);
    } else {
        /* far jump: 64-bit */
        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
        tcg_target_long base = (tcg_target_long)arg - imm;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, arg, false);
}

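/*
 * The FENCE predecessor and successor sets live in bits 27:24 (PI/PO/PR/PW)
 * and 23:20 (SI/SO/SR/SW) respectively; the constants below set only the
 * memory read/write bits, e.g. 0x02200000 is "fence r,r" for a load-load
 * barrier.
 */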
static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    tcg_insn_unit insn = OPC_FENCE;

    if (a0 & TCG_MO_LD_LD) {
        insn |= 0x02200000;
    }
    if (a0 & TCG_MO_ST_LD) {
        insn |= 0x01200000;
    }
    if (a0 & TCG_MO_LD_ST) {
        insn |= 0x02100000;
    }
    if (a0 & TCG_MO_ST_ST) {
        insn |= 0x01100000;
    }
    tcg_out32(s, insn);
}

/*
 * Load/store and TLB
 */

static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
    bool ok = reloc_jimm20(s->code_ptr - 1, target);
    tcg_debug_assert(ok);
}

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

/* We have three temps; we might as well expose them. */
1656static const TCGLdstHelperParam ldst_helper_param = {
1657    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
1658};
1659
1660static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1661{
1662    MemOp opc = get_memop(l->oi);
1663
1664    /* resolve label address */
1665    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1666        return false;
1667    }
1668
1669    /* call load helper */
1670    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
1671    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
1672    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
1673
1674    tcg_out_goto(s, l->raddr);
1675    return true;
1676}
1677
1678static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1679{
1680    MemOp opc = get_memop(l->oi);
1681
1682    /* resolve label address */
1683    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1684        return false;
1685    }
1686
1687    /* call store helper */
1688    tcg_out_st_helper_args(s, l, &ldst_helper_param);
1689    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
1690
1691    tcg_out_goto(s, l->raddr);
1692    return true;
1693}
1694
1695/* We expect to use a 12-bit negative offset from ENV.  */
1696#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)
1697
1698/*
1699 * For system-mode, perform the TLB load and compare.
1700 * For user-mode, perform any required alignment tests.
1701 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1702 * is required and fill in @h with the host address for the fast path.
1703 */
1704static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
1705                                           TCGReg addr_reg, MemOpIdx oi,
1706                                           bool is_ld)
1707{
1708    TCGType addr_type = s->addr_type;
1709    TCGLabelQemuLdst *ldst = NULL;
1710    MemOp opc = get_memop(oi);
1711    TCGAtomAlign aa;
1712    unsigned a_mask;
1713
1714    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
1715    a_mask = (1u << aa.align) - 1;
1716
1717    if (tcg_use_softmmu) {
1718        unsigned s_bits = opc & MO_SIZE;
1719        unsigned s_mask = (1u << s_bits) - 1;
1720        int mem_index = get_mmuidx(oi);
1721        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
1722        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
1723        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
1724        int compare_mask;
1725        TCGReg addr_adj;
1726
1727        ldst = new_ldst_label(s);
1728        ldst->is_ld = is_ld;
1729        ldst->oi = oi;
1730        ldst->addrlo_reg = addr_reg;
1731
1732        init_setting_vtype(s);
1733
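        /* Load the TLB mask and table pointer for this mmu index. */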
1734        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
1735        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
1736
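        /*
         * Form the CPUTLBEntry address in TMP2: shift the address down to
         * the TLB index (the mask is pre-scaled by the entry size), mask,
         * and add the table base.
         */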
1737        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
1738                        s->page_bits - CPU_TLB_ENTRY_BITS);
1739        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
1740        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
1741
1742        /*
1743         * For aligned accesses, we check the first byte and include the
1744         * alignment bits within the address.  For unaligned accesses, we
1745         * use the address of the last byte to check that the access does
1746         * not cross a page boundary.
1747         */
1748        addr_adj = addr_reg;
1749        if (a_mask < s_mask) {
1750            addr_adj = TCG_REG_TMP0;
1751            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
1752                            addr_adj, addr_reg, s_mask - a_mask);
1753        }
1754        compare_mask = s->page_mask | a_mask;
1755        if (compare_mask == sextreg(compare_mask, 0, 12)) {
1756            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
1757        } else {
1758            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
1759            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
1760        }
1761
1762        /* Load the tlb comparator and the addend.  */
1763        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1764        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
1765                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1766                         : offsetof(CPUTLBEntry, addr_write));
1767        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
1768                   offsetof(CPUTLBEntry, addend));
1769
1770        /* Compare masked address with the TLB entry. */
1771        ldst->label_ptr[0] = s->code_ptr;
1772        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
1773
1774        /* TLB Hit - translate address using addend.  */
1775        if (addr_type != TCG_TYPE_I32) {
1776            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
1777        } else if (cpuinfo & CPUINFO_ZBA) {
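            /* add.uw zero-extends the 32-bit address while adding the addend. */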
1778            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
1779                            addr_reg, TCG_REG_TMP2);
1780        } else {
1781            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
1782            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
1783                            TCG_REG_TMP0, TCG_REG_TMP2);
1784        }
1785        *pbase = TCG_REG_TMP0;
1786    } else {
1787        TCGReg base;
1788
1789        if (a_mask) {
1790            ldst = new_ldst_label(s);
1791            ldst->is_ld = is_ld;
1792            ldst->oi = oi;
1793            ldst->addrlo_reg = addr_reg;
1794
1795            init_setting_vtype(s);
1796
1797            /* We expect an alignment of at most 7, so we can always use andi. */
1798            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
1799            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
1800
1801            ldst->label_ptr[0] = s->code_ptr;
1802            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
1803        }
1804
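        /* Combine guest_base with the guest address, zero-extending 32-bit guest addresses. */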
1805        if (guest_base != 0) {
1806            base = TCG_REG_TMP0;
1807            if (addr_type != TCG_TYPE_I32) {
1808                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
1809                                TCG_GUEST_BASE_REG);
1810            } else if (cpuinfo & CPUINFO_ZBA) {
1811                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
1812                                TCG_GUEST_BASE_REG);
1813            } else {
1814                tcg_out_ext32u(s, base, addr_reg);
1815                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
1816            }
1817        } else if (addr_type != TCG_TYPE_I32) {
1818            base = addr_reg;
1819        } else {
1820            base = TCG_REG_TMP0;
1821            tcg_out_ext32u(s, base, addr_reg);
1822        }
1823        *pbase = base;
1824    }
1825
1826    return ldst;
1827}
1828
1829static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
1830                                   TCGReg base, MemOp opc, TCGType type)
1831{
1832    /* Byte swapping is left to middle-end expansion. */
1833    tcg_debug_assert((opc & MO_BSWAP) == 0);
1834
1835    switch (opc & (MO_SSIZE)) {
1836    case MO_UB:
1837        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
1838        break;
1839    case MO_SB:
1840        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
1841        break;
1842    case MO_UW:
1843        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
1844        break;
1845    case MO_SW:
1846        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
1847        break;
1848    case MO_UL:
1849        if (type == TCG_TYPE_I64) {
1850            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
1851            break;
1852        }
1853        /* FALLTHRU */
1854    case MO_SL:
1855        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
1856        break;
1857    case MO_UQ:
1858        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
1859        break;
1860    default:
1861        g_assert_not_reached();
1862    }
1863}
1864
1865static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1866                            MemOpIdx oi, TCGType data_type)
1867{
1868    TCGLabelQemuLdst *ldst;
1869    TCGReg base;
1870
1871    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
1872    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
1873
1874    if (ldst) {
1875        ldst->type = data_type;
1876        ldst->datalo_reg = data_reg;
1877        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1878    }
1879}
1880
1881static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
1882                                   TCGReg base, MemOp opc)
1883{
1884    /* Byte swapping is left to middle-end expansion. */
1885    tcg_debug_assert((opc & MO_BSWAP) == 0);
1886
1887    switch (opc & (MO_SSIZE)) {
1888    case MO_8:
1889        tcg_out_opc_store(s, OPC_SB, base, val, 0);
1890        break;
1891    case MO_16:
1892        tcg_out_opc_store(s, OPC_SH, base, val, 0);
1893        break;
1894    case MO_32:
1895        tcg_out_opc_store(s, OPC_SW, base, val, 0);
1896        break;
1897    case MO_64:
1898        tcg_out_opc_store(s, OPC_SD, base, val, 0);
1899        break;
1900    default:
1901        g_assert_not_reached();
1902    }
1903}
1904
1905static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1906                            MemOpIdx oi, TCGType data_type)
1907{
1908    TCGLabelQemuLdst *ldst;
1909    TCGReg base;
1910
1911    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
1912    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
1913
1914    if (ldst) {
1915        ldst->type = data_type;
1916        ldst->datalo_reg = data_reg;
1917        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1918    }
1919}
1920
1921static const tcg_insn_unit *tb_ret_addr;
1922
1923static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1924{
1925    /* Reuse the zeroing that exists for goto_ptr.  */
1926    if (a0 == 0) {
1927        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
1928    } else {
1929        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
1930        tcg_out_call_int(s, tb_ret_addr, true);
1931    }
1932}
1933
1934static void tcg_out_goto_tb(TCGContext *s, int which)
1935{
1936    /* Direct branch will be patched by tb_target_set_jmp_target. */
1937    set_jmp_insn_offset(s, which);
1938    tcg_out32(s, OPC_JAL);
1939
1940    /* When the branch is out of range, fall through to the indirect branch. */
1941    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
1942               get_jmp_target_addr(s, which));
1943    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
1944    set_jmp_reset_offset(s, which);
1945}
1946
1947void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1948                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1949{
1950    uintptr_t addr = tb->jmp_target_addr[n];
1951    ptrdiff_t offset = addr - jmp_rx;
1952    tcg_insn_unit insn;
1953
1954    /* Either branch directly, or fall through to the indirect branch. */
1955    if (offset == sextreg(offset, 0, 20)) {
1956        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
1957    } else {
1958        insn = OPC_NOP;
1959    }
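    /* Patch the single branch instruction atomically, then flush the icache for it. */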
1960    qatomic_set((uint32_t *)jmp_rw, insn);
1961    flush_idcache_range(jmp_rx, jmp_rw, 4);
1962}
1963
1964static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
1965                       const TCGArg args[TCG_MAX_OP_ARGS],
1966                       const int const_args[TCG_MAX_OP_ARGS])
1967{
1968    TCGArg a0 = args[0];
1969    TCGArg a1 = args[1];
1970    TCGArg a2 = args[2];
1971    int c2 = const_args[2];
1972
1973    switch (opc) {
1974    case INDEX_op_goto_ptr:
1975        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
1976        break;
1977
1978    case INDEX_op_br:
1979        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
1980        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1981        break;
1982
1983    case INDEX_op_ld8u_i32:
1984    case INDEX_op_ld8u_i64:
1985        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
1986        break;
1987    case INDEX_op_ld8s_i32:
1988    case INDEX_op_ld8s_i64:
1989        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
1990        break;
1991    case INDEX_op_ld16u_i32:
1992    case INDEX_op_ld16u_i64:
1993        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
1994        break;
1995    case INDEX_op_ld16s_i32:
1996    case INDEX_op_ld16s_i64:
1997        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
1998        break;
1999    case INDEX_op_ld32u_i64:
2000        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
2001        break;
2002    case INDEX_op_ld_i32:
2003    case INDEX_op_ld32s_i64:
2004        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
2005        break;
2006    case INDEX_op_ld_i64:
2007        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
2008        break;
2009
2010    case INDEX_op_st8_i32:
2011    case INDEX_op_st8_i64:
2012        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
2013        break;
2014    case INDEX_op_st16_i32:
2015    case INDEX_op_st16_i64:
2016        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
2017        break;
2018    case INDEX_op_st_i32:
2019    case INDEX_op_st32_i64:
2020        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
2021        break;
2022    case INDEX_op_st_i64:
2023        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
2024        break;
2025
2026    case INDEX_op_add_i32:
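        /* 32-bit operations use the W-form instructions, keeping results sign-extended. */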
2027        if (c2) {
2028            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
2029        } else {
2030            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
2031        }
2032        break;
2033    case INDEX_op_add_i64:
2034        if (c2) {
2035            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
2036        } else {
2037            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
2038        }
2039        break;
2040
2041    case INDEX_op_sub_i32:
2042        if (c2) {
2043            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
2044        } else {
2045            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
2046        }
2047        break;
2048    case INDEX_op_sub_i64:
2049        if (c2) {
2050            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
2051        } else {
2052            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
2053        }
2054        break;
2055
2056    case INDEX_op_and_i32:
2057    case INDEX_op_and_i64:
2058        if (c2) {
2059            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
2060        } else {
2061            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
2062        }
2063        break;
2064
2065    case INDEX_op_or_i32:
2066    case INDEX_op_or_i64:
2067        if (c2) {
2068            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
2069        } else {
2070            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
2071        }
2072        break;
2073
2074    case INDEX_op_xor_i32:
2075    case INDEX_op_xor_i64:
2076        if (c2) {
2077            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
2078        } else {
2079            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
2080        }
2081        break;
2082
2083    case INDEX_op_andc_i32:
2084    case INDEX_op_andc_i64:
2085        if (c2) {
2086            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2);
2087        } else {
2088            tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
2089        }
2090        break;
2091    case INDEX_op_orc_i32:
2092    case INDEX_op_orc_i64:
2093        if (c2) {
2094            tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2);
2095        } else {
2096            tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
2097        }
2098        break;
2099    case INDEX_op_eqv_i32:
2100    case INDEX_op_eqv_i64:
2101        if (c2) {
2102            tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2);
2103        } else {
2104            tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
2105        }
2106        break;
2107
2108    case INDEX_op_not_i32:
2109    case INDEX_op_not_i64:
2110        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
2111        break;
2112
2113    case INDEX_op_neg_i32:
2114        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
2115        break;
2116    case INDEX_op_neg_i64:
2117        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
2118        break;
2119
2120    case INDEX_op_mul_i32:
2121        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
2122        break;
2123    case INDEX_op_mul_i64:
2124        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
2125        break;
2126
2127    case INDEX_op_div_i32:
2128        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
2129        break;
2130    case INDEX_op_div_i64:
2131        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
2132        break;
2133
2134    case INDEX_op_divu_i32:
2135        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
2136        break;
2137    case INDEX_op_divu_i64:
2138        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
2139        break;
2140
2141    case INDEX_op_rem_i32:
2142        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
2143        break;
2144    case INDEX_op_rem_i64:
2145        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
2146        break;
2147
2148    case INDEX_op_remu_i32:
2149        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
2150        break;
2151    case INDEX_op_remu_i64:
2152        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
2153        break;
2154
2155    case INDEX_op_shl_i32:
2156        if (c2) {
2157            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
2158        } else {
2159            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
2160        }
2161        break;
2162    case INDEX_op_shl_i64:
2163        if (c2) {
2164            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
2165        } else {
2166            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
2167        }
2168        break;
2169
2170    case INDEX_op_shr_i32:
2171        if (c2) {
2172            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
2173        } else {
2174            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
2175        }
2176        break;
2177    case INDEX_op_shr_i64:
2178        if (c2) {
2179            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
2180        } else {
2181            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
2182        }
2183        break;
2184
2185    case INDEX_op_sar_i32:
2186        if (c2) {
2187            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
2188        } else {
2189            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
2190        }
2191        break;
2192    case INDEX_op_sar_i64:
2193        if (c2) {
2194            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
2195        } else {
2196            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
2197        }
2198        break;
2199
2200    case INDEX_op_rotl_i32:
2201        if (c2) {
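            /* Zbb has no rotate-left immediate; rotate right by the complementary amount. */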
2202            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f);
2203        } else {
2204            tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2);
2205        }
2206        break;
2207    case INDEX_op_rotl_i64:
2208        if (c2) {
2209            tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f);
2210        } else {
2211            tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2);
2212        }
2213        break;
2214
2215    case INDEX_op_rotr_i32:
2216        if (c2) {
2217            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f);
2218        } else {
2219            tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2);
2220        }
2221        break;
2222    case INDEX_op_rotr_i64:
2223        if (c2) {
2224            tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f);
2225        } else {
2226            tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2);
2227        }
2228        break;
2229
2230    case INDEX_op_bswap64_i64:
2231        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2232        break;
2233    case INDEX_op_bswap32_i32:
2234        a2 = 0;
2235        /* fall through */
2236    case INDEX_op_bswap32_i64:
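        /*
         * REV8 reverses all eight bytes of the register, leaving the
         * swapped 32-bit value in the upper half; shift it back down,
         * zero- or sign-extending as requested by the flags.
         */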
2237        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2238        if (a2 & TCG_BSWAP_OZ) {
2239            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
2240        } else {
2241            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
2242        }
2243        break;
2244    case INDEX_op_bswap16_i64:
2245    case INDEX_op_bswap16_i32:
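        /* As for bswap32: REV8 leaves the swapped 16 bits at the top; shift down by 48. */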
2246        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2247        if (a2 & TCG_BSWAP_OZ) {
2248            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
2249        } else {
2250            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
2251        }
2252        break;
2253
2254    case INDEX_op_ctpop_i32:
2255        tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
2256        break;
2257    case INDEX_op_ctpop_i64:
2258        tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
2259        break;
2260
2261    case INDEX_op_clz_i32:
2262        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2);
2263        break;
2264    case INDEX_op_clz_i64:
2265        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2);
2266        break;
2267    case INDEX_op_ctz_i32:
2268        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2);
2269        break;
2270    case INDEX_op_ctz_i64:
2271        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2);
2272        break;
2273
2274    case INDEX_op_add2_i32:
2275        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2276                        const_args[4], const_args[5], false, true);
2277        break;
2278    case INDEX_op_add2_i64:
2279        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2280                        const_args[4], const_args[5], false, false);
2281        break;
2282    case INDEX_op_sub2_i32:
2283        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2284                        const_args[4], const_args[5], true, true);
2285        break;
2286    case INDEX_op_sub2_i64:
2287        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2288                        const_args[4], const_args[5], true, false);
2289        break;
2290
2291    case INDEX_op_brcond_i32:
2292    case INDEX_op_brcond_i64:
2293        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
2294        break;
2295
2296    case INDEX_op_setcond_i32:
2297    case INDEX_op_setcond_i64:
2298        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
2299        break;
2300
2301    case INDEX_op_negsetcond_i32:
2302    case INDEX_op_negsetcond_i64:
2303        tcg_out_negsetcond(s, args[3], a0, a1, a2, c2);
2304        break;
2305
2306    case INDEX_op_movcond_i32:
2307    case INDEX_op_movcond_i64:
2308        tcg_out_movcond(s, args[5], a0, a1, a2, c2,
2309                        args[3], const_args[3], args[4], const_args[4]);
2310        break;
2311
2312    case INDEX_op_qemu_ld_a32_i32:
2313    case INDEX_op_qemu_ld_a64_i32:
2314        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
2315        break;
2316    case INDEX_op_qemu_ld_a32_i64:
2317    case INDEX_op_qemu_ld_a64_i64:
2318        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
2319        break;
2320    case INDEX_op_qemu_st_a32_i32:
2321    case INDEX_op_qemu_st_a64_i32:
2322        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
2323        break;
2324    case INDEX_op_qemu_st_a32_i64:
2325    case INDEX_op_qemu_st_a64_i64:
2326        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
2327        break;
2328
2329    case INDEX_op_extrh_i64_i32:
2330        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
2331        break;
2332
2333    case INDEX_op_mulsh_i32:
2334    case INDEX_op_mulsh_i64:
2335        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
2336        break;
2337
2338    case INDEX_op_muluh_i32:
2339    case INDEX_op_muluh_i64:
2340        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
2341        break;
2342
2343    case INDEX_op_mb:
2344        tcg_out_mb(s, a0);
2345        break;
2346
2347    case INDEX_op_extract_i64:
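        /*
         * An unsigned field that ends at bit 32 is a zero-extension
         * (ofs == 0) or a logical word shift right.
         */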
2348        if (a2 + args[3] == 32) {
2349            if (a2 == 0) {
2350                tcg_out_ext32u(s, a0, a1);
2351            } else {
2352                tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
2353            }
2354            break;
2355        }
2356        /* FALLTHRU */
2357    case INDEX_op_extract_i32:
2358        switch (args[3]) {
2359        case 1:
2360            tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, a2);
2361            break;
2362        case 16:
2363            tcg_debug_assert(a2 == 0);
2364            tcg_out_ext16u(s, a0, a1);
2365            break;
2366        default:
2367            g_assert_not_reached();
2368        }
2369        break;
2370
2371    case INDEX_op_sextract_i64:
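        /*
         * A signed field that ends at bit 32 is a sign-extension
         * (ofs == 0) or an arithmetic word shift right.
         */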
2372        if (a2 + args[3] == 32) {
2373            if (a2 == 0) {
2374                tcg_out_ext32s(s, a0, a1);
2375            } else {
2376                tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
2377            }
2378            break;
2379        }
2380        /* FALLTHRU */
2381    case INDEX_op_sextract_i32:
2382        if (a2 == 0 && args[3] == 8) {
2383            tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
2384        } else if (a2 == 0 && args[3] == 16) {
2385            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
2386        } else {
2387            g_assert_not_reached();
2388        }
2389        break;
2390
2391    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2392    case INDEX_op_mov_i64:
2393    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2394    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2395    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2396    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2397    case INDEX_op_ext8s_i64:
2398    case INDEX_op_ext8u_i32:
2399    case INDEX_op_ext8u_i64:
2400    case INDEX_op_ext16s_i32:
2401    case INDEX_op_ext16s_i64:
2402    case INDEX_op_ext16u_i32:
2403    case INDEX_op_ext16u_i64:
2404    case INDEX_op_ext32s_i64:
2405    case INDEX_op_ext32u_i64:
2406    case INDEX_op_ext_i32_i64:
2407    case INDEX_op_extu_i32_i64:
2408    case INDEX_op_extrl_i64_i32:
2409    default:
2410        g_assert_not_reached();
2411    }
2412}
2413
2414static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2415                           unsigned vecl, unsigned vece,
2416                           const TCGArg args[TCG_MAX_OP_ARGS],
2417                           const int const_args[TCG_MAX_OP_ARGS])
2418{
2419    TCGType type = vecl + TCG_TYPE_V64;
2420    TCGArg a0, a1, a2;
2421    int c2;
2422
2423    a0 = args[0];
2424    a1 = args[1];
2425    a2 = args[2];
2426    c2 = const_args[2];
2427
2428    switch (opc) {
2429    case INDEX_op_dupm_vec:
2430        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2431        break;
2432    case INDEX_op_ld_vec:
2433        tcg_out_ld(s, type, a0, a1, a2);
2434        break;
2435    case INDEX_op_st_vec:
2436        tcg_out_st(s, type, a0, a1, a2);
2437        break;
2438    case INDEX_op_add_vec:
2439        set_vtype_len_sew(s, type, vece);
2440        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
2441        break;
2442    case INDEX_op_sub_vec:
2443        set_vtype_len_sew(s, type, vece);
2444        if (const_args[1]) {
2445            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
2446        } else {
2447            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
2448        }
2449        break;
2450    case INDEX_op_and_vec:
2451        set_vtype_len(s, type);
2452        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
2453        break;
2454    case INDEX_op_or_vec:
2455        set_vtype_len(s, type);
2456        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
2457        break;
2458    case INDEX_op_xor_vec:
2459        set_vtype_len(s, type);
2460        tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
2461        break;
2462    case INDEX_op_not_vec:
2463        set_vtype_len(s, type);
2464        tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
2465        break;
2466    case INDEX_op_neg_vec:
2467        set_vtype_len_sew(s, type, vece);
2468        tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
2469        break;
2470    case INDEX_op_mul_vec:
2471        set_vtype_len_sew(s, type, vece);
2472        tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
2473        break;
2474    case INDEX_op_ssadd_vec:
2475        set_vtype_len_sew(s, type, vece);
2476        tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
2477        break;
2478    case INDEX_op_sssub_vec:
2479        set_vtype_len_sew(s, type, vece);
2480        tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
2481        break;
2482    case INDEX_op_usadd_vec:
2483        set_vtype_len_sew(s, type, vece);
2484        tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
2485        break;
2486    case INDEX_op_ussub_vec:
2487        set_vtype_len_sew(s, type, vece);
2488        tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
2489        break;
2490    case INDEX_op_smax_vec:
2491        set_vtype_len_sew(s, type, vece);
2492        tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
2493        break;
2494    case INDEX_op_smin_vec:
2495        set_vtype_len_sew(s, type, vece);
2496        tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
2497        break;
2498    case INDEX_op_umax_vec:
2499        set_vtype_len_sew(s, type, vece);
2500        tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
2501        break;
2502    case INDEX_op_umin_vec:
2503        set_vtype_len_sew(s, type, vece);
2504        tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
2505        break;
2506    case INDEX_op_shls_vec:
2507        set_vtype_len_sew(s, type, vece);
2508        tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
2509        break;
2510    case INDEX_op_shrs_vec:
2511        set_vtype_len_sew(s, type, vece);
2512        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
2513        break;
2514    case INDEX_op_sars_vec:
2515        set_vtype_len_sew(s, type, vece);
2516        tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
2517        break;
2518    case INDEX_op_shlv_vec:
2519        set_vtype_len_sew(s, type, vece);
2520        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2521        break;
2522    case INDEX_op_shrv_vec:
2523        set_vtype_len_sew(s, type, vece);
2524        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2525        break;
2526    case INDEX_op_sarv_vec:
2527        set_vtype_len_sew(s, type, vece);
2528        tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
2529        break;
2530    case INDEX_op_shli_vec:
2531        set_vtype_len_sew(s, type, vece);
2532        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
2533        break;
2534    case INDEX_op_shri_vec:
2535        set_vtype_len_sew(s, type, vece);
2536        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
2537        break;
2538    case INDEX_op_sari_vec:
2539        set_vtype_len_sew(s, type, vece);
2540        tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
2541        break;
2542    case INDEX_op_rotli_vec:
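        /*
         * The rotate expansions below use V0 (reserved for masks, so never
         * allocated to an operand) as a scratch register; the variable-amount
         * forms also rely on vector shifts using only the low log2(SEW) bits
         * of the shift amount.
         */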
2543        set_vtype_len_sew(s, type, vece);
2544        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2545        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
2546                        -a2 & ((8 << vece) - 1));
2547        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2548        break;
2549    case INDEX_op_rotls_vec:
2550        set_vtype_len_sew(s, type, vece);
2551        tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
2552        tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
2553        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
2554        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2555        break;
2556    case INDEX_op_rotlv_vec:
2557        set_vtype_len_sew(s, type, vece);
2558        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2559        tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2560        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
2561        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2562        break;
2563    case INDEX_op_rotrv_vec:
2564        set_vtype_len_sew(s, type, vece);
2565        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
2566        tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
2567        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
2568        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
2569        break;
2570    case INDEX_op_cmp_vec:
2571        tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
2572                       -1, true, 0, true);
2573        break;
2574    case INDEX_op_cmpsel_vec:
2575        tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
2576                       args[3], const_args[3], args[4], const_args[4]);
2577        break;
2578    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov.  */
2579    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec.  */
2580    default:
2581        g_assert_not_reached();
2582    }
2583}
2584
2585void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2586                       TCGArg a0, ...)
2587{
2588    g_assert_not_reached();
2589}
2590
2591int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2592{
2593    switch (opc) {
2594    case INDEX_op_add_vec:
2595    case INDEX_op_sub_vec:
2596    case INDEX_op_and_vec:
2597    case INDEX_op_or_vec:
2598    case INDEX_op_xor_vec:
2599    case INDEX_op_not_vec:
2600    case INDEX_op_neg_vec:
2601    case INDEX_op_mul_vec:
2602    case INDEX_op_ssadd_vec:
2603    case INDEX_op_sssub_vec:
2604    case INDEX_op_usadd_vec:
2605    case INDEX_op_ussub_vec:
2606    case INDEX_op_smax_vec:
2607    case INDEX_op_smin_vec:
2608    case INDEX_op_umax_vec:
2609    case INDEX_op_umin_vec:
2610    case INDEX_op_shls_vec:
2611    case INDEX_op_shrs_vec:
2612    case INDEX_op_sars_vec:
2613    case INDEX_op_shlv_vec:
2614    case INDEX_op_shrv_vec:
2615    case INDEX_op_sarv_vec:
2616    case INDEX_op_shri_vec:
2617    case INDEX_op_shli_vec:
2618    case INDEX_op_sari_vec:
2619    case INDEX_op_rotls_vec:
2620    case INDEX_op_rotlv_vec:
2621    case INDEX_op_rotrv_vec:
2622    case INDEX_op_rotli_vec:
2623    case INDEX_op_cmp_vec:
2624    case INDEX_op_cmpsel_vec:
2625        return 1;
2626    default:
2627        return 0;
2628    }
2629}
2630
2631static TCGConstraintSetIndex
2632tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
2633{
2634    switch (op) {
2635    case INDEX_op_goto_ptr:
2636        return C_O0_I1(r);
2637
2638    case INDEX_op_ld8u_i32:
2639    case INDEX_op_ld8s_i32:
2640    case INDEX_op_ld16u_i32:
2641    case INDEX_op_ld16s_i32:
2642    case INDEX_op_ld_i32:
2643    case INDEX_op_not_i32:
2644    case INDEX_op_neg_i32:
2645    case INDEX_op_ld8u_i64:
2646    case INDEX_op_ld8s_i64:
2647    case INDEX_op_ld16u_i64:
2648    case INDEX_op_ld16s_i64:
2649    case INDEX_op_ld32s_i64:
2650    case INDEX_op_ld32u_i64:
2651    case INDEX_op_ld_i64:
2652    case INDEX_op_not_i64:
2653    case INDEX_op_neg_i64:
2654    case INDEX_op_ext8u_i32:
2655    case INDEX_op_ext8u_i64:
2656    case INDEX_op_ext16u_i32:
2657    case INDEX_op_ext16u_i64:
2658    case INDEX_op_ext32u_i64:
2659    case INDEX_op_extu_i32_i64:
2660    case INDEX_op_ext8s_i32:
2661    case INDEX_op_ext8s_i64:
2662    case INDEX_op_ext16s_i32:
2663    case INDEX_op_ext16s_i64:
2664    case INDEX_op_ext32s_i64:
2665    case INDEX_op_extrl_i64_i32:
2666    case INDEX_op_extrh_i64_i32:
2667    case INDEX_op_ext_i32_i64:
2668    case INDEX_op_extract_i32:
2669    case INDEX_op_extract_i64:
2670    case INDEX_op_sextract_i32:
2671    case INDEX_op_sextract_i64:
2672    case INDEX_op_bswap16_i32:
2673    case INDEX_op_bswap32_i32:
2674    case INDEX_op_bswap16_i64:
2675    case INDEX_op_bswap32_i64:
2676    case INDEX_op_bswap64_i64:
2677    case INDEX_op_ctpop_i32:
2678    case INDEX_op_ctpop_i64:
2679        return C_O1_I1(r, r);
2680
2681    case INDEX_op_st8_i32:
2682    case INDEX_op_st16_i32:
2683    case INDEX_op_st_i32:
2684    case INDEX_op_st8_i64:
2685    case INDEX_op_st16_i64:
2686    case INDEX_op_st32_i64:
2687    case INDEX_op_st_i64:
2688        return C_O0_I2(rZ, r);
2689
2690    case INDEX_op_add_i32:
2691    case INDEX_op_and_i32:
2692    case INDEX_op_or_i32:
2693    case INDEX_op_xor_i32:
2694    case INDEX_op_add_i64:
2695    case INDEX_op_and_i64:
2696    case INDEX_op_or_i64:
2697    case INDEX_op_xor_i64:
2698    case INDEX_op_setcond_i32:
2699    case INDEX_op_setcond_i64:
2700    case INDEX_op_negsetcond_i32:
2701    case INDEX_op_negsetcond_i64:
2702        return C_O1_I2(r, r, rI);
2703
2704    case INDEX_op_andc_i32:
2705    case INDEX_op_andc_i64:
2706    case INDEX_op_orc_i32:
2707    case INDEX_op_orc_i64:
2708    case INDEX_op_eqv_i32:
2709    case INDEX_op_eqv_i64:
2710        return C_O1_I2(r, r, rJ);
2711
2712    case INDEX_op_sub_i32:
2713    case INDEX_op_sub_i64:
2714        return C_O1_I2(r, rZ, rN);
2715
2716    case INDEX_op_mul_i32:
2717    case INDEX_op_mulsh_i32:
2718    case INDEX_op_muluh_i32:
2719    case INDEX_op_div_i32:
2720    case INDEX_op_divu_i32:
2721    case INDEX_op_rem_i32:
2722    case INDEX_op_remu_i32:
2723    case INDEX_op_mul_i64:
2724    case INDEX_op_mulsh_i64:
2725    case INDEX_op_muluh_i64:
2726    case INDEX_op_div_i64:
2727    case INDEX_op_divu_i64:
2728    case INDEX_op_rem_i64:
2729    case INDEX_op_remu_i64:
2730        return C_O1_I2(r, rZ, rZ);
2731
2732    case INDEX_op_shl_i32:
2733    case INDEX_op_shr_i32:
2734    case INDEX_op_sar_i32:
2735    case INDEX_op_rotl_i32:
2736    case INDEX_op_rotr_i32:
2737    case INDEX_op_shl_i64:
2738    case INDEX_op_shr_i64:
2739    case INDEX_op_sar_i64:
2740    case INDEX_op_rotl_i64:
2741    case INDEX_op_rotr_i64:
2742        return C_O1_I2(r, r, ri);
2743
2744    case INDEX_op_clz_i32:
2745    case INDEX_op_clz_i64:
2746    case INDEX_op_ctz_i32:
2747    case INDEX_op_ctz_i64:
2748        return C_N1_I2(r, r, rM);
2749
2750    case INDEX_op_brcond_i32:
2751    case INDEX_op_brcond_i64:
2752        return C_O0_I2(rZ, rZ);
2753
2754    case INDEX_op_movcond_i32:
2755    case INDEX_op_movcond_i64:
2756        return C_O1_I4(r, r, rI, rM, rM);
2757
2758    case INDEX_op_add2_i32:
2759    case INDEX_op_add2_i64:
2760    case INDEX_op_sub2_i32:
2761    case INDEX_op_sub2_i64:
2762        return C_O2_I4(r, r, rZ, rZ, rM, rM);
2763
2764    case INDEX_op_qemu_ld_a32_i32:
2765    case INDEX_op_qemu_ld_a64_i32:
2766    case INDEX_op_qemu_ld_a32_i64:
2767    case INDEX_op_qemu_ld_a64_i64:
2768        return C_O1_I1(r, r);
2769    case INDEX_op_qemu_st_a32_i32:
2770    case INDEX_op_qemu_st_a64_i32:
2771    case INDEX_op_qemu_st_a32_i64:
2772    case INDEX_op_qemu_st_a64_i64:
2773        return C_O0_I2(rZ, r);
2774
2775    case INDEX_op_st_vec:
2776        return C_O0_I2(v, r);
2777    case INDEX_op_dup_vec:
2778    case INDEX_op_dupm_vec:
2779    case INDEX_op_ld_vec:
2780        return C_O1_I1(v, r);
2781    case INDEX_op_neg_vec:
2782    case INDEX_op_not_vec:
2783    case INDEX_op_shli_vec:
2784    case INDEX_op_shri_vec:
2785    case INDEX_op_sari_vec:
2786    case INDEX_op_rotli_vec:
2787        return C_O1_I1(v, v);
2788    case INDEX_op_add_vec:
2789    case INDEX_op_and_vec:
2790    case INDEX_op_or_vec:
2791    case INDEX_op_xor_vec:
2792    case INDEX_op_ssadd_vec:
2793    case INDEX_op_sssub_vec:
2794    case INDEX_op_usadd_vec:
2795    case INDEX_op_ussub_vec:
2796    case INDEX_op_smax_vec:
2797    case INDEX_op_smin_vec:
2798    case INDEX_op_umax_vec:
2799    case INDEX_op_umin_vec:
2800        return C_O1_I2(v, v, vK);
2801    case INDEX_op_sub_vec:
2802        return C_O1_I2(v, vK, v);
2803    case INDEX_op_mul_vec:
2804    case INDEX_op_shlv_vec:
2805    case INDEX_op_shrv_vec:
2806    case INDEX_op_sarv_vec:
2807    case INDEX_op_rotlv_vec:
2808    case INDEX_op_rotrv_vec:
2809        return C_O1_I2(v, v, v);
2810    case INDEX_op_shls_vec:
2811    case INDEX_op_shrs_vec:
2812    case INDEX_op_sars_vec:
2813    case INDEX_op_rotls_vec:
2814        return C_O1_I2(v, v, r);
2815    case INDEX_op_cmp_vec:
2816        return C_O1_I2(v, v, vL);
2817    case INDEX_op_cmpsel_vec:
2818        return C_O1_I4(v, v, vL, vK, vK);
2819    default:
2820        return C_NotImplemented;
2821    }
2822}
2823
2824static const int tcg_target_callee_save_regs[] = {
2825    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
2826    TCG_REG_S1,
2827    TCG_REG_S2,
2828    TCG_REG_S3,
2829    TCG_REG_S4,
2830    TCG_REG_S5,
2831    TCG_REG_S6,
2832    TCG_REG_S7,
2833    TCG_REG_S8,
2834    TCG_REG_S9,
2835    TCG_REG_S10,
2836    TCG_REG_S11,
2837    TCG_REG_RA,       /* should be last for ABI compliance */
2838};
2839
2840/* Stack frame parameters.  */
2841#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
2842#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
2843#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2844#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
2845                     + TCG_TARGET_STACK_ALIGN - 1) \
2846                    & -TCG_TARGET_STACK_ALIGN)
2847#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
2848
2849/* We're expecting to be able to use an immediate for frame allocation.  */
2850QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2851
2852/* Generate global QEMU prologue and epilogue code */
2853static void tcg_target_qemu_prologue(TCGContext *s)
2854{
2855    int i;
2856
2857    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2858
2859    /* TB prologue */
2860    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2861    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2862        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2863                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2864    }
2865
2866    if (!tcg_use_softmmu && guest_base) {
2867        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
2868        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2869    }
2870
2871    /* Call generated code */
2872    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2873    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
2874
2875    /* Return path for goto_ptr.  Set the return value to 0. */
2876    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2877    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
2878
2879    /* TB epilogue */
2880    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2881    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2882        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2883                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2884    }
2885
2886    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
2887    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
2888}
2889
2890static void tcg_out_tb_start(TCGContext *s)
2891{
2892    init_setting_vtype(s);
2893}
2894
2895static bool vtype_check(unsigned vtype)
2896{
2897    unsigned long tmp;
2898
2899    /* vsetvl tmp, zero, vtype */
2900    asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
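    /* An unsupported vtype sets vill and forces vl to zero, so nonzero means accepted. */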
2901    return tmp != 0;
2902}
2903
2904static void probe_frac_lmul_1(TCGType type, MemOp vsew)
2905{
2906    VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
2907    unsigned avl = tcg_type_size(type) >> vsew;
2908    int lmul = type - riscv_lg2_vlenb;
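    /* type is also log2 of the vector size in bytes (TCG_TYPE_V64 == 3), so this is log2(LMUL). */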
2909    unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
2910    bool lmul_eq_avl = true;
2911
2912    /* Guaranteed by Zve64x. */
2913    assert(lmul < 3);
2914
2915    /*
2916     * For LMUL < -3, the host vector size is so large that TYPE
2917     * is smaller than the minimum 1/8 fraction.
2918     *
2919     * For other fractional LMUL settings, implementations must
2920     * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
2921     * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
2922     * but e64 may not be supported. In other words, the hardware only
2923     * guarantees SEW_MIN <= SEW <= LMUL * ELEN.  Check.
2924     */
2925    if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
2926        vtype = encode_vtype(true, true, vsew, VLMUL_M1);
2927        lmul_eq_avl = false;
2928    }
2929
2930    if (avl < 32) {
2931        p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
2932    } else if (lmul_eq_avl) {
2933        /* With rd != 0 and rs1 == 0, vsetvli selects VLMAX. */
2934        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
2935    } else {
2936        p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
2937        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
2938    }
2939}
2940
2941static void probe_frac_lmul(void)
2942{
2943    /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
2944    QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
2945
2946    for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
2947        for (MemOp e = MO_8; e <= MO_64; e++) {
2948            probe_frac_lmul_1(t, e);
2949        }
2950    }
2951}
2952
2953static void tcg_target_init(TCGContext *s)
2954{
2955    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
2956    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
2957
2958    tcg_target_call_clobber_regs = -1;
2959    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
2960    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
2961    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
2962    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
2963    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
2964    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
2965    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
2966    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
2967    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
2968    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
2969    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
2970    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
2971
2972    s->reserved_regs = 0;
2973    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
2974    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
2975    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
2976    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
2977    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2978    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
2979    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
2980
2981    if (cpuinfo & CPUINFO_ZVE64X) {
2982        switch (riscv_lg2_vlenb) {
2983        case TCG_TYPE_V64:
2984            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2985            tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
2986            tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
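            /*
             * Reserve the vector registers that cannot start a register
             * group for the widest type required at this VLEN.
             */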
2987            s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2988            break;
2989        case TCG_TYPE_V128:
2990            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2991            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2992            tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
2993            s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2994            break;
2995        default:
2996            /* Guaranteed by Zve64x. */
2997            tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
2998            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2999            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3000            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
3001            break;
3002        }
3003        tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
3004        probe_frac_lmul();
3005    }
3006}
3007
3008typedef struct {
3009    DebugFrameHeader h;
3010    uint8_t fde_def_cfa[4];
3011    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
3012} DebugFrame;
3013
3014#define ELF_HOST_MACHINE EM_RISCV
3015
3016static const DebugFrame debug_frame = {
3017    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
3018    .h.cie.id = -1,
3019    .h.cie.version = 1,
3020    .h.cie.code_align = 1,
3021    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
3022    .h.cie.return_column = TCG_REG_RA,
3023
3024    /* Total FDE size does not include the "len" member.  */
3025    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3026
3027    .fde_def_cfa = {
3028        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3029        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3030        (FRAME_SIZE >> 7)
3031    },
3032    .fde_reg_ofs = {
3033        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
3034        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
3035        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
3036        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
3037        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
3038        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
3039        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
3040        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
3041        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
3042        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
3043        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
3044        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
3045    }
3046};
3047
3048void tcg_register_jit(const void *buf, size_t buf_size)
3049{
3050    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3051}
3052