xref: /openbmc/qemu/tcg/riscv/tcg-target.c.inc (revision 61d6a8767a5d4cd4fe5086ef98b53614ae099104)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2018 SiFive, Inc
5 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
6 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
7 * Copyright (c) 2008 Fabrice Bellard
8 *
9 * Based on i386/tcg-target.c and mips/tcg-target.c
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 */
29
/*
 * Used for function call generation.
 * These describe the host calling convention to the common TCG code.
 */
/* Register used as the stack pointer for outgoing calls. */
#define TCG_REG_CALL_STACK              TCG_REG_SP
/* Required stack alignment (presumably in bytes -- confirm in tcg.c). */
#define TCG_TARGET_STACK_ALIGN          16
/* Offset from the stack register at which call arguments start. */
#define TCG_TARGET_CALL_STACK_OFFSET    0
/* How 32-, 64- and 128-bit arguments and 128-bit returns are passed. */
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I128        TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL
38
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names for debug output, indexed by register number:
 * entries 0-31 are the integer registers (ABI names), 32-63 are v0-v31.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* x0 - x7 */
    "zero", "ra",  "sp",  "gp",
    "tp",   "t0",  "t1",  "t2",
    /* x8 - x15 */
    "s0",   "s1",  "a0",  "a1",
    "a2",   "a3",  "a4",  "a5",
    /* x16 - x23 */
    "a6",   "a7",  "s2",  "s3",
    "s4",   "s5",  "s6",  "s7",
    /* x24 - x31 */
    "s8",   "s9",  "s10", "s11",
    "t3",   "t4",  "t5",  "t6",
    /* vector registers */
    "v0",   "v1",  "v2",  "v3",
    "v4",   "v5",  "v6",  "v7",
    "v8",   "v9",  "v10", "v11",
    "v12",  "v13", "v14", "v15",
    "v16",  "v17", "v18", "v19",
    "v20",  "v21", "v22", "v23",
    "v24",  "v25", "v26", "v27",
    "v28",  "v29", "v30", "v31",
};
#endif
51
52static const int tcg_target_reg_alloc_order[] = {
53    /* Call saved registers */
54    /* TCG_REG_S0 reserved for TCG_AREG0 */
55    TCG_REG_S1,
56    TCG_REG_S2,
57    TCG_REG_S3,
58    TCG_REG_S4,
59    TCG_REG_S5,
60    TCG_REG_S6,
61    TCG_REG_S7,
62    TCG_REG_S8,
63    TCG_REG_S9,
64    TCG_REG_S10,
65    TCG_REG_S11,
66
67    /* Call clobbered registers */
68    TCG_REG_T0,
69    TCG_REG_T1,
70    TCG_REG_T2,
71    TCG_REG_T3,
72    TCG_REG_T4,
73    TCG_REG_T5,
74    TCG_REG_T6,
75
76    /* Argument registers */
77    TCG_REG_A0,
78    TCG_REG_A1,
79    TCG_REG_A2,
80    TCG_REG_A3,
81    TCG_REG_A4,
82    TCG_REG_A5,
83    TCG_REG_A6,
84    TCG_REG_A7,
85
86    /* Vector registers and TCG_REG_V0 reserved for mask. */
87    TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,  TCG_REG_V4,
88    TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,  TCG_REG_V8,
89    TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
90    TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
91    TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
92    TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
93    TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
94    TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
95};
96
97static const int tcg_target_call_iarg_regs[] = {
98    TCG_REG_A0,
99    TCG_REG_A1,
100    TCG_REG_A2,
101    TCG_REG_A3,
102    TCG_REG_A4,
103    TCG_REG_A5,
104    TCG_REG_A6,
105    TCG_REG_A7,
106};
107
108static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
109{
110    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
111    tcg_debug_assert(slot >= 0 && slot <= 1);
112    return TCG_REG_A0 + slot;
113}
114
/*
 * Target-specific constant constraint flags, tested by
 * tcg_target_const_match():
 *   S12    - signed 12-bit immediate, [-0x800, 0x7ff]
 *   M12    - signed 12-bit immediate whose negation is also
 *            representable, [-0x7ff, 0x7ff]
 *   S5     - signed 5-bit vector immediate, [-0x10, 0x0f]
 *   CMP_VI - immediate accepted by the vector compare selected by the
 *            condition (range taken from tcg_cmpcond_to_rvv_vi[])
 */
#define TCG_CT_CONST_S12     0x100
#define TCG_CT_CONST_M12     0x200
#define TCG_CT_CONST_S5      0x400
#define TCG_CT_CONST_CMP_VI  0x800

/*
 * Register-set bit masks: integer registers occupy bits 0-31 and vector
 * registers bits 32-63.  The D/Q group masks select every 2nd / every
 * 4th vector register starting with v0 (presumably the legal base
 * registers of 2- and 4-register groups -- confirm against the users).
 */
#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000

/* Shorthand: sign-extend a bit field of a 64-bit value. */
#define sextreg  sextract64
126
127/*
128 * RISC-V Base ISA opcodes (IM)
129 */
130
/*
 * Values for the 3-bit field at bits [14:12] of vector instructions,
 * OR-ed into the opcode templates below to select the operand form
 * (names follow the RVV specification categories).
 */
#define V_OPIVV (0x0 << 12)
#define V_OPFVV (0x1 << 12)
#define V_OPMVV (0x2 << 12)
#define V_OPIVI (0x3 << 12)
#define V_OPIVX (0x4 << 12)
#define V_OPFVF (0x5 << 12)
#define V_OPMVX (0x6 << 12)
#define V_OPCFG (0x7 << 12)
139
/*
 * NF field of vector load/store encodings, placed at bits [31:29].
 * NF <= 7 && NF >= 0.  The argument is parenthesized so that compound
 * expressions (e.g. V_NF(a | b)) are shifted as a whole.
 */
#define V_NF(x) ((x) << 29)
/* Addressing-mode selectors placed at bit 20 and above of load/store. */
#define V_UNIT_STRIDE (0x0 << 20)
#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
144
/*
 * Vector register group multiplier (LMUL) encodings, as placed in the
 * low bits of vtype by encode_vtype().
 */
typedef enum {
    VLMUL_M1       = 0, /* LMUL=1 */
    VLMUL_M2       = 1, /* LMUL=2 */
    VLMUL_M4       = 2, /* LMUL=4 */
    VLMUL_M8       = 3, /* LMUL=8 */
    VLMUL_RESERVED = 4,
    VLMUL_MF8      = 5, /* LMUL=1/8 */
    VLMUL_MF4      = 6, /* LMUL=1/4 */
    VLMUL_MF2      = 7, /* LMUL=1/2 */
} RISCVVlmul;
155
/*
 * Instruction templates.  Each enumerator holds the fixed opcode/function
 * bits of one instruction with all register and immediate fields zero;
 * the encode_*() helpers OR the operand fields into it.
 */
typedef enum {
    OPC_ADD = 0x33,
    OPC_ADDI = 0x13,
    OPC_AND = 0x7033,
    OPC_ANDI = 0x7013,
    OPC_AUIPC = 0x17,
    OPC_BEQ = 0x63,
    OPC_BEXTI = 0x48005013,
    OPC_BGE = 0x5063,
    OPC_BGEU = 0x7063,
    OPC_BLT = 0x4063,
    OPC_BLTU = 0x6063,
    OPC_BNE = 0x1063,
    OPC_DIV = 0x2004033,
    OPC_DIVU = 0x2005033,
    OPC_JAL = 0x6f,
    OPC_JALR = 0x67,
    OPC_LB = 0x3,
    OPC_LBU = 0x4003,
    OPC_LD = 0x3003,
    OPC_LH = 0x1003,
    OPC_LHU = 0x5003,
    OPC_LUI = 0x37,
    OPC_LW = 0x2003,
    OPC_LWU = 0x6003,
    OPC_MUL = 0x2000033,
    OPC_MULH = 0x2001033,
    OPC_MULHSU = 0x2002033,
    OPC_MULHU = 0x2003033,
    OPC_OR = 0x6033,
    OPC_ORI = 0x6013,
    OPC_REM = 0x2006033,
    OPC_REMU = 0x2007033,
    OPC_SB = 0x23,
    OPC_SD = 0x3023,
    OPC_SH = 0x1023,
    OPC_SLL = 0x1033,
    OPC_SLLI = 0x1013,
    OPC_SLT = 0x2033,
    OPC_SLTI = 0x2013,
    OPC_SLTIU = 0x3013,
    OPC_SLTU = 0x3033,
    OPC_SRA = 0x40005033,
    OPC_SRAI = 0x40005013,
    OPC_SRL = 0x5033,
    OPC_SRLI = 0x5013,
    OPC_SUB = 0x40000033,
    OPC_SW = 0x2023,
    OPC_XOR = 0x4033,
    OPC_XORI = 0x4013,

    /* 32-bit ("W") forms of the integer operations */
    OPC_ADDIW = 0x1b,
    OPC_ADDW = 0x3b,
    OPC_DIVUW = 0x200503b,
    OPC_DIVW = 0x200403b,
    OPC_MULW = 0x200003b,
    OPC_REMUW = 0x200703b,
    OPC_REMW = 0x200603b,
    OPC_SLLIW = 0x101b,
    OPC_SLLW = 0x103b,
    OPC_SRAIW = 0x4000501b,
    OPC_SRAW = 0x4000503b,
    OPC_SRLIW = 0x501b,
    OPC_SRLW = 0x503b,
    OPC_SUBW = 0x4000003b,

    OPC_FENCE = 0x0000000f,
    OPC_NOP   = OPC_ADDI,   /* nop = addi r0,r0,0 */

    /* Zba: Bit manipulation extension, address generation */
    OPC_ADD_UW = 0x0800003b,

    /* Zbb: Bit manipulation extension, basic bit manipulation */
    OPC_ANDN   = 0x40007033,
    OPC_CLZ    = 0x60001013,
    OPC_CLZW   = 0x6000101b,
    OPC_CPOP   = 0x60201013,
    OPC_CPOPW  = 0x6020101b,
    OPC_CTZ    = 0x60101013,
    OPC_CTZW   = 0x6010101b,
    OPC_ORN    = 0x40006033,
    OPC_REV8   = 0x6b805013,
    OPC_ROL    = 0x60001033,
    OPC_ROLW   = 0x6000103b,
    OPC_ROR    = 0x60005033,
    OPC_RORW   = 0x6000503b,
    OPC_RORI   = 0x60005013,
    OPC_RORIW  = 0x6000501b,
    OPC_SEXT_B = 0x60401013,
    OPC_SEXT_H = 0x60501013,
    OPC_XNOR   = 0x40004033,
    OPC_ZEXT_H = 0x0800403b,

    /* Zicond: integer conditional operations */
    OPC_CZERO_EQZ = 0x0e005033,
    OPC_CZERO_NEZ = 0x0e007033,

    /* V: Vector extension 1.0 */
    OPC_VSETVLI  = 0x57 | V_OPCFG,
    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
    OPC_VSETVL   = 0x80000057 | V_OPCFG,

    /* Unit-stride vector loads/stores, one per element width */
    OPC_VLE8_V  = 0x7 | V_UNIT_STRIDE,
    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
    OPC_VSE8_V  = 0x27 | V_UNIT_STRIDE,
    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,

    /* Whole-register loads of 1, 2, 4 or 8 registers (NF = count - 1) */
    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    /* Whole-register stores of 1, 2, 4 or 8 registers (NF = count - 1) */
    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),

    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,

    OPC_VADD_VV = 0x57 | V_OPIVV,
    OPC_VADD_VI = 0x57 | V_OPIVI,
    OPC_VSUB_VV = 0x8000057 | V_OPIVV,
    OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
    OPC_VAND_VV = 0x24000057 | V_OPIVV,
    OPC_VAND_VI = 0x24000057 | V_OPIVI,
    OPC_VOR_VV = 0x28000057 | V_OPIVV,
    OPC_VOR_VI = 0x28000057 | V_OPIVI,
    OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
    OPC_VXOR_VI = 0x2c000057 | V_OPIVI,

    OPC_VMUL_VV = 0x94000057 | V_OPMVV,
    OPC_VSADD_VV = 0x84000057 | V_OPIVV,
    OPC_VSADD_VI = 0x84000057 | V_OPIVI,
    OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
    OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
    OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
    OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
    OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
    OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,

    OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
    OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
    OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
    OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
    OPC_VMIN_VV = 0x14000057 | V_OPIVV,
    OPC_VMIN_VI = 0x14000057 | V_OPIVI,
    OPC_VMINU_VV = 0x10000057 | V_OPIVV,
    OPC_VMINU_VI = 0x10000057 | V_OPIVI,

    /* Mask-producing compares */
    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,

    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,

    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,

    OPC_VSLL_VV = 0x94000057 | V_OPIVV,
    OPC_VSLL_VI = 0x94000057 | V_OPIVI,
    OPC_VSLL_VX = 0x94000057 | V_OPIVX,
    OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
    OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
    OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
    OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
    OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
    OPC_VSRA_VX = 0xa4000057 | V_OPIVX,

    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,

    /* Whole-register move; tcg_out_mov() passes (register count - 1) as imm */
    OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
} RISCVInsn;
349
/*
 * Vector-vector compare selection, indexed by TCGCond.
 *   op   - compare instruction template to emit
 *   swap - presumably tells the emitter to exchange the two source
 *          operands (GE/GT are expressed via LE/LT) -- confirm against
 *          the code that consumes this table
 */
static const struct {
    RISCVInsn op;
    bool swap;
} tcg_cmpcond_to_rvv_vv[] = {
    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
};
365
/*
 * Vector-immediate compare selection, indexed by TCGCond.
 *   op       - compare instruction template to emit
 *   min, max - inclusive range of constants accepted for this condition
 *              (checked by tcg_target_const_match() for
 *              TCG_CT_CONST_CMP_VI)
 *   adjust   - presumably the immediate must be decremented by one
 *              before encoding (e.g. x < c emitted as x <= c - 1), which
 *              is why those rows have ranges shifted up by one --
 *              confirm against the code that consumes this table
 */
static const struct {
    RISCVInsn op;
    int min;
    int max;
    bool adjust;
}  tcg_cmpcond_to_rvv_vi[] = {
    [TCG_COND_EQ]  = { OPC_VMSEQ_VI,  -16, 15, false },
    [TCG_COND_NE]  = { OPC_VMSNE_VI,  -16, 15, false },
    [TCG_COND_GT]  = { OPC_VMSGT_VI,  -16, 15, false },
    [TCG_COND_LE]  = { OPC_VMSLE_VI,  -16, 15, false },
    [TCG_COND_LT]  = { OPC_VMSLE_VI,  -15, 16, true  },
    [TCG_COND_GE]  = { OPC_VMSGT_VI,  -15, 16, true  },
    [TCG_COND_LEU] = { OPC_VMSLEU_VI,   0, 15, false },
    [TCG_COND_GTU] = { OPC_VMSGTU_VI,   0, 15, false },
    [TCG_COND_LTU] = { OPC_VMSLEU_VI,   1, 16, true  },
    [TCG_COND_GEU] = { OPC_VMSGTU_VI,   1, 16, true  },
};
383
384/* test if a constant matches the constraint */
385static bool tcg_target_const_match(int64_t val, int ct,
386                                   TCGType type, TCGCond cond, int vece)
387{
388    if (ct & TCG_CT_CONST) {
389        return 1;
390    }
391    if (type >= TCG_TYPE_V64) {
392        /* Val is replicated by VECE; extract the highest element. */
393        val >>= (-8 << vece) & 63;
394    }
395    /*
396     * Sign extended from 12 bits: [-0x800, 0x7ff].
397     * Used for most arithmetic, as this is the isa field.
398     */
399    if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
400        return 1;
401    }
402    /*
403     * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
404     * Used by addsub2 and movcond, which may need the negative value,
405     * and requires the modified constant to be representable.
406     */
407    if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
408        return 1;
409    }
410    /*
411     * Sign extended from 5 bits: [-0x10, 0x0f].
412     * Used for vector-immediate.
413     */
414    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
415        return 1;
416    }
417    /*
418     * Used for vector compare OPIVI instructions.
419     */
420    if ((ct & TCG_CT_CONST_CMP_VI) &&
421        val >= tcg_cmpcond_to_rvv_vi[cond].min &&
422        val <= tcg_cmpcond_to_rvv_vi[cond].max) {
423        return true;
424     }
425    return 0;
426}
427
428/*
429 * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
430 */
431
432/* Type-R */
433
434static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
435{
436    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
437}
438
439/* Type-I */
440
/* Place the low 12 bits of @imm into the I-type immediate field [31:20]. */
static int32_t encode_imm12(uint32_t imm)
{
    const uint32_t field = imm & 0xfff;

    return field << 20;
}
445
446static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
447{
448    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
449}
450
451/* Type-S */
452
/*
 * Split a 12-bit store immediate into the S-type fields:
 * imm[11:5] goes to bits [31:25] and imm[4:0] to bits [11:7].
 */
static int32_t encode_simm12(uint32_t imm)
{
    const uint32_t upper = (imm & 0xFE0) << 20;
    const uint32_t lower = (imm & 0x1F) << 7;

    return upper | lower;
}
462
463static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
464{
465    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
466}
467
468/* Type-SB */
469
/*
 * Scatter a branch offset into the SB-type immediate fields:
 * imm[12] -> bit 31, imm[10:5] -> bits [30:25],
 * imm[4:1] -> bits [11:8], imm[11] -> bit 7.  Bit 0 is dropped.
 */
static int32_t encode_sbimm12(uint32_t imm)
{
    const uint32_t bit12    = (imm & 0x1000) << 19;
    const uint32_t bits10_5 = (imm & 0x7e0) << 20;
    const uint32_t bits4_1  = (imm & 0x1e) << 7;
    const uint32_t bit11    = (imm & 0x800) >> 4;

    return bit12 | bits10_5 | bits4_1 | bit11;
}
481
482static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
483{
484    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
485}
486
487/* Type-U */
488
/* Keep bits [31:12] of @imm (the U-type immediate) and clear the low 12. */
static int32_t encode_uimm20(uint32_t imm)
{
    const uint32_t upper20_mask = 0xfffff000;

    return imm & upper20_mask;
}
493
494static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
495{
496    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
497}
498
499/* Type-UJ */
500
/*
 * Scatter a jump offset into the UJ-type immediate fields:
 * imm[10:1] -> bits [30:21], imm[11] -> bit 20,
 * imm[19:12] -> bits [19:12], imm[20] -> bit 31.  Bit 0 is dropped.
 */
static int32_t encode_ujimm20(uint32_t imm)
{
    const uint32_t bits10_1  = (imm & 0x0007fe) << (21 - 1);
    const uint32_t bit11     = (imm & 0x000800) << (20 - 11);
    const uint32_t bits19_12 = (imm & 0x0ff000) << (12 - 12);
    const uint32_t bit20     = (imm & 0x100000) << (31 - 20);

    return bits10_1 | bit11 | bits19_12 | bit20;
}
512
513static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
514{
515    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
516}
517
518
519/* Type-OPIVI */
520
521static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
522                         TCGReg vs2, bool vm)
523{
524    return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
525           (vs2 & 0x1f) << 20 | (vm << 25);
526}
527
528/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
529
530static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
531                        TCGReg s2, bool vm)
532{
533    return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
534           (s2 & 0x1f) << 20 | (vm << 25);
535}
536
537/* Vector vtype */
538
539static uint32_t encode_vtype(bool vta, bool vma,
540                            MemOp vsew, RISCVVlmul vlmul)
541{
542    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
543}
544
545static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
546                           TCGArg rs1, uint32_t vtype)
547{
548    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
549}
550
551static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
552                            uint32_t uimm, uint32_t vtype)
553{
554    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
555}
556
557/*
558 * RISC-V instruction emitters
559 */
560
561static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
562                            TCGReg rd, TCGReg rs1, TCGReg rs2)
563{
564    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
565}
566
567static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
568                            TCGReg rd, TCGReg rs1, TCGArg imm)
569{
570    tcg_out32(s, encode_i(opc, rd, rs1, imm));
571}
572
573static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
574                              TCGReg rs1, TCGReg rs2, uint32_t imm)
575{
576    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
577}
578
579static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
580                               TCGReg rs1, TCGReg rs2, uint32_t imm)
581{
582    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
583}
584
585static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
586                              TCGReg rd, uint32_t imm)
587{
588    tcg_out32(s, encode_u(opc, rd, imm));
589}
590
591static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
592                             TCGReg rd, uint32_t imm)
593{
594    tcg_out32(s, encode_uj(opc, rd, imm));
595}
596
597static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
598{
599    int i;
600    for (i = 0; i < count; ++i) {
601        p[i] = OPC_NOP;
602    }
603}
604
605/*
606 * Relocations
607 */
608
609static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
610{
611    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
612    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
613
614    tcg_debug_assert((offset & 1) == 0);
615    if (offset == sextreg(offset, 0, 12)) {
616        *src_rw |= encode_sbimm12(offset);
617        return true;
618    }
619
620    return false;
621}
622
623static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
624{
625    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
626    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
627
628    tcg_debug_assert((offset & 1) == 0);
629    if (offset == sextreg(offset, 0, 20)) {
630        *src_rw |= encode_ujimm20(offset);
631        return true;
632    }
633
634    return false;
635}
636
637static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
638{
639    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
640    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
641    int32_t lo = sextreg(offset, 0, 12);
642    int32_t hi = offset - lo;
643
644    if (offset == hi + lo) {
645        src_rw[0] |= encode_uimm20(hi);
646        src_rw[1] |= encode_imm12(lo);
647        return true;
648    }
649
650    return false;
651}
652
653static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
654                        intptr_t value, intptr_t addend)
655{
656    tcg_debug_assert(addend == 0);
657    switch (type) {
658    case R_RISCV_BRANCH:
659        return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
660    case R_RISCV_JAL:
661        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
662    case R_RISCV_CALL:
663        return reloc_call(code_ptr, (tcg_insn_unit *)value);
664    default:
665        g_assert_not_reached();
666    }
667}
668
669/*
670 * RISC-V vector instruction emitters
671 */
672
/*
 * Vector registers use the same 5 low-order bits as GPRs in the encoding,
 * and vm=0 (vm = false) means vector masking is ENABLED.
 * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
 * second operand.
 */
679static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
680                           TCGReg vd, TCGReg vs2, TCGReg vs1)
681{
682    tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
683}
684
685static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
686                           TCGReg vd, TCGReg vs2, TCGReg rs1)
687{
688    tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
689}
690
691static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
692                           TCGReg vd, TCGReg vs2, int32_t imm)
693{
694    tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
695}
696
697static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
698                              TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
699{
700    if (c_vi1) {
701        tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
702    } else {
703        tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
704    }
705}
706
707static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
708                                 TCGReg vs2, int32_t imm)
709{
710    tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
711}
712
713static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
714                                 TCGReg vs2, TCGReg vs1)
715{
716    tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
717}
718
/*
 * Pre-encoded instruction words used by set_vtype() to configure the
 * vector unit.
 *   movi_insn - optional instruction emitted first; skipped when zero
 *   vset_insn - instruction emitted unconditionally after it
 */
typedef struct VsetCache {
    uint32_t movi_insn;
    uint32_t vset_insn;
} VsetCache;

/*
 * Indexed as [type - TCG_TYPE_V64][vsew]; the entries are presumably
 * filled in during backend initialization (not visible here).
 */
static VsetCache riscv_vset_cache[3][4];
725
726static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
727{
728    const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
729
730    s->riscv_cur_type = type;
731    s->riscv_cur_vsew = vsew;
732
733    if (p->movi_insn) {
734        tcg_out32(s, p->movi_insn);
735    }
736    tcg_out32(s, p->vset_insn);
737}
738
739static MemOp set_vtype_len(TCGContext *s, TCGType type)
740{
741    if (type != s->riscv_cur_type) {
742        set_vtype(s, type, MO_64);
743    }
744    return s->riscv_cur_vsew;
745}
746
747static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
748{
749    if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
750        set_vtype(s, type, vsew);
751    }
752}
753
754/*
755 * TCG intrinsics
756 */
757
758static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
759{
760    if (ret == arg) {
761        return true;
762    }
763    switch (type) {
764    case TCG_TYPE_I32:
765    case TCG_TYPE_I64:
766        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
767        break;
768    case TCG_TYPE_V64:
769    case TCG_TYPE_V128:
770    case TCG_TYPE_V256:
771        {
772            int lmul = type - riscv_lg2_vlenb;
773            int nf = 1 << MAX(lmul, 0);
774            tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
775        }
776        break;
777    default:
778        g_assert_not_reached();
779    }
780    return true;
781}
782
/*
 * Load the constant @val into integer register @rd using the shortest
 * sequence found, trying in order: a single ADDI, LUI(+ADDIW), a
 * pc-relative AUIPC+ADDI, LUI+shift, (LUI|ADDI)+SRLI, and finally a
 * load from the constant pool.
 */
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long val)
{
    tcg_target_long lo, hi, tmp;
    int shift, ret;

    /* 32-bit values are handled in their sign-extended form. */
    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* Fits in a signed 12-bit immediate: addi rd, zero, val. */
    lo = sextreg(val, 0, 12);
    if (val == lo) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
        return;
    }

    /* Fits in 32 bits: lui for the upper part, addiw for the rest. */
    hi = val - lo;
    if (val == (int32_t)val) {
        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
        if (lo != 0) {
            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
        }
        return;
    }

    /*
     * Within 32 bits of the current code pointer: emit auipc+addi with
     * zero immediates and let reloc_call() fill them in.
     */
    tmp = tcg_pcrel_diff(s, (void *)val);
    if (tmp == (int32_t)tmp) {
        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
        ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
        tcg_debug_assert(ret == true);
        return;
    }

    /* Look for a single 20-bit section.  */
    shift = ctz64(val);
    tmp = val >> shift;
    if (tmp == sextreg(tmp, 0, 20)) {
        /* lui places the section at bit 12; shift it to its real position. */
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
        if (shift > 12) {
            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
        }
        return;
    }

    /* Look for a few high zero bits, with lots of bits set in the middle.  */
    shift = clz64(val);
    tmp = val << shift;
    if (tmp == sextreg(tmp, 12, 20) << 12) {
        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    } else if (tmp == sextreg(tmp, 0, 12)) {
        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
        return;
    }

    /*
     * Drop into the constant pool.  The auipc+ld pair is emitted with
     * zero offsets and registered as an R_RISCV_CALL pool reference,
     * presumably patched once the pool entry's address is known.
     */
    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
}
848
/*
 * Register exchange hook.  This backend emits nothing and always
 * returns false.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}
853
/*
 * Pointer add-immediate hook.  Not expected to be called on this
 * backend; reaching it aborts.
 */
static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}
860
861static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
862{
863    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
864}
865
866static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
867{
868    if (cpuinfo & CPUINFO_ZBB) {
869        tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO);
870    } else {
871        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
872        tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
873    }
874}
875
876static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
877{
878    if (cpuinfo & CPUINFO_ZBA) {
879        tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO);
880    } else {
881        tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
882        tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
883    }
884}
885
886static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
887{
888    if (cpuinfo & CPUINFO_ZBB) {
889        tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0);
890    } else {
891        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
892        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
893    }
894}
895
896static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
897{
898    if (cpuinfo & CPUINFO_ZBB) {
899        tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0);
900    } else {
901        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
902        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
903    }
904}
905
/* Sign-extend the low 32 bits of @arg into @ret via "addiw ret, arg, 0". */
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
}
910
911static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
912{
913    if (ret != arg) {
914        tcg_out_ext32s(s, ret, arg);
915    }
916}
917
/* Zero-extend a 32-bit value in @arg to 64 bits in @ret. */
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32u(s, ret, arg);
}
922
/*
 * Extract the low 32 bits of the 64-bit value in @arg into @ret; the
 * result is left sign-extended (same sequence as tcg_out_ext32s()).
 */
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32s(s, ret, arg);
}
927
/*
 * Emit the scalar load or store @opc of register @data at address
 * @addr + @offset.  When @offset does not fit the 12-bit immediate
 * field, the out-of-range part of the address is first materialized in
 * TCG_REG_TMP2, which is clobbered.
 */
static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                         TCGReg addr, intptr_t offset)
{
    intptr_t imm12 = sextreg(offset, 0, 12);

    if (offset != imm12) {
        /* Distance from the current code pointer to the absolute address. */
        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);

        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
            /* Absolute address within 32 bits of pc: auipc + low 12 bits. */
            imm12 = sextreg(diff, 0, 12);
            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
        } else {
            /* Build the upper part as a constant, then add the base. */
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
            if (addr != TCG_REG_ZERO) {
                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
            }
        }
        addr = TCG_REG_TMP2;
    }

    /* Stores use the S-type encoding, loads the I-type encoding. */
    switch (opc) {
    case OPC_SB:
    case OPC_SH:
    case OPC_SW:
    case OPC_SD:
        tcg_out_opc_store(s, opc, addr, data, imm12);
        break;
    case OPC_LB:
    case OPC_LBU:
    case OPC_LH:
    case OPC_LHU:
    case OPC_LW:
    case OPC_LWU:
    case OPC_LD:
        tcg_out_opc_imm(s, opc, data, addr, imm12);
        break;
    default:
        g_assert_not_reached();
    }
}
968
969static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
970                             TCGReg addr, intptr_t offset)
971{
972    tcg_debug_assert(data >= TCG_REG_V0);
973    tcg_debug_assert(addr < TCG_REG_V0);
974
975    if (offset) {
976        tcg_debug_assert(addr != TCG_REG_ZERO);
977        if (offset == sextreg(offset, 0, 12)) {
978            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
979        } else {
980            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
981            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
982        }
983        addr = TCG_REG_TMP0;
984    }
985    tcg_out32(s, encode_v(opc, data, addr, 0, true));
986}
987
988static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
989                       TCGReg arg1, intptr_t arg2)
990{
991    RISCVInsn insn;
992
993    switch (type) {
994    case TCG_TYPE_I32:
995        tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
996        break;
997    case TCG_TYPE_I64:
998        tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
999        break;
1000    case TCG_TYPE_V64:
1001    case TCG_TYPE_V128:
1002    case TCG_TYPE_V256:
1003        if (type >= riscv_lg2_vlenb) {
1004            static const RISCVInsn whole_reg_ld[] = {
1005                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
1006            };
1007            unsigned idx = type - riscv_lg2_vlenb;
1008
1009            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
1010            insn = whole_reg_ld[idx];
1011        } else {
1012            static const RISCVInsn unit_stride_ld[] = {
1013                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
1014            };
1015            MemOp prev_vsew = set_vtype_len(s, type);
1016
1017            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
1018            insn = unit_stride_ld[prev_vsew];
1019        }
1020        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1021        break;
1022    default:
1023        g_assert_not_reached();
1024    }
1025}
1026
1027static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1028                       TCGReg arg1, intptr_t arg2)
1029{
1030    RISCVInsn insn;
1031
1032    switch (type) {
1033    case TCG_TYPE_I32:
1034        tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
1035        break;
1036    case TCG_TYPE_I64:
1037        tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
1038        break;
1039    case TCG_TYPE_V64:
1040    case TCG_TYPE_V128:
1041    case TCG_TYPE_V256:
1042        if (type >= riscv_lg2_vlenb) {
1043            static const RISCVInsn whole_reg_st[] = {
1044                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
1045            };
1046            unsigned idx = type - riscv_lg2_vlenb;
1047
1048            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
1049            insn = whole_reg_st[idx];
1050        } else {
1051            static const RISCVInsn unit_stride_st[] = {
1052                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
1053            };
1054            MemOp prev_vsew = set_vtype_len(s, type);
1055
1056            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
1057            insn = unit_stride_st[prev_vsew];
1058        }
1059        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1060        break;
1061    default:
1062        g_assert_not_reached();
1063    }
1064}
1065
1066static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1067                        TCGReg base, intptr_t ofs)
1068{
1069    if (val == 0) {
1070        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
1071        return true;
1072    }
1073    return false;
1074}
1075
1076static void tcg_out_addsub2(TCGContext *s,
1077                            TCGReg rl, TCGReg rh,
1078                            TCGReg al, TCGReg ah,
1079                            TCGArg bl, TCGArg bh,
1080                            bool cbl, bool cbh, bool is_sub, bool is32bit)
1081{
1082    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
1083    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
1084    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
1085    TCGReg th = TCG_REG_TMP1;
1086
1087    /* If we have a negative constant such that negating it would
1088       make the high part zero, we can (usually) eliminate one insn.  */
1089    if (cbl && cbh && bh == -1 && bl != 0) {
1090        bl = -bl;
1091        bh = 0;
1092        is_sub = !is_sub;
1093    }
1094
1095    /* By operating on the high part first, we get to use the final
1096       carry operation to move back from the temporary.  */
1097    if (!cbh) {
1098        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
1099    } else if (bh != 0 || ah == rl) {
1100        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
1101    } else {
1102        th = ah;
1103    }
1104
1105    /* Note that tcg optimization should eliminate the bl == 0 case.  */
1106    if (is_sub) {
1107        if (cbl) {
1108            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
1109            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
1110        } else {
1111            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
1112            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
1113        }
1114        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
1115    } else {
1116        if (cbl) {
1117            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
1118            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
1119        } else if (al == bl) {
1120            /*
1121             * If the input regs overlap, this is a simple doubling
1122             * and carry-out is the input msb.  This special case is
1123             * required when the output reg overlaps the input,
1124             * but we might as well use it always.
1125             */
1126            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
1127            tcg_out_opc_reg(s, opc_add, rl, al, al);
1128        } else {
1129            tcg_out_opc_reg(s, opc_add, rl, al, bl);
1130            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
1131                            rl, (rl == bl ? al : bl));
1132        }
1133        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
1134    }
1135}
1136
1137static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1138                                   TCGReg dst, TCGReg src)
1139{
1140    set_vtype_len_sew(s, type, vece);
1141    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
1142    return true;
1143}
1144
1145static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1146                                    TCGReg dst, TCGReg base, intptr_t offset)
1147{
1148    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
1149    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1150}
1151
1152static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1153                                    TCGReg dst, int64_t arg)
1154{
1155    /* Arg is replicated by VECE; extract the highest element. */
1156    arg >>= (-8 << vece) & 63;
1157
1158    if (arg >= -16 && arg < 16) {
1159        if (arg == 0 || arg == -1) {
1160            set_vtype_len(s, type);
1161        } else {
1162            set_vtype_len_sew(s, type, vece);
1163        }
1164        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
1165        return;
1166    }
1167    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
1168    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1169}
1170
1171static const struct {
1172    RISCVInsn op;
1173    bool swap;
1174} tcg_brcond_to_riscv[] = {
1175    [TCG_COND_EQ] =  { OPC_BEQ,  false },
1176    [TCG_COND_NE] =  { OPC_BNE,  false },
1177    [TCG_COND_LT] =  { OPC_BLT,  false },
1178    [TCG_COND_GE] =  { OPC_BGE,  false },
1179    [TCG_COND_LE] =  { OPC_BGE,  true  },
1180    [TCG_COND_GT] =  { OPC_BLT,  true  },
1181    [TCG_COND_LTU] = { OPC_BLTU, false },
1182    [TCG_COND_GEU] = { OPC_BGEU, false },
1183    [TCG_COND_LEU] = { OPC_BGEU, true  },
1184    [TCG_COND_GTU] = { OPC_BLTU, true  }
1185};
1186
1187static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond,
1188                        TCGReg arg1, TCGReg arg2, TCGLabel *l)
1189{
1190    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
1191
1192    tcg_debug_assert(op != 0);
1193
1194    if (tcg_brcond_to_riscv[cond].swap) {
1195        TCGReg t = arg1;
1196        arg1 = arg2;
1197        arg2 = t;
1198    }
1199
1200    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
1201    tcg_out_opc_branch(s, op, arg1, arg2, 0);
1202}
1203
1204static const TCGOutOpBrcond outop_brcond = {
1205    .base.static_constraint = C_O0_I2(r, rz),
1206    .out_rr = tgen_brcond,
1207};
1208
/*
 * Flag bits OR'ed into the return value of tcg_out_setcond_int(),
 * alongside the register number that holds the intermediate result.
 *   SETCOND_INV: the intermediate result must be inverted.
 *   SETCOND_NEZ: the intermediate result is zero/non-zero, not 0/1.
 */
#define SETCOND_INV    TCG_TARGET_NB_REGS
#define SETCOND_NEZ    (SETCOND_INV << 1)
#define SETCOND_FLAGS  (SETCOND_NEZ | SETCOND_INV)
1212
1213static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
1214                               TCGReg arg1, tcg_target_long arg2, bool c2)
1215{
1216    int flags = 0;
1217
1218    switch (cond) {
1219    case TCG_COND_EQ:    /* -> NE  */
1220    case TCG_COND_GE:    /* -> LT  */
1221    case TCG_COND_GEU:   /* -> LTU */
1222    case TCG_COND_GT:    /* -> LE  */
1223    case TCG_COND_GTU:   /* -> LEU */
1224        cond = tcg_invert_cond(cond);
1225        flags ^= SETCOND_INV;
1226        break;
1227    default:
1228        break;
1229    }
1230
1231    switch (cond) {
1232    case TCG_COND_LE:
1233    case TCG_COND_LEU:
1234        /*
1235         * If we have a constant input, the most efficient way to implement
1236         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
1237         * We don't need to care for this for LE because the constant input
1238         * is constrained to signed 12-bit, and 0x800 is representable in the
1239         * temporary register.
1240         */
1241        if (c2) {
1242            if (cond == TCG_COND_LEU) {
1243                /* unsigned <= -1 is true */
1244                if (arg2 == -1) {
1245                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
1246                    return ret;
1247                }
1248                cond = TCG_COND_LTU;
1249            } else {
1250                cond = TCG_COND_LT;
1251            }
1252            tcg_debug_assert(arg2 <= 0x7ff);
1253            if (++arg2 == 0x800) {
1254                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
1255                arg2 = TCG_REG_TMP0;
1256                c2 = false;
1257            }
1258        } else {
1259            TCGReg tmp = arg2;
1260            arg2 = arg1;
1261            arg1 = tmp;
1262            cond = tcg_swap_cond(cond);    /* LE -> GE */
1263            cond = tcg_invert_cond(cond);  /* GE -> LT */
1264            flags ^= SETCOND_INV;
1265        }
1266        break;
1267    default:
1268        break;
1269    }
1270
1271    switch (cond) {
1272    case TCG_COND_NE:
1273        flags |= SETCOND_NEZ;
1274        if (!c2) {
1275            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
1276        } else if (arg2 == 0) {
1277            ret = arg1;
1278        } else {
1279            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
1280        }
1281        break;
1282
1283    case TCG_COND_LT:
1284        if (c2) {
1285            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
1286        } else {
1287            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
1288        }
1289        break;
1290
1291    case TCG_COND_LTU:
1292        if (c2) {
1293            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
1294        } else {
1295            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
1296        }
1297        break;
1298
1299    default:
1300        g_assert_not_reached();
1301    }
1302
1303    return ret | flags;
1304}
1305
1306static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
1307                            TCGReg arg1, tcg_target_long arg2, bool c2)
1308{
1309    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1310
1311    if (tmpflags != ret) {
1312        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
1313
1314        switch (tmpflags & SETCOND_FLAGS) {
1315        case SETCOND_INV:
1316            /* Intermediate result is boolean: simply invert. */
1317            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
1318            break;
1319        case SETCOND_NEZ:
1320            /* Intermediate result is zero/non-zero: test != 0. */
1321            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1322            break;
1323        case SETCOND_NEZ | SETCOND_INV:
1324            /* Intermediate result is zero/non-zero: test == 0. */
1325            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
1326            break;
1327        default:
1328            g_assert_not_reached();
1329        }
1330    }
1331}
1332
1333static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1334                         TCGReg dest, TCGReg arg1, TCGReg arg2)
1335{
1336    tcg_out_setcond(s, cond, dest, arg1, arg2, false);
1337}
1338
1339static void tgen_setcondi(TCGContext *s, TCGType type, TCGCond cond,
1340                          TCGReg dest, TCGReg arg1, tcg_target_long arg2)
1341{
1342    tcg_out_setcond(s, cond, dest, arg1, arg2, true);
1343}
1344
1345static const TCGOutOpSetcond outop_setcond = {
1346    .base.static_constraint = C_O1_I2(r, r, rI),
1347    .out_rrr = tgen_setcond,
1348    .out_rri = tgen_setcondi,
1349};
1350
1351static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
1352                               TCGReg arg1, tcg_target_long arg2, bool c2)
1353{
1354    int tmpflags;
1355    TCGReg tmp;
1356
1357    /* For LT/GE comparison against 0, replicate the sign bit. */
1358    if (c2 && arg2 == 0) {
1359        switch (cond) {
1360        case TCG_COND_GE:
1361            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
1362            arg1 = ret;
1363            /* fall through */
1364        case TCG_COND_LT:
1365            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
1366            return;
1367        default:
1368            break;
1369        }
1370    }
1371
1372    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1373    tmp = tmpflags & ~SETCOND_FLAGS;
1374
1375    /* If intermediate result is zero/non-zero: test != 0. */
1376    if (tmpflags & SETCOND_NEZ) {
1377        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1378        tmp = ret;
1379    }
1380
1381    /* Produce the 0/-1 result. */
1382    if (tmpflags & SETCOND_INV) {
1383        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
1384    } else {
1385        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
1386    }
1387}
1388
1389static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond,
1390                            TCGReg dest, TCGReg arg1, TCGReg arg2)
1391{
1392    tcg_out_negsetcond(s, cond, dest, arg1, arg2, false);
1393}
1394
1395static void tgen_negsetcondi(TCGContext *s, TCGType type, TCGCond cond,
1396                             TCGReg dest, TCGReg arg1, tcg_target_long arg2)
1397{
1398    tcg_out_negsetcond(s, cond, dest, arg1, arg2, true);
1399}
1400
1401static const TCGOutOpSetcond outop_negsetcond = {
1402    .base.static_constraint = C_O1_I2(r, r, rI),
1403    .out_rrr = tgen_negsetcond,
1404    .out_rri = tgen_negsetcondi,
1405};
1406
1407static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
1408                                   int val1, bool c_val1,
1409                                   int val2, bool c_val2)
1410{
1411    if (val1 == 0) {
1412        if (c_val2) {
1413            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
1414            val2 = TCG_REG_TMP1;
1415        }
1416        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
1417        return;
1418    }
1419
1420    if (val2 == 0) {
1421        if (c_val1) {
1422            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
1423            val1 = TCG_REG_TMP1;
1424        }
1425        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
1426        return;
1427    }
1428
1429    if (c_val2) {
1430        if (c_val1) {
1431            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
1432        } else {
1433            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
1434        }
1435        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
1436        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
1437        return;
1438    }
1439
1440    if (c_val1) {
1441        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
1442        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
1443        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
1444        return;
1445    }
1446
1447    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
1448    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
1449    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
1450}
1451
1452static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
1453                                TCGReg cmp1, TCGReg cmp2,
1454                                int val, bool c_val)
1455{
1456    RISCVInsn op;
1457    int disp = 8;
1458
1459    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
1460    op = tcg_brcond_to_riscv[cond].op;
1461    tcg_debug_assert(op != 0);
1462
1463    if (tcg_brcond_to_riscv[cond].swap) {
1464        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
1465    } else {
1466        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
1467    }
1468    if (c_val) {
1469        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
1470    } else {
1471        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
1472    }
1473}
1474
1475static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
1476                                TCGReg cmp1, TCGReg cmp2,
1477                                int val1, bool c_val1,
1478                                int val2, bool c_val2)
1479{
1480    TCGReg tmp;
1481
1482    /* TCG optimizer reorders to prefer ret matching val2. */
1483    if (!c_val2 && ret == val2) {
1484        cond = tcg_invert_cond(cond);
1485        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
1486        return;
1487    }
1488
1489    if (!c_val1 && ret == val1) {
1490        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
1491        return;
1492    }
1493
1494    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
1495    if (c_val1) {
1496        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
1497    } else {
1498        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
1499    }
1500    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
1501    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
1502}
1503
1504static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
1505                            TCGReg ret, TCGReg cmp1, TCGArg cmp2, bool c_cmp2,
1506                            TCGArg val1, bool c_val1,
1507                            TCGArg val2, bool c_val2)
1508{
1509    int tmpflags;
1510    TCGReg t;
1511
1512    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
1513        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
1514                            val1, c_val1, val2, c_val2);
1515        return;
1516    }
1517
1518    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
1519    t = tmpflags & ~SETCOND_FLAGS;
1520
1521    if (cpuinfo & CPUINFO_ZICOND) {
1522        if (tmpflags & SETCOND_INV) {
1523            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
1524        } else {
1525            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
1526        }
1527    } else {
1528        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
1529        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
1530                            val1, c_val1, val2, c_val2);
1531    }
1532}
1533
1534static const TCGOutOpMovcond outop_movcond = {
1535    .base.static_constraint = C_O1_I4(r, r, rI, rM, rM),
1536    .out = tcg_out_movcond,
1537};
1538
1539static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
1540                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
1541{
1542    tcg_out_opc_imm(s, insn, ret, src1, 0);
1543
1544    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
1545        /*
1546         * The requested zero result does not match the insn, so adjust.
1547         * Note that constraints put 'ret' in a new register, so the
1548         * computation above did not clobber either 'src1' or 'src2'.
1549         */
1550        tcg_out_movcond(s, type, TCG_COND_EQ, ret, src1, 0, true,
1551                        src2, c_src2, ret, false);
1552    }
1553}
1554
1555static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
1556                           TCGCond cond, TCGReg ret,
1557                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
1558                           TCGReg val1, bool c_val1,
1559                           TCGReg val2, bool c_val2)
1560{
1561    set_vtype_len_sew(s, type, vece);
1562
1563    /* Use only vmerge_vim if possible, by inverting the test. */
1564    if (c_val2 && !c_val1) {
1565        TCGArg temp = val1;
1566        cond = tcg_invert_cond(cond);
1567        val1 = val2;
1568        val2 = temp;
1569        c_val1 = true;
1570        c_val2 = false;
1571    }
1572
1573    /* Perform the comparison into V0 mask. */
1574    if (c_cmp2) {
1575        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
1576                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
1577    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
1578        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1579                       TCG_REG_V0, cmp2, cmp1);
1580    } else {
1581        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1582                       TCG_REG_V0, cmp1, cmp2);
1583    }
1584    if (c_val1) {
1585        if (c_val2) {
1586            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
1587            val2 = ret;
1588        }
1589        /* vd[i] == v0.mask[i] ? imm : vs2[i] */
1590        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
1591    } else {
1592        /* vd[i] == v0.mask[i] ? vs1[i] : vs2[i] */
1593        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
1594    }
1595}
1596
1597static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
1598                             TCGReg dst, TCGReg src, unsigned imm)
1599{
1600    if (imm < 32) {
1601        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
1602    } else {
1603        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
1604        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
1605    }
1606}
1607
1608static void init_setting_vtype(TCGContext *s)
1609{
1610    s->riscv_cur_type = TCG_TYPE_COUNT;
1611}
1612
1613static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
1614{
1615    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
1616    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
1617    int ret;
1618
1619    init_setting_vtype(s);
1620
1621    tcg_debug_assert((offset & 1) == 0);
1622    if (offset == sextreg(offset, 0, 20)) {
1623        /* short jump: -2097150 to 2097152 */
1624        tcg_out_opc_jump(s, OPC_JAL, link, offset);
1625    } else if (offset == (int32_t)offset) {
1626        /* long jump: -2147483646 to 2147483648 */
1627        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
1628        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
1629        ret = reloc_call(s->code_ptr - 2, arg);
1630        tcg_debug_assert(ret == true);
1631    } else {
1632        /* far jump: 64-bit */
1633        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
1634        tcg_target_long base = (tcg_target_long)arg - imm;
1635        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
1636        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
1637    }
1638}
1639
1640static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
1641                         const TCGHelperInfo *info)
1642{
1643    tcg_out_call_int(s, arg, false);
1644}
1645
1646static void tcg_out_mb(TCGContext *s, TCGArg a0)
1647{
1648    tcg_insn_unit insn = OPC_FENCE;
1649
1650    if (a0 & TCG_MO_LD_LD) {
1651        insn |= 0x02200000;
1652    }
1653    if (a0 & TCG_MO_ST_LD) {
1654        insn |= 0x01200000;
1655    }
1656    if (a0 & TCG_MO_LD_ST) {
1657        insn |= 0x02100000;
1658    }
1659    if (a0 & TCG_MO_ST_ST) {
1660        insn |= 0x01100000;
1661    }
1662    tcg_out32(s, insn);
1663}
1664
1665/*
1666 * Load/store and TLB
1667 */
1668
1669static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1670{
1671    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1672    bool ok = reloc_jimm20(s->code_ptr - 1, target);
1673    tcg_debug_assert(ok);
1674}
1675
1676bool tcg_target_has_memory_bswap(MemOp memop)
1677{
1678    return false;
1679}
1680
1681/* We have three temps, we might as well expose them. */
1682static const TCGLdstHelperParam ldst_helper_param = {
1683    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
1684};
1685
1686static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1687{
1688    MemOp opc = get_memop(l->oi);
1689
1690    /* resolve label address */
1691    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1692        return false;
1693    }
1694
1695    /* call load helper */
1696    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
1697    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
1698    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
1699
1700    tcg_out_goto(s, l->raddr);
1701    return true;
1702}
1703
1704static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1705{
1706    MemOp opc = get_memop(l->oi);
1707
1708    /* resolve label address */
1709    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1710        return false;
1711    }
1712
1713    /* call store helper */
1714    tcg_out_st_helper_args(s, l, &ldst_helper_param);
1715    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
1716
1717    tcg_out_goto(s, l->raddr);
1718    return true;
1719}
1720
/*
 * We expect to use a 12-bit negative offset from ENV, so the smallest
 * usable offset is -2048.  The expansion is fully parenthesised so that
 * it is safe in any expression context.
 */
#define MIN_TLB_MASK_TABLE_OFS  (-(1 << 11))
1723
1724/*
1725 * For system-mode, perform the TLB load and compare.
1726 * For user-mode, perform any required alignment tests.
1727 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1728 * is required and fill in @h with the host address for the fast path.
1729 */
1730static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
1731                                           TCGReg addr_reg, MemOpIdx oi,
1732                                           bool is_ld)
1733{
1734    TCGType addr_type = s->addr_type;
1735    TCGLabelQemuLdst *ldst = NULL;
1736    MemOp opc = get_memop(oi);
1737    TCGAtomAlign aa;
1738    unsigned a_mask;
1739
1740    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
1741    a_mask = (1u << aa.align) - 1;
1742
1743    if (tcg_use_softmmu) {
1744        unsigned s_bits = opc & MO_SIZE;
1745        unsigned s_mask = (1u << s_bits) - 1;
1746        int mem_index = get_mmuidx(oi);
1747        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
1748        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
1749        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
1750        int compare_mask;
1751        TCGReg addr_adj;
1752
1753        ldst = new_ldst_label(s);
1754        ldst->is_ld = is_ld;
1755        ldst->oi = oi;
1756        ldst->addr_reg = addr_reg;
1757
1758        init_setting_vtype(s);
1759
1760        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
1761        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
1762
1763        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
1764                        s->page_bits - CPU_TLB_ENTRY_BITS);
1765        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
1766        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
1767
1768        /*
1769         * For aligned accesses, we check the first byte and include the
1770         * alignment bits within the address.  For unaligned access, we
1771         * check that we don't cross pages using the address of the last
1772         * byte of the access.
1773         */
1774        addr_adj = addr_reg;
1775        if (a_mask < s_mask) {
1776            addr_adj = TCG_REG_TMP0;
1777            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
1778                            addr_adj, addr_reg, s_mask - a_mask);
1779        }
1780        compare_mask = s->page_mask | a_mask;
1781        if (compare_mask == sextreg(compare_mask, 0, 12)) {
1782            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
1783        } else {
1784            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
1785            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
1786        }
1787
1788        /* Load the tlb comparator and the addend.  */
1789        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1790        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
1791                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1792                         : offsetof(CPUTLBEntry, addr_write));
1793        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
1794                   offsetof(CPUTLBEntry, addend));
1795
1796        /* Compare masked address with the TLB entry. */
1797        ldst->label_ptr[0] = s->code_ptr;
1798        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
1799
1800        /* TLB Hit - translate address using addend.  */
1801        if (addr_type != TCG_TYPE_I32) {
1802            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
1803        } else if (cpuinfo & CPUINFO_ZBA) {
1804            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
1805                            addr_reg, TCG_REG_TMP2);
1806        } else {
1807            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
1808            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
1809                            TCG_REG_TMP0, TCG_REG_TMP2);
1810        }
1811        *pbase = TCG_REG_TMP0;
1812    } else {
1813        TCGReg base;
1814
1815        if (a_mask) {
1816            ldst = new_ldst_label(s);
1817            ldst->is_ld = is_ld;
1818            ldst->oi = oi;
1819            ldst->addr_reg = addr_reg;
1820
1821            init_setting_vtype(s);
1822
1823            /* We are expecting alignment max 7, so we can always use andi. */
1824            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
1825            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
1826
1827            ldst->label_ptr[0] = s->code_ptr;
1828            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
1829        }
1830
1831        if (guest_base != 0) {
1832            base = TCG_REG_TMP0;
1833            if (addr_type != TCG_TYPE_I32) {
1834                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
1835                                TCG_GUEST_BASE_REG);
1836            } else if (cpuinfo & CPUINFO_ZBA) {
1837                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
1838                                TCG_GUEST_BASE_REG);
1839            } else {
1840                tcg_out_ext32u(s, base, addr_reg);
1841                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
1842            }
1843        } else if (addr_type != TCG_TYPE_I32) {
1844            base = addr_reg;
1845        } else {
1846            base = TCG_REG_TMP0;
1847            tcg_out_ext32u(s, base, addr_reg);
1848        }
1849        *pbase = base;
1850    }
1851
1852    return ldst;
1853}
1854
1855static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
1856                                   TCGReg base, MemOp opc, TCGType type)
1857{
1858    /* Byte swapping is left to middle-end expansion. */
1859    tcg_debug_assert((opc & MO_BSWAP) == 0);
1860
1861    switch (opc & (MO_SSIZE)) {
1862    case MO_UB:
1863        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
1864        break;
1865    case MO_SB:
1866        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
1867        break;
1868    case MO_UW:
1869        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
1870        break;
1871    case MO_SW:
1872        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
1873        break;
1874    case MO_UL:
1875        if (type == TCG_TYPE_I64) {
1876            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
1877            break;
1878        }
1879        /* FALLTHRU */
1880    case MO_SL:
1881        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
1882        break;
1883    case MO_UQ:
1884        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
1885        break;
1886    default:
1887        g_assert_not_reached();
1888    }
1889}
1890
1891static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1892                            MemOpIdx oi, TCGType data_type)
1893{
1894    TCGLabelQemuLdst *ldst;
1895    TCGReg base;
1896
1897    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
1898    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
1899
1900    if (ldst) {
1901        ldst->type = data_type;
1902        ldst->datalo_reg = data_reg;
1903        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1904    }
1905}
1906
1907static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
1908                                   TCGReg base, MemOp opc)
1909{
1910    /* Byte swapping is left to middle-end expansion. */
1911    tcg_debug_assert((opc & MO_BSWAP) == 0);
1912
1913    switch (opc & (MO_SSIZE)) {
1914    case MO_8:
1915        tcg_out_opc_store(s, OPC_SB, base, val, 0);
1916        break;
1917    case MO_16:
1918        tcg_out_opc_store(s, OPC_SH, base, val, 0);
1919        break;
1920    case MO_32:
1921        tcg_out_opc_store(s, OPC_SW, base, val, 0);
1922        break;
1923    case MO_64:
1924        tcg_out_opc_store(s, OPC_SD, base, val, 0);
1925        break;
1926    default:
1927        g_assert_not_reached();
1928    }
1929}
1930
1931static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1932                            MemOpIdx oi, TCGType data_type)
1933{
1934    TCGLabelQemuLdst *ldst;
1935    TCGReg base;
1936
1937    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
1938    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
1939
1940    if (ldst) {
1941        ldst->type = data_type;
1942        ldst->datalo_reg = data_reg;
1943        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1944    }
1945}
1946
/*
 * Code address that tcg_out_exit_tb() transfers to when returning a
 * non-zero value.  NOTE(review): assigned outside this part of the
 * file, presumably while the prologue/epilogue is generated -- confirm.
 */
static const tcg_insn_unit *tb_ret_addr;
1948
1949static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1950{
1951    /* Reuse the zeroing that exists for goto_ptr.  */
1952    if (a0 == 0) {
1953        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
1954    } else {
1955        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
1956        tcg_out_call_int(s, tb_ret_addr, true);
1957    }
1958}
1959
1960static void tcg_out_goto_tb(TCGContext *s, int which)
1961{
1962    /* Direct branch will be patched by tb_target_set_jmp_target. */
1963    set_jmp_insn_offset(s, which);
1964    tcg_out32(s, OPC_JAL);
1965
1966    /* When branch is out of range, fall through to indirect. */
1967    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
1968               get_jmp_target_addr(s, which));
1969    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
1970    set_jmp_reset_offset(s, which);
1971}
1972
1973void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1974                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1975{
1976    uintptr_t addr = tb->jmp_target_addr[n];
1977    ptrdiff_t offset = addr - jmp_rx;
1978    tcg_insn_unit insn;
1979
1980    /* Either directly branch, or fall through to indirect branch. */
1981    if (offset == sextreg(offset, 0, 20)) {
1982        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
1983    } else {
1984        insn = OPC_NOP;
1985    }
1986    qatomic_set((uint32_t *)jmp_rw, insn);
1987    flush_idcache_range(jmp_rx, jmp_rw, 4);
1988}
1989
1990
1991static void tgen_add(TCGContext *s, TCGType type,
1992                     TCGReg a0, TCGReg a1, TCGReg a2)
1993{
1994    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDW : OPC_ADD;
1995    tcg_out_opc_reg(s, insn, a0, a1, a2);
1996}
1997
1998static void tgen_addi(TCGContext *s, TCGType type,
1999                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2000{
2001    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI;
2002    tcg_out_opc_imm(s, insn, a0, a1, a2);
2003}
2004
2005static const TCGOutOpBinary outop_add = {
2006    .base.static_constraint = C_O1_I2(r, r, rI),
2007    .out_rrr = tgen_add,
2008    .out_rri = tgen_addi,
2009};
2010
2011static void tgen_and(TCGContext *s, TCGType type,
2012                     TCGReg a0, TCGReg a1, TCGReg a2)
2013{
2014    tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
2015}
2016
2017static void tgen_andi(TCGContext *s, TCGType type,
2018                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2019{
2020    tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
2021}
2022
2023static const TCGOutOpBinary outop_and = {
2024    .base.static_constraint = C_O1_I2(r, r, rI),
2025    .out_rrr = tgen_and,
2026    .out_rri = tgen_andi,
2027};
2028
2029static void tgen_andc(TCGContext *s, TCGType type,
2030                      TCGReg a0, TCGReg a1, TCGReg a2)
2031{
2032    tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
2033}
2034
2035static TCGConstraintSetIndex cset_zbb_rrr(TCGType type, unsigned flags)
2036{
2037    return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, r) : C_NotImplemented;
2038}
2039
2040static const TCGOutOpBinary outop_andc = {
2041    .base.static_constraint = C_Dynamic,
2042    .base.dynamic_constraint = cset_zbb_rrr,
2043    .out_rrr = tgen_andc,
2044};
2045
2046static void tgen_clz(TCGContext *s, TCGType type,
2047                     TCGReg a0, TCGReg a1, TCGReg a2)
2048{
2049    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ;
2050    tcg_out_cltz(s, type, insn, a0, a1, a2, false);
2051}
2052
2053static void tgen_clzi(TCGContext *s, TCGType type,
2054                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2055{
2056    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CLZW : OPC_CLZ;
2057    tcg_out_cltz(s, type, insn, a0, a1, a2, true);
2058}
2059
2060static TCGConstraintSetIndex cset_clzctz(TCGType type, unsigned flags)
2061{
2062    return cpuinfo & CPUINFO_ZBB ? C_N1_I2(r, r, rM) : C_NotImplemented;
2063}
2064
2065static const TCGOutOpBinary outop_clz = {
2066    .base.static_constraint = C_Dynamic,
2067    .base.dynamic_constraint = cset_clzctz,
2068    .out_rrr = tgen_clz,
2069    .out_rri = tgen_clzi,
2070};
2071
2072static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2073{
2074    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CPOPW : OPC_CPOP;
2075    tcg_out_opc_imm(s, insn, a0, a1, 0);
2076}
2077
2078static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
2079{
2080    return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented;
2081}
2082
2083static const TCGOutOpUnary outop_ctpop = {
2084    .base.static_constraint = C_Dynamic,
2085    .base.dynamic_constraint = cset_ctpop,
2086    .out_rr = tgen_ctpop,
2087};
2088
2089static void tgen_ctz(TCGContext *s, TCGType type,
2090                     TCGReg a0, TCGReg a1, TCGReg a2)
2091{
2092    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ;
2093    tcg_out_cltz(s, type, insn, a0, a1, a2, false);
2094}
2095
2096static void tgen_ctzi(TCGContext *s, TCGType type,
2097                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2098{
2099    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CTZW : OPC_CTZ;
2100    tcg_out_cltz(s, type, insn, a0, a1, a2, true);
2101}
2102
2103static const TCGOutOpBinary outop_ctz = {
2104    .base.static_constraint = C_Dynamic,
2105    .base.dynamic_constraint = cset_clzctz,
2106    .out_rrr = tgen_ctz,
2107    .out_rri = tgen_ctzi,
2108};
2109
2110static void tgen_divs(TCGContext *s, TCGType type,
2111                      TCGReg a0, TCGReg a1, TCGReg a2)
2112{
2113    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_DIVW : OPC_DIV;
2114    tcg_out_opc_reg(s, insn, a0, a1, a2);
2115}
2116
2117static const TCGOutOpBinary outop_divs = {
2118    .base.static_constraint = C_O1_I2(r, r, r),
2119    .out_rrr = tgen_divs,
2120};
2121
2122static const TCGOutOpDivRem outop_divs2 = {
2123    .base.static_constraint = C_NotImplemented,
2124};
2125
2126static void tgen_divu(TCGContext *s, TCGType type,
2127                      TCGReg a0, TCGReg a1, TCGReg a2)
2128{
2129    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_DIVUW : OPC_DIVU;
2130    tcg_out_opc_reg(s, insn, a0, a1, a2);
2131}
2132
2133static const TCGOutOpBinary outop_divu = {
2134    .base.static_constraint = C_O1_I2(r, r, r),
2135    .out_rrr = tgen_divu,
2136};
2137
2138static const TCGOutOpDivRem outop_divu2 = {
2139    .base.static_constraint = C_NotImplemented,
2140};
2141
2142static void tgen_eqv(TCGContext *s, TCGType type,
2143                     TCGReg a0, TCGReg a1, TCGReg a2)
2144{
2145    tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
2146}
2147
2148static const TCGOutOpBinary outop_eqv = {
2149    .base.static_constraint = C_Dynamic,
2150    .base.dynamic_constraint = cset_zbb_rrr,
2151    .out_rrr = tgen_eqv,
2152};
2153
2154static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
2155{
2156    tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
2157}
2158
2159static const TCGOutOpUnary outop_extrh_i64_i32 = {
2160    .base.static_constraint = C_O1_I1(r, r),
2161    .out_rr = tgen_extrh_i64_i32,
2162};
2163
2164static void tgen_mul(TCGContext *s, TCGType type,
2165                     TCGReg a0, TCGReg a1, TCGReg a2)
2166{
2167    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_MULW : OPC_MUL;
2168    tcg_out_opc_reg(s, insn, a0, a1, a2);
2169}
2170
2171static const TCGOutOpBinary outop_mul = {
2172    .base.static_constraint = C_O1_I2(r, r, r),
2173    .out_rrr = tgen_mul,
2174};
2175
2176static const TCGOutOpMul2 outop_muls2 = {
2177    .base.static_constraint = C_NotImplemented,
2178};
2179
2180static TCGConstraintSetIndex cset_mulh(TCGType type, unsigned flags)
2181{
2182    return type == TCG_TYPE_I32 ? C_NotImplemented : C_O1_I2(r, r, r);
2183}
2184
2185static void tgen_mulsh(TCGContext *s, TCGType type,
2186                       TCGReg a0, TCGReg a1, TCGReg a2)
2187{
2188    tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
2189}
2190
2191static const TCGOutOpBinary outop_mulsh = {
2192    .base.static_constraint = C_Dynamic,
2193    .base.dynamic_constraint = cset_mulh,
2194    .out_rrr = tgen_mulsh,
2195};
2196
2197static const TCGOutOpMul2 outop_mulu2 = {
2198    .base.static_constraint = C_NotImplemented,
2199};
2200
2201static void tgen_muluh(TCGContext *s, TCGType type,
2202                       TCGReg a0, TCGReg a1, TCGReg a2)
2203{
2204    tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
2205}
2206
2207static const TCGOutOpBinary outop_muluh = {
2208    .base.static_constraint = C_Dynamic,
2209    .base.dynamic_constraint = cset_mulh,
2210    .out_rrr = tgen_muluh,
2211};
2212
2213static const TCGOutOpBinary outop_nand = {
2214    .base.static_constraint = C_NotImplemented,
2215};
2216
2217static const TCGOutOpBinary outop_nor = {
2218    .base.static_constraint = C_NotImplemented,
2219};
2220
2221static void tgen_or(TCGContext *s, TCGType type,
2222                    TCGReg a0, TCGReg a1, TCGReg a2)
2223{
2224    tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
2225}
2226
2227static void tgen_ori(TCGContext *s, TCGType type,
2228                     TCGReg a0, TCGReg a1, tcg_target_long a2)
2229{
2230    tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
2231}
2232
2233static const TCGOutOpBinary outop_or = {
2234    .base.static_constraint = C_O1_I2(r, r, rI),
2235    .out_rrr = tgen_or,
2236    .out_rri = tgen_ori,
2237};
2238
2239static void tgen_orc(TCGContext *s, TCGType type,
2240                     TCGReg a0, TCGReg a1, TCGReg a2)
2241{
2242    tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
2243}
2244
2245static const TCGOutOpBinary outop_orc = {
2246    .base.static_constraint = C_Dynamic,
2247    .base.dynamic_constraint = cset_zbb_rrr,
2248    .out_rrr = tgen_orc,
2249};
2250
2251static void tgen_rems(TCGContext *s, TCGType type,
2252                      TCGReg a0, TCGReg a1, TCGReg a2)
2253{
2254    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMW : OPC_REM;
2255    tcg_out_opc_reg(s, insn, a0, a1, a2);
2256}
2257
2258static const TCGOutOpBinary outop_rems = {
2259    .base.static_constraint = C_O1_I2(r, r, r),
2260    .out_rrr = tgen_rems,
2261};
2262
2263static void tgen_remu(TCGContext *s, TCGType type,
2264                      TCGReg a0, TCGReg a1, TCGReg a2)
2265{
2266    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_REMUW : OPC_REMU;
2267    tcg_out_opc_reg(s, insn, a0, a1, a2);
2268}
2269
2270static const TCGOutOpBinary outop_remu = {
2271    .base.static_constraint = C_O1_I2(r, r, r),
2272    .out_rrr = tgen_remu,
2273};
2274
2275static TCGConstraintSetIndex cset_rot(TCGType type, unsigned flags)
2276{
2277    return cpuinfo & CPUINFO_ZBB ? C_O1_I2(r, r, ri) : C_NotImplemented;
2278}
2279
2280static void tgen_rotr(TCGContext *s, TCGType type,
2281                      TCGReg a0, TCGReg a1, TCGReg a2)
2282{
2283    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_RORW : OPC_ROR;
2284    tcg_out_opc_reg(s, insn, a0, a1, a2);
2285}
2286
2287static void tgen_rotri(TCGContext *s, TCGType type,
2288                       TCGReg a0, TCGReg a1, tcg_target_long a2)
2289{
2290    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_RORIW : OPC_RORI;
2291    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2292    tcg_out_opc_imm(s, insn, a0, a1, a2 & mask);
2293}
2294
2295static const TCGOutOpBinary outop_rotr = {
2296    .base.static_constraint = C_Dynamic,
2297    .base.dynamic_constraint = cset_rot,
2298    .out_rrr = tgen_rotr,
2299    .out_rri = tgen_rotri,
2300};
2301
2302static void tgen_rotl(TCGContext *s, TCGType type,
2303                      TCGReg a0, TCGReg a1, TCGReg a2)
2304{
2305    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_ROLW : OPC_ROL;
2306    tcg_out_opc_reg(s, insn, a0, a1, a2);
2307}
2308
2309static void tgen_rotli(TCGContext *s, TCGType type,
2310                       TCGReg a0, TCGReg a1, tcg_target_long a2)
2311{
2312    tgen_rotri(s, type, a0, a1, -a2);
2313}
2314
2315static const TCGOutOpBinary outop_rotl = {
2316    .base.static_constraint = C_Dynamic,
2317    .base.dynamic_constraint = cset_rot,
2318    .out_rrr = tgen_rotl,
2319    .out_rri = tgen_rotli,
2320};
2321
2322static void tgen_sar(TCGContext *s, TCGType type,
2323                     TCGReg a0, TCGReg a1, TCGReg a2)
2324{
2325    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAW : OPC_SRA;
2326    tcg_out_opc_reg(s, insn, a0, a1, a2);
2327}
2328
2329static void tgen_sari(TCGContext *s, TCGType type,
2330                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2331{
2332    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRAIW : OPC_SRAI;
2333    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2334    tcg_out_opc_imm(s, insn, a0, a1, a2 & mask);
2335}
2336
2337static const TCGOutOpBinary outop_sar = {
2338    .base.static_constraint = C_O1_I2(r, r, ri),
2339    .out_rrr = tgen_sar,
2340    .out_rri = tgen_sari,
2341};
2342
2343static void tgen_shl(TCGContext *s, TCGType type,
2344                     TCGReg a0, TCGReg a1, TCGReg a2)
2345{
2346    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLW : OPC_SLL;
2347    tcg_out_opc_reg(s, insn, a0, a1, a2);
2348}
2349
2350static void tgen_shli(TCGContext *s, TCGType type,
2351                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2352{
2353    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SLLIW : OPC_SLLI;
2354    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2355    tcg_out_opc_imm(s, insn, a0, a1, a2 & mask);
2356}
2357
2358static const TCGOutOpBinary outop_shl = {
2359    .base.static_constraint = C_O1_I2(r, r, ri),
2360    .out_rrr = tgen_shl,
2361    .out_rri = tgen_shli,
2362};
2363
2364static void tgen_shr(TCGContext *s, TCGType type,
2365                     TCGReg a0, TCGReg a1, TCGReg a2)
2366{
2367    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLW : OPC_SRL;
2368    tcg_out_opc_reg(s, insn, a0, a1, a2);
2369}
2370
2371static void tgen_shri(TCGContext *s, TCGType type,
2372                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2373{
2374    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SRLIW : OPC_SRLI;
2375    unsigned mask = type == TCG_TYPE_I32 ? 31 : 63;
2376    tcg_out_opc_imm(s, insn, a0, a1, a2 & mask);
2377}
2378
2379static const TCGOutOpBinary outop_shr = {
2380    .base.static_constraint = C_O1_I2(r, r, ri),
2381    .out_rrr = tgen_shr,
2382    .out_rri = tgen_shri,
2383};
2384
2385static void tgen_sub(TCGContext *s, TCGType type,
2386                     TCGReg a0, TCGReg a1, TCGReg a2)
2387{
2388    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_SUBW : OPC_SUB;
2389    tcg_out_opc_reg(s, insn, a0, a1, a2);
2390}
2391
2392static const TCGOutOpSubtract outop_sub = {
2393    .base.static_constraint = C_O1_I2(r, r, r),
2394    .out_rrr = tgen_sub,
2395};
2396
2397static void tgen_xor(TCGContext *s, TCGType type,
2398                     TCGReg a0, TCGReg a1, TCGReg a2)
2399{
2400    tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
2401}
2402
2403static void tgen_xori(TCGContext *s, TCGType type,
2404                      TCGReg a0, TCGReg a1, tcg_target_long a2)
2405{
2406    tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
2407}
2408
2409static const TCGOutOpBinary outop_xor = {
2410    .base.static_constraint = C_O1_I2(r, r, rI),
2411    .out_rrr = tgen_xor,
2412    .out_rri = tgen_xori,
2413};
2414
2415static TCGConstraintSetIndex cset_bswap(TCGType type, unsigned flags)
2416{
2417    return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented;
2418}
2419
2420static void tgen_bswap16(TCGContext *s, TCGType type,
2421                         TCGReg a0, TCGReg a1, unsigned flags)
2422{
2423    tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2424    if (flags & TCG_BSWAP_OZ) {
2425        tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
2426    } else {
2427        tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
2428    }
2429}
2430
2431static const TCGOutOpBswap outop_bswap16 = {
2432    .base.static_constraint = C_Dynamic,
2433    .base.dynamic_constraint = cset_bswap,
2434    .out_rr = tgen_bswap16,
2435};
2436
2437static void tgen_bswap32(TCGContext *s, TCGType type,
2438                         TCGReg a0, TCGReg a1, unsigned flags)
2439{
2440    tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2441    if (flags & TCG_BSWAP_OZ) {
2442        tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
2443    } else {
2444        tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
2445    }
2446}
2447
2448static const TCGOutOpBswap outop_bswap32 = {
2449    .base.static_constraint = C_Dynamic,
2450    .base.dynamic_constraint = cset_bswap,
2451    .out_rr = tgen_bswap32,
2452};
2453
2454static void tgen_bswap64(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2455{
2456    tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
2457}
2458
2459static const TCGOutOpUnary outop_bswap64 = {
2460    .base.static_constraint = C_Dynamic,
2461    .base.dynamic_constraint = cset_bswap,
2462    .out_rr = tgen_bswap64,
2463};
2464
2465static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2466{
2467    tgen_sub(s, type, a0, TCG_REG_ZERO, a1);
2468}
2469
2470static const TCGOutOpUnary outop_neg = {
2471    .base.static_constraint = C_O1_I1(r, r),
2472    .out_rr = tgen_neg,
2473};
2474
2475static void tgen_not(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
2476{
2477    tgen_xori(s, type, a0, a1, -1);
2478}
2479
2480static const TCGOutOpUnary outop_not = {
2481    .base.static_constraint = C_O1_I1(r, r),
2482    .out_rr = tgen_not,
2483};
2484
2485static const TCGOutOpDeposit outop_deposit = {
2486    .base.static_constraint = C_NotImplemented,
2487};
2488
2489static void tgen_extract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2490                         unsigned ofs, unsigned len)
2491{
2492    if (ofs == 0) {
2493        switch (len) {
2494        case 16:
2495            tcg_out_ext16u(s, a0, a1);
2496            return;
2497        case 32:
2498            tcg_out_ext32u(s, a0, a1);
2499            return;
2500        }
2501    }
2502    if (ofs + len == 32) {
2503        tgen_shli(s, TCG_TYPE_I32, a0, a1, ofs);
2504        return;
2505    }
2506    if (len == 1) {
2507        tcg_out_opc_imm(s, OPC_BEXTI, a0, a1, ofs);
2508        return;
2509    }
2510    g_assert_not_reached();
2511}
2512
2513static const TCGOutOpExtract outop_extract = {
2514    .base.static_constraint = C_O1_I1(r, r),
2515    .out_rr = tgen_extract,
2516};
2517
2518static void tgen_sextract(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
2519                          unsigned ofs, unsigned len)
2520{
2521    if (ofs == 0) {
2522        switch (len) {
2523        case 8:
2524            tcg_out_ext8s(s, type, a0, a1);
2525            return;
2526        case 16:
2527            tcg_out_ext16s(s, type, a0, a1);
2528            return;
2529        case 32:
2530            tcg_out_ext32s(s, a0, a1);
2531            return;
2532        }
2533    } else if (ofs + len == 32) {
2534        tgen_sari(s, TCG_TYPE_I32, a0, a1, ofs);
2535        return;
2536    }
2537    g_assert_not_reached();
2538}
2539
2540static const TCGOutOpExtract outop_sextract = {
2541    .base.static_constraint = C_O1_I1(r, r),
2542    .out_rr = tgen_sextract,
2543};
2544
2545static const TCGOutOpExtract2 outop_extract2 = {
2546    .base.static_constraint = C_NotImplemented,
2547};
2548
2549
2550static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
2551                       const TCGArg args[TCG_MAX_OP_ARGS],
2552                       const int const_args[TCG_MAX_OP_ARGS])
2553{
2554    TCGArg a0 = args[0];
2555    TCGArg a1 = args[1];
2556    TCGArg a2 = args[2];
2557
2558    switch (opc) {
2559    case INDEX_op_goto_ptr:
2560        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
2561        break;
2562
2563    case INDEX_op_br:
2564        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
2565        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
2566        break;
2567
2568    case INDEX_op_ld8u_i32:
2569    case INDEX_op_ld8u_i64:
2570        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
2571        break;
2572    case INDEX_op_ld8s_i32:
2573    case INDEX_op_ld8s_i64:
2574        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
2575        break;
2576    case INDEX_op_ld16u_i32:
2577    case INDEX_op_ld16u_i64:
2578        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
2579        break;
2580    case INDEX_op_ld16s_i32:
2581    case INDEX_op_ld16s_i64:
2582        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
2583        break;
2584    case INDEX_op_ld32u_i64:
2585        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
2586        break;
2587    case INDEX_op_ld_i32:
2588    case INDEX_op_ld32s_i64:
2589        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
2590        break;
2591    case INDEX_op_ld_i64:
2592        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
2593        break;
2594
2595    case INDEX_op_st8_i32:
2596    case INDEX_op_st8_i64:
2597        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
2598        break;
2599    case INDEX_op_st16_i32:
2600    case INDEX_op_st16_i64:
2601        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
2602        break;
2603    case INDEX_op_st_i32:
2604    case INDEX_op_st32_i64:
2605        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
2606        break;
2607    case INDEX_op_st_i64:
2608        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
2609        break;
2610
2611    case INDEX_op_add2_i32:
2612        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2613                        const_args[4], const_args[5], false, true);
2614        break;
2615    case INDEX_op_add2_i64:
2616        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2617                        const_args[4], const_args[5], false, false);
2618        break;
2619    case INDEX_op_sub2_i32:
2620        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2621                        const_args[4], const_args[5], true, true);
2622        break;
2623    case INDEX_op_sub2_i64:
2624        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
2625                        const_args[4], const_args[5], true, false);
2626        break;
2627
2628    case INDEX_op_qemu_ld_i32:
2629        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
2630        break;
2631    case INDEX_op_qemu_ld_i64:
2632        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
2633        break;
2634    case INDEX_op_qemu_st_i32:
2635        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
2636        break;
2637    case INDEX_op_qemu_st_i64:
2638        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
2639        break;
2640
2641    case INDEX_op_mb:
2642        tcg_out_mb(s, a0);
2643        break;
2644
2645    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2646    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2647    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2648    default:
2649        g_assert_not_reached();
2650    }
2651}
2652
/*
 * Emit host code for one vector TCG opcode.
 *
 * @vecl is added to TCG_TYPE_V64 to obtain the vector type and @vece is
 * the element-size selector handed to set_vtype_len_sew().  @args holds
 * the operands and @const_args flags which of them are constants.
 *
 * Bitwise operations (and/or/xor/not) configure the vector unit with
 * set_vtype_len() only; all other arithmetic, shift, rotate operations
 * use set_vtype_len_sew() so that the element width is applied too.
 * The rotate expansions use TCG_REG_V0 as a scratch vector register.
 * Any opcode not listed here aborts.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0, a1, a2;
    int c2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    c2 = const_args[2];

    switch (opc) {
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_sub_vec:
        set_vtype_len_sew(s, type, vece);
        if (const_args[1]) {
            /* Constant first operand: reverse-subtract it from a2. */
            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
        } else {
            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
        }
        break;
    case INDEX_op_and_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_or_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_xor_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_not_vec:
        /* NOT is an exclusive-or with the immediate -1. */
        set_vtype_len(s, type);
        tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
        break;
    case INDEX_op_neg_vec:
        /* Negation is a reverse-subtract from the immediate 0. */
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
        break;
    case INDEX_op_mul_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_sssub_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_usadd_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_ussub_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_smax_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_smin_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_umax_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_umin_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
        break;
    /* Shifts by a scalar register (vector-scalar instruction forms). */
    case INDEX_op_shls_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
        break;
    case INDEX_op_shrs_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
        break;
    case INDEX_op_sars_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
        break;
    /* Shifts by a vector of counts (vector-vector instruction forms). */
    case INDEX_op_shlv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
        break;
    case INDEX_op_shrv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
        break;
    case INDEX_op_sarv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
        break;
    /* Shifts by a constant; tcg_out_vshifti() is given both encodings. */
    case INDEX_op_shli_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
        break;
    case INDEX_op_shri_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
        break;
    case INDEX_op_sari_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
        break;
    case INDEX_op_rotli_vec:
        /*
         * Rotate left by constant: (a1 << a2) | (a1 >> -a2), with the
         * right count masked by (8 << vece) - 1 -- presumably the
         * element width in bits minus one; confirm vece encoding.
         */
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
                        -a2 & ((8 << vece) - 1));
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotls_vec:
        /*
         * Rotate left by scalar register: the negated count is built
         * in TCG_REG_TMP0 and used for the right-shift half.
         */
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
        tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotlv_vec:
        /*
         * Rotate left by vector: V0 = -a2, V0 = a1 >> V0,
         * a0 = a1 << a2, a0 |= V0.
         */
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
        tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotrv_vec:
        /*
         * Rotate right by vector: V0 = -a2, V0 = a1 << V0,
         * a0 = a1 >> a2, a0 |= V0.
         */
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
        tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_cmp_vec:
        /* Compare is a compare-select between the constants -1 and 0. */
        tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
                       -1, true, 0, true);
        break;
    case INDEX_op_cmpsel_vec:
        tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
                       args[3], const_args[3], args[4], const_args[4]);
        break;
    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2823
2824void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2825                       TCGArg a0, ...)
2826{
2827    g_assert_not_reached();
2828}
2829
2830int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2831{
2832    switch (opc) {
2833    case INDEX_op_add_vec:
2834    case INDEX_op_sub_vec:
2835    case INDEX_op_and_vec:
2836    case INDEX_op_or_vec:
2837    case INDEX_op_xor_vec:
2838    case INDEX_op_not_vec:
2839    case INDEX_op_neg_vec:
2840    case INDEX_op_mul_vec:
2841    case INDEX_op_ssadd_vec:
2842    case INDEX_op_sssub_vec:
2843    case INDEX_op_usadd_vec:
2844    case INDEX_op_ussub_vec:
2845    case INDEX_op_smax_vec:
2846    case INDEX_op_smin_vec:
2847    case INDEX_op_umax_vec:
2848    case INDEX_op_umin_vec:
2849    case INDEX_op_shls_vec:
2850    case INDEX_op_shrs_vec:
2851    case INDEX_op_sars_vec:
2852    case INDEX_op_shlv_vec:
2853    case INDEX_op_shrv_vec:
2854    case INDEX_op_sarv_vec:
2855    case INDEX_op_shri_vec:
2856    case INDEX_op_shli_vec:
2857    case INDEX_op_sari_vec:
2858    case INDEX_op_rotls_vec:
2859    case INDEX_op_rotlv_vec:
2860    case INDEX_op_rotrv_vec:
2861    case INDEX_op_rotli_vec:
2862    case INDEX_op_cmp_vec:
2863    case INDEX_op_cmpsel_vec:
2864        return 1;
2865    default:
2866        return 0;
2867    }
2868}
2869
2870static TCGConstraintSetIndex
2871tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
2872{
2873    switch (op) {
2874    case INDEX_op_goto_ptr:
2875        return C_O0_I1(r);
2876
2877    case INDEX_op_ld8u_i32:
2878    case INDEX_op_ld8s_i32:
2879    case INDEX_op_ld16u_i32:
2880    case INDEX_op_ld16s_i32:
2881    case INDEX_op_ld_i32:
2882    case INDEX_op_ld8u_i64:
2883    case INDEX_op_ld8s_i64:
2884    case INDEX_op_ld16u_i64:
2885    case INDEX_op_ld16s_i64:
2886    case INDEX_op_ld32s_i64:
2887    case INDEX_op_ld32u_i64:
2888    case INDEX_op_ld_i64:
2889        return C_O1_I1(r, r);
2890
2891    case INDEX_op_st8_i32:
2892    case INDEX_op_st16_i32:
2893    case INDEX_op_st_i32:
2894    case INDEX_op_st8_i64:
2895    case INDEX_op_st16_i64:
2896    case INDEX_op_st32_i64:
2897    case INDEX_op_st_i64:
2898        return C_O0_I2(rz, r);
2899
2900    case INDEX_op_add2_i32:
2901    case INDEX_op_add2_i64:
2902    case INDEX_op_sub2_i32:
2903    case INDEX_op_sub2_i64:
2904        return C_O2_I4(r, r, rz, rz, rM, rM);
2905
2906    case INDEX_op_qemu_ld_i32:
2907    case INDEX_op_qemu_ld_i64:
2908        return C_O1_I1(r, r);
2909    case INDEX_op_qemu_st_i32:
2910    case INDEX_op_qemu_st_i64:
2911        return C_O0_I2(rz, r);
2912
2913    case INDEX_op_st_vec:
2914        return C_O0_I2(v, r);
2915    case INDEX_op_dup_vec:
2916    case INDEX_op_dupm_vec:
2917    case INDEX_op_ld_vec:
2918        return C_O1_I1(v, r);
2919    case INDEX_op_neg_vec:
2920    case INDEX_op_not_vec:
2921    case INDEX_op_shli_vec:
2922    case INDEX_op_shri_vec:
2923    case INDEX_op_sari_vec:
2924    case INDEX_op_rotli_vec:
2925        return C_O1_I1(v, v);
2926    case INDEX_op_add_vec:
2927    case INDEX_op_and_vec:
2928    case INDEX_op_or_vec:
2929    case INDEX_op_xor_vec:
2930    case INDEX_op_ssadd_vec:
2931    case INDEX_op_sssub_vec:
2932    case INDEX_op_usadd_vec:
2933    case INDEX_op_ussub_vec:
2934    case INDEX_op_smax_vec:
2935    case INDEX_op_smin_vec:
2936    case INDEX_op_umax_vec:
2937    case INDEX_op_umin_vec:
2938        return C_O1_I2(v, v, vK);
2939    case INDEX_op_sub_vec:
2940        return C_O1_I2(v, vK, v);
2941    case INDEX_op_mul_vec:
2942    case INDEX_op_shlv_vec:
2943    case INDEX_op_shrv_vec:
2944    case INDEX_op_sarv_vec:
2945    case INDEX_op_rotlv_vec:
2946    case INDEX_op_rotrv_vec:
2947        return C_O1_I2(v, v, v);
2948    case INDEX_op_shls_vec:
2949    case INDEX_op_shrs_vec:
2950    case INDEX_op_sars_vec:
2951    case INDEX_op_rotls_vec:
2952        return C_O1_I2(v, v, r);
2953    case INDEX_op_cmp_vec:
2954        return C_O1_I2(v, v, vL);
2955    case INDEX_op_cmpsel_vec:
2956        return C_O1_I4(v, v, vL, vK, vK);
2957    default:
2958        return C_NotImplemented;
2959    }
2960}
2961
/*
 * Registers spilled by the prologue and reloaded by the epilogue.
 *
 * The position of a register in this table is also its slot index in the
 * stack save area: entry i lives at SP + SAVE_OFS + i * REG_SIZE.  The
 * array length determines SAVE_SIZE and the size of the unwind table, so
 * keep all of them in step when editing this list.
 */
static const int tcg_target_callee_save_regs[] = {
    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_S10,
    TCG_REG_S11,
    TCG_REG_RA,       /* should be last for ABI compliance */
};
2977
/*
 * Stack frame parameters.
 *
 * Frame layout, from SP upwards:
 *   [0, TCG_STATIC_CALL_ARGS_SIZE)        outgoing call arguments
 *   [.., + TEMP_SIZE)                     TCG temporary buffer
 *   [SAVE_OFS, SAVE_OFS + SAVE_SIZE)      callee-saved registers
 *   padding up to a multiple of TCG_TARGET_STACK_ALIGN
 */
/* Size in bytes of one host general-purpose register. */
#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
/* Bytes needed to spill every entry of tcg_target_callee_save_regs. */
#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
/* Bytes reserved for the temporary buffer (CPU_TEMP_BUF_NLONGS longs). */
#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
/* Total frame size, rounded up to the stack alignment. */
#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
                     + TCG_TARGET_STACK_ALIGN - 1) \
                    & -TCG_TARGET_STACK_ALIGN)
/* Offset from SP of the first callee-saved register slot. */
#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)

/*
 * We're expecting to be able to use an immediate for frame allocation:
 * the prologue and epilogue adjust SP with a single ADDI of -FRAME_SIZE
 * and +FRAME_SIZE, and 0x7ff is the largest positive 12-bit signed value.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2989
2990/* Generate global QEMU prologue and epilogue code */
2991static void tcg_target_qemu_prologue(TCGContext *s)
2992{
2993    int i;
2994
2995    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2996
2997    /* TB prologue */
2998    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2999    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
3000        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
3001                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
3002    }
3003
3004    if (!tcg_use_softmmu && guest_base) {
3005        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3006        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3007    }
3008
3009    /* Call generated code */
3010    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3011    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
3012
3013    /* Return path for goto_ptr. Set return value to 0 */
3014    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3015    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
3016
3017    /* TB epilogue */
3018    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3019    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
3020        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
3021                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
3022    }
3023
3024    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
3025    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
3026}
3027
3028static void tcg_out_tb_start(TCGContext *s)
3029{
3030    init_setting_vtype(s);
3031}
3032
3033static bool vtype_check(unsigned vtype)
3034{
3035    unsigned long tmp;
3036
3037    /* vsetvl tmp, zero, vtype */
3038    asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
3039    return tmp != 0;
3040}
3041
3042static void probe_frac_lmul_1(TCGType type, MemOp vsew)
3043{
3044    VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
3045    unsigned avl = tcg_type_size(type) >> vsew;
3046    int lmul = type - riscv_lg2_vlenb;
3047    unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
3048    bool lmul_eq_avl = true;
3049
3050    /* Guaranteed by Zve64x. */
3051    assert(lmul < 3);
3052
3053    /*
3054     * For LMUL < -3, the host vector size is so large that TYPE
3055     * is smaller than the minimum 1/8 fraction.
3056     *
3057     * For other fractional LMUL settings, implementations must
3058     * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
3059     * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
3060     * but e64 may not be supported. In other words, the hardware only
3061     * guarantees SEW_MIN <= SEW <= LMUL * ELEN.  Check.
3062     */
3063    if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
3064        vtype = encode_vtype(true, true, vsew, VLMUL_M1);
3065        lmul_eq_avl = false;
3066    }
3067
3068    if (avl < 32) {
3069        p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
3070    } else if (lmul_eq_avl) {
3071        /* rd != 0 and rs1 == 0 uses vlmax */
3072        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
3073    } else {
3074        p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
3075        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
3076    }
3077}
3078
3079static void probe_frac_lmul(void)
3080{
3081    /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
3082    QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
3083
3084    for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
3085        for (MemOp e = MO_8; e <= MO_64; e++) {
3086            probe_frac_lmul_1(t, e);
3087        }
3088    }
3089}
3090
3091static void tcg_target_init(TCGContext *s)
3092{
3093    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
3094    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
3095
3096    tcg_target_call_clobber_regs = -1;
3097    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
3098    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
3099    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
3100    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
3101    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
3102    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
3103    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
3104    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
3105    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
3106    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
3107    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
3108    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
3109
3110    s->reserved_regs = 0;
3111    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
3112    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
3113    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
3114    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
3115    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
3116    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
3117    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
3118
3119    if (cpuinfo & CPUINFO_ZVE64X) {
3120        switch (riscv_lg2_vlenb) {
3121        case TCG_TYPE_V64:
3122            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3123            tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
3124            tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
3125            s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
3126            break;
3127        case TCG_TYPE_V128:
3128            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3129            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3130            tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
3131            s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
3132            break;
3133        default:
3134            /* Guaranteed by Zve64x. */
3135            tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
3136            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
3137            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
3138            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
3139            break;
3140        }
3141        tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
3142        probe_frac_lmul();
3143    }
3144}
3145
/*
 * Unwind description for the generated-code frame: the common header
 * followed by this backend's call-frame instructions.
 */
typedef struct {
    DebugFrameHeader h;
    /* DW_CFA_def_cfa: opcode, register, then a two-byte uleb128 offset. */
    uint8_t fde_def_cfa[4];
    /* One two-byte DW_CFA_offset entry per callee-saved register. */
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
} DebugFrame;

/*
 * ELF machine identifier for this host.
 * NOTE(review): presumably consumed by the common JIT debug-info code;
 * its use is not visible in this part of the file.
 */
#define ELF_HOST_MACHINE EM_RISCV
3153
3154static const DebugFrame debug_frame = {
3155    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
3156    .h.cie.id = -1,
3157    .h.cie.version = 1,
3158    .h.cie.code_align = 1,
3159    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
3160    .h.cie.return_column = TCG_REG_RA,
3161
3162    /* Total FDE size does not include the "len" member.  */
3163    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3164
3165    .fde_def_cfa = {
3166        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
3167        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3168        (FRAME_SIZE >> 7)
3169    },
3170    .fde_reg_ofs = {
3171        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
3172        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
3173        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
3174        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
3175        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
3176        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
3177        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
3178        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
3179        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
3180        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
3181        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
3182        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
3183    }
3184};
3185
3186void tcg_register_jit(const void *buf, size_t buf_size)
3187{
3188    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3189}
3190