xref: /openbmc/qemu/tcg/riscv/tcg-target.c.inc (revision cde3c425)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2018 SiFive, Inc
5 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
6 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
7 * Copyright (c) 2008 Fabrice Bellard
8 *
9 * Based on i386/tcg-target.c and mips/tcg-target.c
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 */
29
30#include "../tcg-ldst.c.inc"
31#include "../tcg-pool.c.inc"
32
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by register number: the 32 integer
 * registers under their ABI names first, then the 32 vector registers.
 * Only built when CONFIG_DEBUG_TCG is defined.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /*  0 */ "zero", "ra",  "sp",  "gp",
    /*  4 */ "tp",   "t0",  "t1",  "t2",
    /*  8 */ "s0",   "s1",  "a0",  "a1",
    /* 12 */ "a2",   "a3",  "a4",  "a5",
    /* 16 */ "a6",   "a7",  "s2",  "s3",
    /* 20 */ "s4",   "s5",  "s6",  "s7",
    /* 24 */ "s8",   "s9",  "s10", "s11",
    /* 28 */ "t3",   "t4",  "t5",  "t6",
    /* 32 */ "v0",   "v1",  "v2",  "v3",
    /* 36 */ "v4",   "v5",  "v6",  "v7",
    /* 40 */ "v8",   "v9",  "v10", "v11",
    /* 44 */ "v12",  "v13", "v14", "v15",
    /* 48 */ "v16",  "v17", "v18", "v19",
    /* 52 */ "v20",  "v21", "v22", "v23",
    /* 56 */ "v24",  "v25", "v26", "v27",
    /* 60 */ "v28",  "v29", "v30", "v31",
};
#endif
45
46static const int tcg_target_reg_alloc_order[] = {
47    /* Call saved registers */
48    /* TCG_REG_S0 reserved for TCG_AREG0 */
49    TCG_REG_S1,
50    TCG_REG_S2,
51    TCG_REG_S3,
52    TCG_REG_S4,
53    TCG_REG_S5,
54    TCG_REG_S6,
55    TCG_REG_S7,
56    TCG_REG_S8,
57    TCG_REG_S9,
58    TCG_REG_S10,
59    TCG_REG_S11,
60
61    /* Call clobbered registers */
62    TCG_REG_T0,
63    TCG_REG_T1,
64    TCG_REG_T2,
65    TCG_REG_T3,
66    TCG_REG_T4,
67    TCG_REG_T5,
68    TCG_REG_T6,
69
70    /* Argument registers */
71    TCG_REG_A0,
72    TCG_REG_A1,
73    TCG_REG_A2,
74    TCG_REG_A3,
75    TCG_REG_A4,
76    TCG_REG_A5,
77    TCG_REG_A6,
78    TCG_REG_A7,
79
80    /* Vector registers and TCG_REG_V0 reserved for mask. */
81    TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,  TCG_REG_V4,
82    TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,  TCG_REG_V8,
83    TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11, TCG_REG_V12,
84    TCG_REG_V13, TCG_REG_V14, TCG_REG_V15, TCG_REG_V16,
85    TCG_REG_V17, TCG_REG_V18, TCG_REG_V19, TCG_REG_V20,
86    TCG_REG_V21, TCG_REG_V22, TCG_REG_V23, TCG_REG_V24,
87    TCG_REG_V25, TCG_REG_V26, TCG_REG_V27, TCG_REG_V28,
88    TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
89};
90
91static const int tcg_target_call_iarg_regs[] = {
92    TCG_REG_A0,
93    TCG_REG_A1,
94    TCG_REG_A2,
95    TCG_REG_A3,
96    TCG_REG_A4,
97    TCG_REG_A5,
98    TCG_REG_A6,
99    TCG_REG_A7,
100};
101
102static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
103{
104    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
105    tcg_debug_assert(slot >= 0 && slot <= 1);
106    return TCG_REG_A0 + slot;
107}
108
/*
 * Target-specific constant constraint flags, one bit each, tested by
 * tcg_target_const_match().
 */
#define TCG_CT_CONST_ZERO    (1 << 8)   /* the value zero */
#define TCG_CT_CONST_S12     (1 << 9)   /* [-0x800, 0x7ff] */
#define TCG_CT_CONST_N12     (1 << 10)  /* [-0x7ff, 0x800] */
#define TCG_CT_CONST_M12     (1 << 11)  /* [-0x7ff, 0x7ff] */
#define TCG_CT_CONST_J12     (1 << 12)  /* ~value in [-0x800, 0x7ff] */
#define TCG_CT_CONST_S5      (1 << 13)  /* [-0x10, 0x0f] */
#define TCG_CT_CONST_CMP_VI  (1 << 14)  /* per-condition compare range */

/*
 * Register-set masks: bits 0..31 cover the general registers and
 * bits 32..63 the vector registers.  The group masks select every
 * second / every fourth vector register.
 */
#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)
#define ALL_DVECTOR_REG_GROUPS 0x5555555500000000
#define ALL_QVECTOR_REG_GROUPS 0x1111111100000000

/* Shorthand used throughout this file for sign-extracting a bit field. */
#define sextreg  sextract64
123
124/*
125 * RISC-V Base ISA opcodes (IM)
126 */
127
/*
 * Vector instruction format selectors, placed in bits [14:12] of the
 * instruction word.
 */
#define V_OPIVV (0x0 << 12)
#define V_OPFVV (0x1 << 12)
#define V_OPMVV (0x2 << 12)
#define V_OPIVI (0x3 << 12)
#define V_OPIVX (0x4 << 12)
#define V_OPFVF (0x5 << 12)
#define V_OPMVX (0x6 << 12)
#define V_OPCFG (0x7 << 12)

/*
 * NF field for vector loads/stores, placed in bits [31:29].
 * NF <= 7 && NF >= 0.
 *
 * The argument is parenthesized so that compound expressions bind as
 * intended, and the shift is done in uint32_t because V_NF(4..7) sets
 * bit 31, which would overflow a signed int shift.
 */
#define V_NF(x) ((uint32_t)(x) << 29)
#define V_UNIT_STRIDE (0x0 << 20)
#define V_UNIT_STRIDE_WHOLE_REG (0x8 << 20)
141
/*
 * Values of the vlmul field, as passed to encode_vtype().
 * Value 4 is reserved.
 */
typedef enum {
    VLMUL_M1       = 0, /* LMUL=1 */
    VLMUL_M2       = 1, /* LMUL=2 */
    VLMUL_M4       = 2, /* LMUL=4 */
    VLMUL_M8       = 3, /* LMUL=8 */
    VLMUL_RESERVED = 4,
    VLMUL_MF8      = 5, /* LMUL=1/8 */
    VLMUL_MF4      = 6, /* LMUL=1/4 */
    VLMUL_MF2      = 7, /* LMUL=1/2 */
} RISCVVlmul;
152
153typedef enum {
154    OPC_ADD = 0x33,
155    OPC_ADDI = 0x13,
156    OPC_AND = 0x7033,
157    OPC_ANDI = 0x7013,
158    OPC_AUIPC = 0x17,
159    OPC_BEQ = 0x63,
160    OPC_BGE = 0x5063,
161    OPC_BGEU = 0x7063,
162    OPC_BLT = 0x4063,
163    OPC_BLTU = 0x6063,
164    OPC_BNE = 0x1063,
165    OPC_DIV = 0x2004033,
166    OPC_DIVU = 0x2005033,
167    OPC_JAL = 0x6f,
168    OPC_JALR = 0x67,
169    OPC_LB = 0x3,
170    OPC_LBU = 0x4003,
171    OPC_LD = 0x3003,
172    OPC_LH = 0x1003,
173    OPC_LHU = 0x5003,
174    OPC_LUI = 0x37,
175    OPC_LW = 0x2003,
176    OPC_LWU = 0x6003,
177    OPC_MUL = 0x2000033,
178    OPC_MULH = 0x2001033,
179    OPC_MULHSU = 0x2002033,
180    OPC_MULHU = 0x2003033,
181    OPC_OR = 0x6033,
182    OPC_ORI = 0x6013,
183    OPC_REM = 0x2006033,
184    OPC_REMU = 0x2007033,
185    OPC_SB = 0x23,
186    OPC_SD = 0x3023,
187    OPC_SH = 0x1023,
188    OPC_SLL = 0x1033,
189    OPC_SLLI = 0x1013,
190    OPC_SLT = 0x2033,
191    OPC_SLTI = 0x2013,
192    OPC_SLTIU = 0x3013,
193    OPC_SLTU = 0x3033,
194    OPC_SRA = 0x40005033,
195    OPC_SRAI = 0x40005013,
196    OPC_SRL = 0x5033,
197    OPC_SRLI = 0x5013,
198    OPC_SUB = 0x40000033,
199    OPC_SW = 0x2023,
200    OPC_XOR = 0x4033,
201    OPC_XORI = 0x4013,
202
203    OPC_ADDIW = 0x1b,
204    OPC_ADDW = 0x3b,
205    OPC_DIVUW = 0x200503b,
206    OPC_DIVW = 0x200403b,
207    OPC_MULW = 0x200003b,
208    OPC_REMUW = 0x200703b,
209    OPC_REMW = 0x200603b,
210    OPC_SLLIW = 0x101b,
211    OPC_SLLW = 0x103b,
212    OPC_SRAIW = 0x4000501b,
213    OPC_SRAW = 0x4000503b,
214    OPC_SRLIW = 0x501b,
215    OPC_SRLW = 0x503b,
216    OPC_SUBW = 0x4000003b,
217
218    OPC_FENCE = 0x0000000f,
219    OPC_NOP   = OPC_ADDI,   /* nop = addi r0,r0,0 */
220
221    /* Zba: Bit manipulation extension, address generation */
222    OPC_ADD_UW = 0x0800003b,
223
224    /* Zbb: Bit manipulation extension, basic bit manipulation */
225    OPC_ANDN   = 0x40007033,
226    OPC_CLZ    = 0x60001013,
227    OPC_CLZW   = 0x6000101b,
228    OPC_CPOP   = 0x60201013,
229    OPC_CPOPW  = 0x6020101b,
230    OPC_CTZ    = 0x60101013,
231    OPC_CTZW   = 0x6010101b,
232    OPC_ORN    = 0x40006033,
233    OPC_REV8   = 0x6b805013,
234    OPC_ROL    = 0x60001033,
235    OPC_ROLW   = 0x6000103b,
236    OPC_ROR    = 0x60005033,
237    OPC_RORW   = 0x6000503b,
238    OPC_RORI   = 0x60005013,
239    OPC_RORIW  = 0x6000501b,
240    OPC_SEXT_B = 0x60401013,
241    OPC_SEXT_H = 0x60501013,
242    OPC_XNOR   = 0x40004033,
243    OPC_ZEXT_H = 0x0800403b,
244
245    /* Zicond: integer conditional operations */
246    OPC_CZERO_EQZ = 0x0e005033,
247    OPC_CZERO_NEZ = 0x0e007033,
248
249    /* V: Vector extension 1.0 */
250    OPC_VSETVLI  = 0x57 | V_OPCFG,
251    OPC_VSETIVLI = 0xc0000057 | V_OPCFG,
252    OPC_VSETVL   = 0x80000057 | V_OPCFG,
253
254    OPC_VLE8_V  = 0x7 | V_UNIT_STRIDE,
255    OPC_VLE16_V = 0x5007 | V_UNIT_STRIDE,
256    OPC_VLE32_V = 0x6007 | V_UNIT_STRIDE,
257    OPC_VLE64_V = 0x7007 | V_UNIT_STRIDE,
258    OPC_VSE8_V  = 0x27 | V_UNIT_STRIDE,
259    OPC_VSE16_V = 0x5027 | V_UNIT_STRIDE,
260    OPC_VSE32_V = 0x6027 | V_UNIT_STRIDE,
261    OPC_VSE64_V = 0x7027 | V_UNIT_STRIDE,
262
263    OPC_VL1RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
264    OPC_VL2RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
265    OPC_VL4RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
266    OPC_VL8RE64_V = 0x2007007 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
267
268    OPC_VS1R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(0),
269    OPC_VS2R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(1),
270    OPC_VS4R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(3),
271    OPC_VS8R_V = 0x2000027 | V_UNIT_STRIDE_WHOLE_REG | V_NF(7),
272
273    OPC_VMERGE_VIM = 0x5c000057 | V_OPIVI,
274    OPC_VMERGE_VVM = 0x5c000057 | V_OPIVV,
275
276    OPC_VADD_VV = 0x57 | V_OPIVV,
277    OPC_VADD_VI = 0x57 | V_OPIVI,
278    OPC_VSUB_VV = 0x8000057 | V_OPIVV,
279    OPC_VRSUB_VI = 0xc000057 | V_OPIVI,
280    OPC_VAND_VV = 0x24000057 | V_OPIVV,
281    OPC_VAND_VI = 0x24000057 | V_OPIVI,
282    OPC_VOR_VV = 0x28000057 | V_OPIVV,
283    OPC_VOR_VI = 0x28000057 | V_OPIVI,
284    OPC_VXOR_VV = 0x2c000057 | V_OPIVV,
285    OPC_VXOR_VI = 0x2c000057 | V_OPIVI,
286
287    OPC_VMUL_VV = 0x94000057 | V_OPMVV,
288    OPC_VSADD_VV = 0x84000057 | V_OPIVV,
289    OPC_VSADD_VI = 0x84000057 | V_OPIVI,
290    OPC_VSSUB_VV = 0x8c000057 | V_OPIVV,
291    OPC_VSSUB_VI = 0x8c000057 | V_OPIVI,
292    OPC_VSADDU_VV = 0x80000057 | V_OPIVV,
293    OPC_VSADDU_VI = 0x80000057 | V_OPIVI,
294    OPC_VSSUBU_VV = 0x88000057 | V_OPIVV,
295    OPC_VSSUBU_VI = 0x88000057 | V_OPIVI,
296
297    OPC_VMAX_VV = 0x1c000057 | V_OPIVV,
298    OPC_VMAX_VI = 0x1c000057 | V_OPIVI,
299    OPC_VMAXU_VV = 0x18000057 | V_OPIVV,
300    OPC_VMAXU_VI = 0x18000057 | V_OPIVI,
301    OPC_VMIN_VV = 0x14000057 | V_OPIVV,
302    OPC_VMIN_VI = 0x14000057 | V_OPIVI,
303    OPC_VMINU_VV = 0x10000057 | V_OPIVV,
304    OPC_VMINU_VI = 0x10000057 | V_OPIVI,
305
306    OPC_VMSEQ_VV = 0x60000057 | V_OPIVV,
307    OPC_VMSEQ_VI = 0x60000057 | V_OPIVI,
308    OPC_VMSEQ_VX = 0x60000057 | V_OPIVX,
309    OPC_VMSNE_VV = 0x64000057 | V_OPIVV,
310    OPC_VMSNE_VI = 0x64000057 | V_OPIVI,
311    OPC_VMSNE_VX = 0x64000057 | V_OPIVX,
312
313    OPC_VMSLTU_VV = 0x68000057 | V_OPIVV,
314    OPC_VMSLTU_VX = 0x68000057 | V_OPIVX,
315    OPC_VMSLT_VV = 0x6c000057 | V_OPIVV,
316    OPC_VMSLT_VX = 0x6c000057 | V_OPIVX,
317    OPC_VMSLEU_VV = 0x70000057 | V_OPIVV,
318    OPC_VMSLEU_VX = 0x70000057 | V_OPIVX,
319    OPC_VMSLE_VV = 0x74000057 | V_OPIVV,
320    OPC_VMSLE_VX = 0x74000057 | V_OPIVX,
321
322    OPC_VMSLEU_VI = 0x70000057 | V_OPIVI,
323    OPC_VMSLE_VI = 0x74000057 | V_OPIVI,
324    OPC_VMSGTU_VI = 0x78000057 | V_OPIVI,
325    OPC_VMSGTU_VX = 0x78000057 | V_OPIVX,
326    OPC_VMSGT_VI = 0x7c000057 | V_OPIVI,
327    OPC_VMSGT_VX = 0x7c000057 | V_OPIVX,
328
329    OPC_VSLL_VV = 0x94000057 | V_OPIVV,
330    OPC_VSLL_VI = 0x94000057 | V_OPIVI,
331    OPC_VSLL_VX = 0x94000057 | V_OPIVX,
332    OPC_VSRL_VV = 0xa0000057 | V_OPIVV,
333    OPC_VSRL_VI = 0xa0000057 | V_OPIVI,
334    OPC_VSRL_VX = 0xa0000057 | V_OPIVX,
335    OPC_VSRA_VV = 0xa4000057 | V_OPIVV,
336    OPC_VSRA_VI = 0xa4000057 | V_OPIVI,
337    OPC_VSRA_VX = 0xa4000057 | V_OPIVX,
338
339    OPC_VMV_V_V = 0x5e000057 | V_OPIVV,
340    OPC_VMV_V_I = 0x5e000057 | V_OPIVI,
341    OPC_VMV_V_X = 0x5e000057 | V_OPIVX,
342
343    OPC_VMVNR_V = 0x9e000057 | V_OPIVI,
344} RISCVInsn;
345
346static const struct {
347    RISCVInsn op;
348    bool swap;
349} tcg_cmpcond_to_rvv_vv[] = {
350    [TCG_COND_EQ] =  { OPC_VMSEQ_VV,  false },
351    [TCG_COND_NE] =  { OPC_VMSNE_VV,  false },
352    [TCG_COND_LT] =  { OPC_VMSLT_VV,  false },
353    [TCG_COND_GE] =  { OPC_VMSLE_VV,  true  },
354    [TCG_COND_GT] =  { OPC_VMSLT_VV,  true  },
355    [TCG_COND_LE] =  { OPC_VMSLE_VV,  false },
356    [TCG_COND_LTU] = { OPC_VMSLTU_VV, false },
357    [TCG_COND_GEU] = { OPC_VMSLEU_VV, true  },
358    [TCG_COND_GTU] = { OPC_VMSLTU_VV, true  },
359    [TCG_COND_LEU] = { OPC_VMSLEU_VV, false }
360};
361
362static const struct {
363    RISCVInsn op;
364    int min;
365    int max;
366    bool adjust;
367}  tcg_cmpcond_to_rvv_vi[] = {
368    [TCG_COND_EQ]  = { OPC_VMSEQ_VI,  -16, 15, false },
369    [TCG_COND_NE]  = { OPC_VMSNE_VI,  -16, 15, false },
370    [TCG_COND_GT]  = { OPC_VMSGT_VI,  -16, 15, false },
371    [TCG_COND_LE]  = { OPC_VMSLE_VI,  -16, 15, false },
372    [TCG_COND_LT]  = { OPC_VMSLE_VI,  -15, 16, true  },
373    [TCG_COND_GE]  = { OPC_VMSGT_VI,  -15, 16, true  },
374    [TCG_COND_LEU] = { OPC_VMSLEU_VI,   0, 15, false },
375    [TCG_COND_GTU] = { OPC_VMSGTU_VI,   0, 15, false },
376    [TCG_COND_LTU] = { OPC_VMSLEU_VI,   1, 16, true  },
377    [TCG_COND_GEU] = { OPC_VMSGTU_VI,   1, 16, true  },
378};
379
380/* test if a constant matches the constraint */
381static bool tcg_target_const_match(int64_t val, int ct,
382                                   TCGType type, TCGCond cond, int vece)
383{
384    if (ct & TCG_CT_CONST) {
385        return 1;
386    }
387    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
388        return 1;
389    }
390    if (type >= TCG_TYPE_V64) {
391        /* Val is replicated by VECE; extract the highest element. */
392        val >>= (-8 << vece) & 63;
393    }
394    /*
395     * Sign extended from 12 bits: [-0x800, 0x7ff].
396     * Used for most arithmetic, as this is the isa field.
397     */
398    if ((ct & TCG_CT_CONST_S12) && val >= -0x800 && val <= 0x7ff) {
399        return 1;
400    }
401    /*
402     * Sign extended from 12 bits, negated: [-0x7ff, 0x800].
403     * Used for subtraction, where a constant must be handled by ADDI.
404     */
405    if ((ct & TCG_CT_CONST_N12) && val >= -0x7ff && val <= 0x800) {
406        return 1;
407    }
408    /*
409     * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff].
410     * Used by addsub2 and movcond, which may need the negative value,
411     * and requires the modified constant to be representable.
412     */
413    if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) {
414        return 1;
415    }
416    /*
417     * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff].
418     * Used to map ANDN back to ANDI, etc.
419     */
420    if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) {
421        return 1;
422    }
423    /*
424     * Sign extended from 5 bits: [-0x10, 0x0f].
425     * Used for vector-immediate.
426     */
427    if ((ct & TCG_CT_CONST_S5) && val >= -0x10 && val <= 0x0f) {
428        return 1;
429    }
430    /*
431     * Used for vector compare OPIVI instructions.
432     */
433    if ((ct & TCG_CT_CONST_CMP_VI) &&
434        val >= tcg_cmpcond_to_rvv_vi[cond].min &&
435        val <= tcg_cmpcond_to_rvv_vi[cond].max) {
436        return true;
437     }
438    return 0;
439}
440
441/*
442 * RISC-V immediate and instruction encoders (excludes 16-bit RVC)
443 */
444
445/* Type-R */
446
447static int32_t encode_r(RISCVInsn opc, TCGReg rd, TCGReg rs1, TCGReg rs2)
448{
449    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20;
450}
451
452/* Type-I */
453
/* Place the low 12 bits of @imm in the I-type immediate field, bits [31:20]. */
static int32_t encode_imm12(uint32_t imm)
{
    uint32_t field = imm & 0xfff;

    return field << 20;
}
458
459static int32_t encode_i(RISCVInsn opc, TCGReg rd, TCGReg rs1, uint32_t imm)
460{
461    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | encode_imm12(imm);
462}
463
464/* Type-S */
465
/*
 * Split a 12-bit immediate across the S-type fields: bits [11:5] of
 * @imm go to instruction bits [31:25], bits [4:0] to bits [11:7].
 */
static int32_t encode_simm12(uint32_t imm)
{
    uint32_t upper = (imm & 0xFE0) << 20;
    uint32_t lower = (imm & 0x1F) << 7;

    return upper | lower;
}
475
476static int32_t encode_s(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
477{
478    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_simm12(imm);
479}
480
481/* Type-SB */
482
/*
 * Scatter a branch offset into the SB-type immediate fields:
 *   imm[12]   -> bit 31
 *   imm[10:5] -> bits [30:25]
 *   imm[4:1]  -> bits [11:8]
 *   imm[11]   -> bit 7
 * Bit 0 of @imm is not encoded.
 */
static int32_t encode_sbimm12(uint32_t imm)
{
    uint32_t bit12 = (imm & 0x1000) << 19;
    uint32_t mid   = (imm & 0x7e0) << 20;
    uint32_t low   = (imm & 0x1e) << 7;
    uint32_t bit11 = (imm & 0x800) >> 4;

    return bit12 | mid | low | bit11;
}
494
495static int32_t encode_sb(RISCVInsn opc, TCGReg rs1, TCGReg rs2, uint32_t imm)
496{
497    return opc | (rs1 & 0x1f) << 15 | (rs2 & 0x1f) << 20 | encode_sbimm12(imm);
498}
499
500/* Type-U */
501
/*
 * U-type immediate: keep bits [31:12] of @imm in place and clear the
 * low 12 bits.
 */
static int32_t encode_uimm20(uint32_t imm)
{
    const uint32_t upper20_mask = 0xfffff000;

    return imm & upper20_mask;
}
506
507static int32_t encode_u(RISCVInsn opc, TCGReg rd, uint32_t imm)
508{
509    return opc | (rd & 0x1f) << 7 | encode_uimm20(imm);
510}
511
512/* Type-UJ */
513
/*
 * Scatter a jump offset into the UJ-type immediate fields:
 *   imm[10:1]  -> bits [30:21]
 *   imm[11]    -> bit 20
 *   imm[19:12] -> bits [19:12]
 *   imm[20]    -> bit 31
 * Bit 0 of @imm is not encoded.
 */
static int32_t encode_ujimm20(uint32_t imm)
{
    uint32_t bits = 0;

    bits |= (imm & 0x100000) << (31 - 20);
    bits |= (imm & 0x0ff000) << (12 - 12);
    bits |= (imm & 0x000800) << (20 - 11);
    bits |= (imm & 0x0007fe) << (21 - 1);

    return bits;
}
525
526static int32_t encode_uj(RISCVInsn opc, TCGReg rd, uint32_t imm)
527{
528    return opc | (rd & 0x1f) << 7 | encode_ujimm20(imm);
529}
530
531
532/* Type-OPIVI */
533
534static int32_t encode_vi(RISCVInsn opc, TCGReg rd, int32_t imm,
535                         TCGReg vs2, bool vm)
536{
537    return opc | (rd & 0x1f) << 7 | (imm & 0x1f) << 15 |
538           (vs2 & 0x1f) << 20 | (vm << 25);
539}
540
541/* Type-OPIVV/OPMVV/OPIVX/OPMVX, Vector load and store */
542
543static int32_t encode_v(RISCVInsn opc, TCGReg d, TCGReg s1,
544                        TCGReg s2, bool vm)
545{
546    return opc | (d & 0x1f) << 7 | (s1 & 0x1f) << 15 |
547           (s2 & 0x1f) << 20 | (vm << 25);
548}
549
550/* Vector vtype */
551
552static uint32_t encode_vtype(bool vta, bool vma,
553                            MemOp vsew, RISCVVlmul vlmul)
554{
555    return vma << 7 | vta << 6 | vsew << 3 | vlmul;
556}
557
558static int32_t encode_vset(RISCVInsn opc, TCGReg rd,
559                           TCGArg rs1, uint32_t vtype)
560{
561    return opc | (rd & 0x1f) << 7 | (rs1 & 0x1f) << 15 | (vtype & 0x7ff) << 20;
562}
563
564static int32_t encode_vseti(RISCVInsn opc, TCGReg rd,
565                            uint32_t uimm, uint32_t vtype)
566{
567    return opc | (rd & 0x1f) << 7 | (uimm & 0x1f) << 15 | (vtype & 0x3ff) << 20;
568}
569
570/*
571 * RISC-V instruction emitters
572 */
573
574static void tcg_out_opc_reg(TCGContext *s, RISCVInsn opc,
575                            TCGReg rd, TCGReg rs1, TCGReg rs2)
576{
577    tcg_out32(s, encode_r(opc, rd, rs1, rs2));
578}
579
580static void tcg_out_opc_imm(TCGContext *s, RISCVInsn opc,
581                            TCGReg rd, TCGReg rs1, TCGArg imm)
582{
583    tcg_out32(s, encode_i(opc, rd, rs1, imm));
584}
585
586static void tcg_out_opc_store(TCGContext *s, RISCVInsn opc,
587                              TCGReg rs1, TCGReg rs2, uint32_t imm)
588{
589    tcg_out32(s, encode_s(opc, rs1, rs2, imm));
590}
591
592static void tcg_out_opc_branch(TCGContext *s, RISCVInsn opc,
593                               TCGReg rs1, TCGReg rs2, uint32_t imm)
594{
595    tcg_out32(s, encode_sb(opc, rs1, rs2, imm));
596}
597
598static void tcg_out_opc_upper(TCGContext *s, RISCVInsn opc,
599                              TCGReg rd, uint32_t imm)
600{
601    tcg_out32(s, encode_u(opc, rd, imm));
602}
603
604static void tcg_out_opc_jump(TCGContext *s, RISCVInsn opc,
605                             TCGReg rd, uint32_t imm)
606{
607    tcg_out32(s, encode_uj(opc, rd, imm));
608}
609
610static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
611{
612    int i;
613    for (i = 0; i < count; ++i) {
614        p[i] = OPC_NOP;
615    }
616}
617
618/*
619 * Relocations
620 */
621
622static bool reloc_sbimm12(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
623{
624    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
625    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
626
627    tcg_debug_assert((offset & 1) == 0);
628    if (offset == sextreg(offset, 0, 12)) {
629        *src_rw |= encode_sbimm12(offset);
630        return true;
631    }
632
633    return false;
634}
635
636static bool reloc_jimm20(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
637{
638    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
639    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
640
641    tcg_debug_assert((offset & 1) == 0);
642    if (offset == sextreg(offset, 0, 20)) {
643        *src_rw |= encode_ujimm20(offset);
644        return true;
645    }
646
647    return false;
648}
649
650static bool reloc_call(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
651{
652    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
653    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
654    int32_t lo = sextreg(offset, 0, 12);
655    int32_t hi = offset - lo;
656
657    if (offset == hi + lo) {
658        src_rw[0] |= encode_uimm20(hi);
659        src_rw[1] |= encode_imm12(lo);
660        return true;
661    }
662
663    return false;
664}
665
666static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
667                        intptr_t value, intptr_t addend)
668{
669    tcg_debug_assert(addend == 0);
670    switch (type) {
671    case R_RISCV_BRANCH:
672        return reloc_sbimm12(code_ptr, (tcg_insn_unit *)value);
673    case R_RISCV_JAL:
674        return reloc_jimm20(code_ptr, (tcg_insn_unit *)value);
675    case R_RISCV_CALL:
676        return reloc_call(code_ptr, (tcg_insn_unit *)value);
677    default:
678        g_assert_not_reached();
679    }
680}
681
682/*
683 * RISC-V vector instruction emitters
684 */
685
/*
 * Vector registers use the same low 5 bits as GPRs in the encoding,
 * and vm=0 (vm = false) means vector masking is ENABLED.
 * With RVV 1.0, vs2 is the first operand, while rs1/imm is the
 * second operand.
 */
692static void tcg_out_opc_vv(TCGContext *s, RISCVInsn opc,
693                           TCGReg vd, TCGReg vs2, TCGReg vs1)
694{
695    tcg_out32(s, encode_v(opc, vd, vs1, vs2, true));
696}
697
698static void tcg_out_opc_vx(TCGContext *s, RISCVInsn opc,
699                           TCGReg vd, TCGReg vs2, TCGReg rs1)
700{
701    tcg_out32(s, encode_v(opc, vd, rs1, vs2, true));
702}
703
704static void tcg_out_opc_vi(TCGContext *s, RISCVInsn opc,
705                           TCGReg vd, TCGReg vs2, int32_t imm)
706{
707    tcg_out32(s, encode_vi(opc, vd, imm, vs2, true));
708}
709
710static void tcg_out_opc_vv_vi(TCGContext *s, RISCVInsn o_vv, RISCVInsn o_vi,
711                              TCGReg vd, TCGReg vs2, TCGArg vi1, int c_vi1)
712{
713    if (c_vi1) {
714        tcg_out_opc_vi(s, o_vi, vd, vs2, vi1);
715    } else {
716        tcg_out_opc_vv(s, o_vv, vd, vs2, vi1);
717    }
718}
719
720static void tcg_out_opc_vim_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
721                                 TCGReg vs2, int32_t imm)
722{
723    tcg_out32(s, encode_vi(opc, vd, imm, vs2, false));
724}
725
726static void tcg_out_opc_vvm_mask(TCGContext *s, RISCVInsn opc, TCGReg vd,
727                                 TCGReg vs2, TCGReg vs1)
728{
729    tcg_out32(s, encode_v(opc, vd, vs1, vs2, false));
730}
731
732typedef struct VsetCache {
733    uint32_t movi_insn;
734    uint32_t vset_insn;
735} VsetCache;
736
737static VsetCache riscv_vset_cache[3][4];
738
739static void set_vtype(TCGContext *s, TCGType type, MemOp vsew)
740{
741    const VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
742
743    s->riscv_cur_type = type;
744    s->riscv_cur_vsew = vsew;
745
746    if (p->movi_insn) {
747        tcg_out32(s, p->movi_insn);
748    }
749    tcg_out32(s, p->vset_insn);
750}
751
752static MemOp set_vtype_len(TCGContext *s, TCGType type)
753{
754    if (type != s->riscv_cur_type) {
755        set_vtype(s, type, MO_64);
756    }
757    return s->riscv_cur_vsew;
758}
759
760static void set_vtype_len_sew(TCGContext *s, TCGType type, MemOp vsew)
761{
762    if (type != s->riscv_cur_type || vsew != s->riscv_cur_vsew) {
763        set_vtype(s, type, vsew);
764    }
765}
766
767/*
768 * TCG intrinsics
769 */
770
771static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
772{
773    if (ret == arg) {
774        return true;
775    }
776    switch (type) {
777    case TCG_TYPE_I32:
778    case TCG_TYPE_I64:
779        tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
780        break;
781    case TCG_TYPE_V64:
782    case TCG_TYPE_V128:
783    case TCG_TYPE_V256:
784        {
785            int lmul = type - riscv_lg2_vlenb;
786            int nf = 1 << MAX(lmul, 0);
787            tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
788        }
789        break;
790    default:
791        g_assert_not_reached();
792    }
793    return true;
794}
795
796static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
797                         tcg_target_long val)
798{
799    tcg_target_long lo, hi, tmp;
800    int shift, ret;
801
802    if (type == TCG_TYPE_I32) {
803        val = (int32_t)val;
804    }
805
806    lo = sextreg(val, 0, 12);
807    if (val == lo) {
808        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, lo);
809        return;
810    }
811
812    hi = val - lo;
813    if (val == (int32_t)val) {
814        tcg_out_opc_upper(s, OPC_LUI, rd, hi);
815        if (lo != 0) {
816            tcg_out_opc_imm(s, OPC_ADDIW, rd, rd, lo);
817        }
818        return;
819    }
820
821    tmp = tcg_pcrel_diff(s, (void *)val);
822    if (tmp == (int32_t)tmp) {
823        tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
824        tcg_out_opc_imm(s, OPC_ADDI, rd, rd, 0);
825        ret = reloc_call(s->code_ptr - 2, (const tcg_insn_unit *)val);
826        tcg_debug_assert(ret == true);
827        return;
828    }
829
830    /* Look for a single 20-bit section.  */
831    shift = ctz64(val);
832    tmp = val >> shift;
833    if (tmp == sextreg(tmp, 0, 20)) {
834        tcg_out_opc_upper(s, OPC_LUI, rd, tmp << 12);
835        if (shift > 12) {
836            tcg_out_opc_imm(s, OPC_SLLI, rd, rd, shift - 12);
837        } else {
838            tcg_out_opc_imm(s, OPC_SRAI, rd, rd, 12 - shift);
839        }
840        return;
841    }
842
843    /* Look for a few high zero bits, with lots of bits set in the middle.  */
844    shift = clz64(val);
845    tmp = val << shift;
846    if (tmp == sextreg(tmp, 12, 20) << 12) {
847        tcg_out_opc_upper(s, OPC_LUI, rd, tmp);
848        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
849        return;
850    } else if (tmp == sextreg(tmp, 0, 12)) {
851        tcg_out_opc_imm(s, OPC_ADDI, rd, TCG_REG_ZERO, tmp);
852        tcg_out_opc_imm(s, OPC_SRLI, rd, rd, shift);
853        return;
854    }
855
856    /* Drop into the constant pool.  */
857    new_pool_label(s, val, R_RISCV_CALL, s->code_ptr, 0);
858    tcg_out_opc_upper(s, OPC_AUIPC, rd, 0);
859    tcg_out_opc_imm(s, OPC_LD, rd, rd, 0);
860}
861
862static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
863{
864    return false;
865}
866
867static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
868                             tcg_target_long imm)
869{
870    /* This function is only used for passing structs by reference. */
871    g_assert_not_reached();
872}
873
874static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
875{
876    tcg_out_opc_imm(s, OPC_ANDI, ret, arg, 0xff);
877}
878
879static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
880{
881    if (cpuinfo & CPUINFO_ZBB) {
882        tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO);
883    } else {
884        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
885        tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16);
886    }
887}
888
889static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
890{
891    if (cpuinfo & CPUINFO_ZBA) {
892        tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO);
893    } else {
894        tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32);
895        tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32);
896    }
897}
898
899static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
900{
901    if (cpuinfo & CPUINFO_ZBB) {
902        tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0);
903    } else {
904        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24);
905        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24);
906    }
907}
908
909static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
910{
911    if (cpuinfo & CPUINFO_ZBB) {
912        tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0);
913    } else {
914        tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16);
915        tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16);
916    }
917}
918
919static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
920{
921    tcg_out_opc_imm(s, OPC_ADDIW, ret, arg, 0);
922}
923
924static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
925{
926    if (ret != arg) {
927        tcg_out_ext32s(s, ret, arg);
928    }
929}
930
931static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
932{
933    tcg_out_ext32u(s, ret, arg);
934}
935
936static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
937{
938    tcg_out_ext32s(s, ret, arg);
939}
940
941static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
942                         TCGReg addr, intptr_t offset)
943{
944    intptr_t imm12 = sextreg(offset, 0, 12);
945
946    if (offset != imm12) {
947        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
948
949        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
950            imm12 = sextreg(diff, 0, 12);
951            tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
952        } else {
953            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
954            if (addr != TCG_REG_ZERO) {
955                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
956            }
957        }
958        addr = TCG_REG_TMP2;
959    }
960
961    switch (opc) {
962    case OPC_SB:
963    case OPC_SH:
964    case OPC_SW:
965    case OPC_SD:
966        tcg_out_opc_store(s, opc, addr, data, imm12);
967        break;
968    case OPC_LB:
969    case OPC_LBU:
970    case OPC_LH:
971    case OPC_LHU:
972    case OPC_LW:
973    case OPC_LWU:
974    case OPC_LD:
975        tcg_out_opc_imm(s, opc, data, addr, imm12);
976        break;
977    default:
978        g_assert_not_reached();
979    }
980}
981
982static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
983                             TCGReg addr, intptr_t offset)
984{
985    tcg_debug_assert(data >= TCG_REG_V0);
986    tcg_debug_assert(addr < TCG_REG_V0);
987
988    if (offset) {
989        tcg_debug_assert(addr != TCG_REG_ZERO);
990        if (offset == sextreg(offset, 0, 12)) {
991            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
992        } else {
993            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
994            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
995        }
996        addr = TCG_REG_TMP0;
997    }
998    tcg_out32(s, encode_v(opc, data, addr, 0, true));
999}
1000
1001static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
1002                       TCGReg arg1, intptr_t arg2)
1003{
1004    RISCVInsn insn;
1005
1006    switch (type) {
1007    case TCG_TYPE_I32:
1008        tcg_out_ldst(s, OPC_LW, arg, arg1, arg2);
1009        break;
1010    case TCG_TYPE_I64:
1011        tcg_out_ldst(s, OPC_LD, arg, arg1, arg2);
1012        break;
1013    case TCG_TYPE_V64:
1014    case TCG_TYPE_V128:
1015    case TCG_TYPE_V256:
1016        if (type >= riscv_lg2_vlenb) {
1017            static const RISCVInsn whole_reg_ld[] = {
1018                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
1019            };
1020            unsigned idx = type - riscv_lg2_vlenb;
1021
1022            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_ld));
1023            insn = whole_reg_ld[idx];
1024        } else {
1025            static const RISCVInsn unit_stride_ld[] = {
1026                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
1027            };
1028            MemOp prev_vsew = set_vtype_len(s, type);
1029
1030            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_ld));
1031            insn = unit_stride_ld[prev_vsew];
1032        }
1033        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1034        break;
1035    default:
1036        g_assert_not_reached();
1037    }
1038}
1039
1040static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
1041                       TCGReg arg1, intptr_t arg2)
1042{
1043    RISCVInsn insn;
1044
1045    switch (type) {
1046    case TCG_TYPE_I32:
1047        tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
1048        break;
1049    case TCG_TYPE_I64:
1050        tcg_out_ldst(s, OPC_SD, arg, arg1, arg2);
1051        break;
1052    case TCG_TYPE_V64:
1053    case TCG_TYPE_V128:
1054    case TCG_TYPE_V256:
1055        if (type >= riscv_lg2_vlenb) {
1056            static const RISCVInsn whole_reg_st[] = {
1057                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
1058            };
1059            unsigned idx = type - riscv_lg2_vlenb;
1060
1061            tcg_debug_assert(idx < ARRAY_SIZE(whole_reg_st));
1062            insn = whole_reg_st[idx];
1063        } else {
1064            static const RISCVInsn unit_stride_st[] = {
1065                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
1066            };
1067            MemOp prev_vsew = set_vtype_len(s, type);
1068
1069            tcg_debug_assert(prev_vsew < ARRAY_SIZE(unit_stride_st));
1070            insn = unit_stride_st[prev_vsew];
1071        }
1072        tcg_out_vec_ldst(s, insn, arg, arg1, arg2);
1073        break;
1074    default:
1075        g_assert_not_reached();
1076    }
1077}
1078
1079static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1080                        TCGReg base, intptr_t ofs)
1081{
1082    if (val == 0) {
1083        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
1084        return true;
1085    }
1086    return false;
1087}
1088
1089static void tcg_out_addsub2(TCGContext *s,
1090                            TCGReg rl, TCGReg rh,
1091                            TCGReg al, TCGReg ah,
1092                            TCGArg bl, TCGArg bh,
1093                            bool cbl, bool cbh, bool is_sub, bool is32bit)
1094{
1095    const RISCVInsn opc_add = is32bit ? OPC_ADDW : OPC_ADD;
1096    const RISCVInsn opc_addi = is32bit ? OPC_ADDIW : OPC_ADDI;
1097    const RISCVInsn opc_sub = is32bit ? OPC_SUBW : OPC_SUB;
1098    TCGReg th = TCG_REG_TMP1;
1099
1100    /* If we have a negative constant such that negating it would
1101       make the high part zero, we can (usually) eliminate one insn.  */
1102    if (cbl && cbh && bh == -1 && bl != 0) {
1103        bl = -bl;
1104        bh = 0;
1105        is_sub = !is_sub;
1106    }
1107
1108    /* By operating on the high part first, we get to use the final
1109       carry operation to move back from the temporary.  */
1110    if (!cbh) {
1111        tcg_out_opc_reg(s, (is_sub ? opc_sub : opc_add), th, ah, bh);
1112    } else if (bh != 0 || ah == rl) {
1113        tcg_out_opc_imm(s, opc_addi, th, ah, (is_sub ? -bh : bh));
1114    } else {
1115        th = ah;
1116    }
1117
1118    /* Note that tcg optimization should eliminate the bl == 0 case.  */
1119    if (is_sub) {
1120        if (cbl) {
1121            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, al, bl);
1122            tcg_out_opc_imm(s, opc_addi, rl, al, -bl);
1123        } else {
1124            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0, al, bl);
1125            tcg_out_opc_reg(s, opc_sub, rl, al, bl);
1126        }
1127        tcg_out_opc_reg(s, opc_sub, rh, th, TCG_REG_TMP0);
1128    } else {
1129        if (cbl) {
1130            tcg_out_opc_imm(s, opc_addi, rl, al, bl);
1131            tcg_out_opc_imm(s, OPC_SLTIU, TCG_REG_TMP0, rl, bl);
1132        } else if (al == bl) {
1133            /*
1134             * If the input regs overlap, this is a simple doubling
1135             * and carry-out is the input msb.  This special case is
1136             * required when the output reg overlaps the input,
1137             * but we might as well use it always.
1138             */
1139            tcg_out_opc_imm(s, OPC_SLTI, TCG_REG_TMP0, al, 0);
1140            tcg_out_opc_reg(s, opc_add, rl, al, al);
1141        } else {
1142            tcg_out_opc_reg(s, opc_add, rl, al, bl);
1143            tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_TMP0,
1144                            rl, (rl == bl ? al : bl));
1145        }
1146        tcg_out_opc_reg(s, opc_add, rh, th, TCG_REG_TMP0);
1147    }
1148}
1149
1150static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
1151                                   TCGReg dst, TCGReg src)
1152{
1153    set_vtype_len_sew(s, type, vece);
1154    tcg_out_opc_vx(s, OPC_VMV_V_X, dst, 0, src);
1155    return true;
1156}
1157
1158static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1159                                    TCGReg dst, TCGReg base, intptr_t offset)
1160{
1161    tcg_out_ld(s, TCG_TYPE_REG, TCG_REG_TMP0, base, offset);
1162    return tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1163}
1164
1165static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1166                                    TCGReg dst, int64_t arg)
1167{
1168    /* Arg is replicated by VECE; extract the highest element. */
1169    arg >>= (-8 << vece) & 63;
1170
1171    if (arg >= -16 && arg < 16) {
1172        if (arg == 0 || arg == -1) {
1173            set_vtype_len(s, type);
1174        } else {
1175            set_vtype_len_sew(s, type, vece);
1176        }
1177        tcg_out_opc_vi(s, OPC_VMV_V_I, dst, 0, arg);
1178        return;
1179    }
1180    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
1181    tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
1182}
1183
1184static const struct {
1185    RISCVInsn op;
1186    bool swap;
1187} tcg_brcond_to_riscv[] = {
1188    [TCG_COND_EQ] =  { OPC_BEQ,  false },
1189    [TCG_COND_NE] =  { OPC_BNE,  false },
1190    [TCG_COND_LT] =  { OPC_BLT,  false },
1191    [TCG_COND_GE] =  { OPC_BGE,  false },
1192    [TCG_COND_LE] =  { OPC_BGE,  true  },
1193    [TCG_COND_GT] =  { OPC_BLT,  true  },
1194    [TCG_COND_LTU] = { OPC_BLTU, false },
1195    [TCG_COND_GEU] = { OPC_BGEU, false },
1196    [TCG_COND_LEU] = { OPC_BGEU, true  },
1197    [TCG_COND_GTU] = { OPC_BLTU, true  }
1198};
1199
1200static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
1201                           TCGReg arg2, TCGLabel *l)
1202{
1203    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
1204
1205    tcg_debug_assert(op != 0);
1206
1207    if (tcg_brcond_to_riscv[cond].swap) {
1208        TCGReg t = arg1;
1209        arg1 = arg2;
1210        arg2 = t;
1211    }
1212
1213    tcg_out_reloc(s, s->code_ptr, R_RISCV_BRANCH, l, 0);
1214    tcg_out_opc_branch(s, op, arg1, arg2, 0);
1215}
1216
1217#define SETCOND_INV    TCG_TARGET_NB_REGS
1218#define SETCOND_NEZ    (SETCOND_INV << 1)
1219#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
1220
1221static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
1222                               TCGReg arg1, tcg_target_long arg2, bool c2)
1223{
1224    int flags = 0;
1225
1226    switch (cond) {
1227    case TCG_COND_EQ:    /* -> NE  */
1228    case TCG_COND_GE:    /* -> LT  */
1229    case TCG_COND_GEU:   /* -> LTU */
1230    case TCG_COND_GT:    /* -> LE  */
1231    case TCG_COND_GTU:   /* -> LEU */
1232        cond = tcg_invert_cond(cond);
1233        flags ^= SETCOND_INV;
1234        break;
1235    default:
1236        break;
1237    }
1238
1239    switch (cond) {
1240    case TCG_COND_LE:
1241    case TCG_COND_LEU:
1242        /*
1243         * If we have a constant input, the most efficient way to implement
1244         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
1245         * We don't need to care for this for LE because the constant input
1246         * is constrained to signed 12-bit, and 0x800 is representable in the
1247         * temporary register.
1248         */
1249        if (c2) {
1250            if (cond == TCG_COND_LEU) {
1251                /* unsigned <= -1 is true */
1252                if (arg2 == -1) {
1253                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
1254                    return ret;
1255                }
1256                cond = TCG_COND_LTU;
1257            } else {
1258                cond = TCG_COND_LT;
1259            }
1260            tcg_debug_assert(arg2 <= 0x7ff);
1261            if (++arg2 == 0x800) {
1262                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
1263                arg2 = TCG_REG_TMP0;
1264                c2 = false;
1265            }
1266        } else {
1267            TCGReg tmp = arg2;
1268            arg2 = arg1;
1269            arg1 = tmp;
1270            cond = tcg_swap_cond(cond);    /* LE -> GE */
1271            cond = tcg_invert_cond(cond);  /* GE -> LT */
1272            flags ^= SETCOND_INV;
1273        }
1274        break;
1275    default:
1276        break;
1277    }
1278
1279    switch (cond) {
1280    case TCG_COND_NE:
1281        flags |= SETCOND_NEZ;
1282        if (!c2) {
1283            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
1284        } else if (arg2 == 0) {
1285            ret = arg1;
1286        } else {
1287            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
1288        }
1289        break;
1290
1291    case TCG_COND_LT:
1292        if (c2) {
1293            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
1294        } else {
1295            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
1296        }
1297        break;
1298
1299    case TCG_COND_LTU:
1300        if (c2) {
1301            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
1302        } else {
1303            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
1304        }
1305        break;
1306
1307    default:
1308        g_assert_not_reached();
1309    }
1310
1311    return ret | flags;
1312}
1313
1314static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
1315                            TCGReg arg1, tcg_target_long arg2, bool c2)
1316{
1317    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1318
1319    if (tmpflags != ret) {
1320        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
1321
1322        switch (tmpflags & SETCOND_FLAGS) {
1323        case SETCOND_INV:
1324            /* Intermediate result is boolean: simply invert. */
1325            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
1326            break;
1327        case SETCOND_NEZ:
1328            /* Intermediate result is zero/non-zero: test != 0. */
1329            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1330            break;
1331        case SETCOND_NEZ | SETCOND_INV:
1332            /* Intermediate result is zero/non-zero: test == 0. */
1333            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
1334            break;
1335        default:
1336            g_assert_not_reached();
1337        }
1338    }
1339}
1340
1341static void tcg_out_negsetcond(TCGContext *s, TCGCond cond, TCGReg ret,
1342                               TCGReg arg1, tcg_target_long arg2, bool c2)
1343{
1344    int tmpflags;
1345    TCGReg tmp;
1346
1347    /* For LT/GE comparison against 0, replicate the sign bit. */
1348    if (c2 && arg2 == 0) {
1349        switch (cond) {
1350        case TCG_COND_GE:
1351            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, -1);
1352            arg1 = ret;
1353            /* fall through */
1354        case TCG_COND_LT:
1355            tcg_out_opc_imm(s, OPC_SRAI, ret, arg1, TCG_TARGET_REG_BITS - 1);
1356            return;
1357        default:
1358            break;
1359        }
1360    }
1361
1362    tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
1363    tmp = tmpflags & ~SETCOND_FLAGS;
1364
1365    /* If intermediate result is zero/non-zero: test != 0. */
1366    if (tmpflags & SETCOND_NEZ) {
1367        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
1368        tmp = ret;
1369    }
1370
1371    /* Produce the 0/-1 result. */
1372    if (tmpflags & SETCOND_INV) {
1373        tcg_out_opc_imm(s, OPC_ADDI, ret, tmp, -1);
1374    } else {
1375        tcg_out_opc_reg(s, OPC_SUB, ret, TCG_REG_ZERO, tmp);
1376    }
1377}
1378
1379static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne,
1380                                   int val1, bool c_val1,
1381                                   int val2, bool c_val2)
1382{
1383    if (val1 == 0) {
1384        if (c_val2) {
1385            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2);
1386            val2 = TCG_REG_TMP1;
1387        }
1388        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne);
1389        return;
1390    }
1391
1392    if (val2 == 0) {
1393        if (c_val1) {
1394            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1);
1395            val1 = TCG_REG_TMP1;
1396        }
1397        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne);
1398        return;
1399    }
1400
1401    if (c_val2) {
1402        if (c_val1) {
1403            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2);
1404        } else {
1405            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2);
1406        }
1407        tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne);
1408        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2);
1409        return;
1410    }
1411
1412    if (c_val1) {
1413        tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1);
1414        tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne);
1415        tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1);
1416        return;
1417    }
1418
1419    tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne);
1420    tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne);
1421    tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1);
1422}
1423
1424static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret,
1425                                TCGReg cmp1, TCGReg cmp2,
1426                                int val, bool c_val)
1427{
1428    RISCVInsn op;
1429    int disp = 8;
1430
1431    tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv));
1432    op = tcg_brcond_to_riscv[cond].op;
1433    tcg_debug_assert(op != 0);
1434
1435    if (tcg_brcond_to_riscv[cond].swap) {
1436        tcg_out_opc_branch(s, op, cmp2, cmp1, disp);
1437    } else {
1438        tcg_out_opc_branch(s, op, cmp1, cmp2, disp);
1439    }
1440    if (c_val) {
1441        tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val);
1442    } else {
1443        tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0);
1444    }
1445}
1446
1447static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret,
1448                                TCGReg cmp1, TCGReg cmp2,
1449                                int val1, bool c_val1,
1450                                int val2, bool c_val2)
1451{
1452    TCGReg tmp;
1453
1454    /* TCG optimizer reorders to prefer ret matching val2. */
1455    if (!c_val2 && ret == val2) {
1456        cond = tcg_invert_cond(cond);
1457        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1);
1458        return;
1459    }
1460
1461    if (!c_val1 && ret == val1) {
1462        tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2);
1463        return;
1464    }
1465
1466    tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret);
1467    if (c_val1) {
1468        tcg_out_movi(s, TCG_TYPE_REG, tmp, val1);
1469    } else {
1470        tcg_out_mov(s, TCG_TYPE_REG, tmp, val1);
1471    }
1472    tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2);
1473    tcg_out_mov(s, TCG_TYPE_REG, ret, tmp);
1474}
1475
1476static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
1477                            TCGReg cmp1, int cmp2, bool c_cmp2,
1478                            TCGReg val1, bool c_val1,
1479                            TCGReg val2, bool c_val2)
1480{
1481    int tmpflags;
1482    TCGReg t;
1483
1484    if (!(cpuinfo & CPUINFO_ZICOND) && (!c_cmp2 || cmp2 == 0)) {
1485        tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2,
1486                            val1, c_val1, val2, c_val2);
1487        return;
1488    }
1489
1490    tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2);
1491    t = tmpflags & ~SETCOND_FLAGS;
1492
1493    if (cpuinfo & CPUINFO_ZICOND) {
1494        if (tmpflags & SETCOND_INV) {
1495            tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1);
1496        } else {
1497            tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2);
1498        }
1499    } else {
1500        cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE;
1501        tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO,
1502                            val1, c_val1, val2, c_val2);
1503    }
1504}
1505
1506static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn,
1507                         TCGReg ret, TCGReg src1, int src2, bool c_src2)
1508{
1509    tcg_out_opc_imm(s, insn, ret, src1, 0);
1510
1511    if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) {
1512        /*
1513         * The requested zero result does not match the insn, so adjust.
1514         * Note that constraints put 'ret' in a new register, so the
1515         * computation above did not clobber either 'src1' or 'src2'.
1516         */
1517        tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true,
1518                        src2, c_src2, ret, false);
1519    }
1520}
1521
1522static void tcg_out_cmpsel(TCGContext *s, TCGType type, unsigned vece,
1523                           TCGCond cond, TCGReg ret,
1524                           TCGReg cmp1, TCGReg cmp2, bool c_cmp2,
1525                           TCGReg val1, bool c_val1,
1526                           TCGReg val2, bool c_val2)
1527{
1528    set_vtype_len_sew(s, type, vece);
1529
1530    /* Use only vmerge_vim if possible, by inverting the test. */
1531    if (c_val2 && !c_val1) {
1532        TCGArg temp = val1;
1533        cond = tcg_invert_cond(cond);
1534        val1 = val2;
1535        val2 = temp;
1536        c_val1 = true;
1537        c_val2 = false;
1538    }
1539
1540    /* Perform the comparison into V0 mask. */
1541    if (c_cmp2) {
1542        tcg_out_opc_vi(s, tcg_cmpcond_to_rvv_vi[cond].op, TCG_REG_V0, cmp1,
1543                       cmp2 - tcg_cmpcond_to_rvv_vi[cond].adjust);
1544    } else if (tcg_cmpcond_to_rvv_vv[cond].swap) {
1545        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1546                       TCG_REG_V0, cmp2, cmp1);
1547    } else {
1548        tcg_out_opc_vv(s, tcg_cmpcond_to_rvv_vv[cond].op,
1549                       TCG_REG_V0, cmp1, cmp2);
1550    }
1551    if (c_val1) {
1552        if (c_val2) {
1553            tcg_out_opc_vi(s, OPC_VMV_V_I, ret, 0, val2);
1554            val2 = ret;
1555        }
1556        /* vd[i] == v0.mask[i] ? imm : vs2[i] */
1557        tcg_out_opc_vim_mask(s, OPC_VMERGE_VIM, ret, val2, val1);
1558    } else {
1559        /* vd[i] == v0.mask[i] ? vs1[i] : vs2[i] */
1560        tcg_out_opc_vvm_mask(s, OPC_VMERGE_VVM, ret, val2, val1);
1561    }
1562}
1563
1564static void tcg_out_vshifti(TCGContext *s, RISCVInsn opc_vi, RISCVInsn opc_vx,
1565                             TCGReg dst, TCGReg src, unsigned imm)
1566{
1567    if (imm < 32) {
1568        tcg_out_opc_vi(s, opc_vi, dst, src, imm);
1569    } else {
1570        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP0, imm);
1571        tcg_out_opc_vx(s, opc_vx, dst, src, TCG_REG_TMP0);
1572    }
1573}
1574
1575static void init_setting_vtype(TCGContext *s)
1576{
1577    s->riscv_cur_type = TCG_TYPE_COUNT;
1578}
1579
1580static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
1581{
1582    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
1583    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
1584    int ret;
1585
1586    init_setting_vtype(s);
1587
1588    tcg_debug_assert((offset & 1) == 0);
1589    if (offset == sextreg(offset, 0, 20)) {
1590        /* short jump: -2097150 to 2097152 */
1591        tcg_out_opc_jump(s, OPC_JAL, link, offset);
1592    } else if (offset == (int32_t)offset) {
1593        /* long jump: -2147483646 to 2147483648 */
1594        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
1595        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
1596        ret = reloc_call(s->code_ptr - 2, arg);
1597        tcg_debug_assert(ret == true);
1598    } else {
1599        /* far jump: 64-bit */
1600        tcg_target_long imm = sextreg((tcg_target_long)arg, 0, 12);
1601        tcg_target_long base = (tcg_target_long)arg - imm;
1602        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
1603        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
1604    }
1605}
1606
1607static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
1608                         const TCGHelperInfo *info)
1609{
1610    tcg_out_call_int(s, arg, false);
1611}
1612
1613static void tcg_out_mb(TCGContext *s, TCGArg a0)
1614{
1615    tcg_insn_unit insn = OPC_FENCE;
1616
1617    if (a0 & TCG_MO_LD_LD) {
1618        insn |= 0x02200000;
1619    }
1620    if (a0 & TCG_MO_ST_LD) {
1621        insn |= 0x01200000;
1622    }
1623    if (a0 & TCG_MO_LD_ST) {
1624        insn |= 0x02100000;
1625    }
1626    if (a0 & TCG_MO_ST_ST) {
1627        insn |= 0x02200000;
1628    }
1629    tcg_out32(s, insn);
1630}
1631
1632/*
1633 * Load/store and TLB
1634 */
1635
1636static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1637{
1638    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1639    bool ok = reloc_jimm20(s->code_ptr - 1, target);
1640    tcg_debug_assert(ok);
1641}
1642
1643bool tcg_target_has_memory_bswap(MemOp memop)
1644{
1645    return false;
1646}
1647
1648/* We have three temps, we might as well expose them. */
1649static const TCGLdstHelperParam ldst_helper_param = {
1650    .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
1651};
1652
1653static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1654{
1655    MemOp opc = get_memop(l->oi);
1656
1657    /* resolve label address */
1658    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1659        return false;
1660    }
1661
1662    /* call load helper */
1663    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
1664    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
1665    tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
1666
1667    tcg_out_goto(s, l->raddr);
1668    return true;
1669}
1670
1671static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1672{
1673    MemOp opc = get_memop(l->oi);
1674
1675    /* resolve label address */
1676    if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
1677        return false;
1678    }
1679
1680    /* call store helper */
1681    tcg_out_st_helper_args(s, l, &ldst_helper_param);
1682    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
1683
1684    tcg_out_goto(s, l->raddr);
1685    return true;
1686}
1687
1688/* We expect to use a 12-bit negative offset from ENV.  */
1689#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)
1690
1691/*
1692 * For system-mode, perform the TLB load and compare.
1693 * For user-mode, perform any required alignment tests.
1694 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1695 * is required and fill in @h with the host address for the fast path.
1696 */
1697static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
1698                                           TCGReg addr_reg, MemOpIdx oi,
1699                                           bool is_ld)
1700{
1701    TCGType addr_type = s->addr_type;
1702    TCGLabelQemuLdst *ldst = NULL;
1703    MemOp opc = get_memop(oi);
1704    TCGAtomAlign aa;
1705    unsigned a_mask;
1706
1707    aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
1708    a_mask = (1u << aa.align) - 1;
1709
1710    if (tcg_use_softmmu) {
1711        unsigned s_bits = opc & MO_SIZE;
1712        unsigned s_mask = (1u << s_bits) - 1;
1713        int mem_index = get_mmuidx(oi);
1714        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
1715        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
1716        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
1717        int compare_mask;
1718        TCGReg addr_adj;
1719
1720        ldst = new_ldst_label(s);
1721        ldst->is_ld = is_ld;
1722        ldst->oi = oi;
1723        ldst->addrlo_reg = addr_reg;
1724
1725        init_setting_vtype(s);
1726
1727        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
1728        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
1729
1730        tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
1731                        s->page_bits - CPU_TLB_ENTRY_BITS);
1732        tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
1733        tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
1734
1735        /*
1736         * For aligned accesses, we check the first byte and include the
1737         * alignment bits within the address.  For unaligned access, we
1738         * check that we don't cross pages using the address of the last
1739         * byte of the access.
1740         */
1741        addr_adj = addr_reg;
1742        if (a_mask < s_mask) {
1743            addr_adj = TCG_REG_TMP0;
1744            tcg_out_opc_imm(s, addr_type == TCG_TYPE_I32 ? OPC_ADDIW : OPC_ADDI,
1745                            addr_adj, addr_reg, s_mask - a_mask);
1746        }
1747        compare_mask = s->page_mask | a_mask;
1748        if (compare_mask == sextreg(compare_mask, 0, 12)) {
1749            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
1750        } else {
1751            tcg_out_movi(s, addr_type, TCG_REG_TMP1, compare_mask);
1752            tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
1753        }
1754
1755        /* Load the tlb comparator and the addend.  */
1756        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
1757        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
1758                   is_ld ? offsetof(CPUTLBEntry, addr_read)
1759                         : offsetof(CPUTLBEntry, addr_write));
1760        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
1761                   offsetof(CPUTLBEntry, addend));
1762
1763        /* Compare masked address with the TLB entry. */
1764        ldst->label_ptr[0] = s->code_ptr;
1765        tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
1766
1767        /* TLB Hit - translate address using addend.  */
1768        if (addr_type != TCG_TYPE_I32) {
1769            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2);
1770        } else if (cpuinfo & CPUINFO_ZBA) {
1771            tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0,
1772                            addr_reg, TCG_REG_TMP2);
1773        } else {
1774            tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
1775            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0,
1776                            TCG_REG_TMP0, TCG_REG_TMP2);
1777        }
1778        *pbase = TCG_REG_TMP0;
1779    } else {
1780        TCGReg base;
1781
1782        if (a_mask) {
1783            ldst = new_ldst_label(s);
1784            ldst->is_ld = is_ld;
1785            ldst->oi = oi;
1786            ldst->addrlo_reg = addr_reg;
1787
1788            init_setting_vtype(s);
1789
1790            /* We are expecting alignment max 7, so we can always use andi. */
1791            tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
1792            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
1793
1794            ldst->label_ptr[0] = s->code_ptr;
1795            tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
1796        }
1797
1798        if (guest_base != 0) {
1799            base = TCG_REG_TMP0;
1800            if (addr_type != TCG_TYPE_I32) {
1801                tcg_out_opc_reg(s, OPC_ADD, base, addr_reg,
1802                                TCG_GUEST_BASE_REG);
1803            } else if (cpuinfo & CPUINFO_ZBA) {
1804                tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg,
1805                                TCG_GUEST_BASE_REG);
1806            } else {
1807                tcg_out_ext32u(s, base, addr_reg);
1808                tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG);
1809            }
1810        } else if (addr_type != TCG_TYPE_I32) {
1811            base = addr_reg;
1812        } else {
1813            base = TCG_REG_TMP0;
1814            tcg_out_ext32u(s, base, addr_reg);
1815        }
1816        *pbase = base;
1817    }
1818
1819    return ldst;
1820}
1821
1822static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
1823                                   TCGReg base, MemOp opc, TCGType type)
1824{
1825    /* Byte swapping is left to middle-end expansion. */
1826    tcg_debug_assert((opc & MO_BSWAP) == 0);
1827
1828    switch (opc & (MO_SSIZE)) {
1829    case MO_UB:
1830        tcg_out_opc_imm(s, OPC_LBU, val, base, 0);
1831        break;
1832    case MO_SB:
1833        tcg_out_opc_imm(s, OPC_LB, val, base, 0);
1834        break;
1835    case MO_UW:
1836        tcg_out_opc_imm(s, OPC_LHU, val, base, 0);
1837        break;
1838    case MO_SW:
1839        tcg_out_opc_imm(s, OPC_LH, val, base, 0);
1840        break;
1841    case MO_UL:
1842        if (type == TCG_TYPE_I64) {
1843            tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
1844            break;
1845        }
1846        /* FALLTHRU */
1847    case MO_SL:
1848        tcg_out_opc_imm(s, OPC_LW, val, base, 0);
1849        break;
1850    case MO_UQ:
1851        tcg_out_opc_imm(s, OPC_LD, val, base, 0);
1852        break;
1853    default:
1854        g_assert_not_reached();
1855    }
1856}
1857
1858static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1859                            MemOpIdx oi, TCGType data_type)
1860{
1861    TCGLabelQemuLdst *ldst;
1862    TCGReg base;
1863
1864    ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
1865    tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
1866
1867    if (ldst) {
1868        ldst->type = data_type;
1869        ldst->datalo_reg = data_reg;
1870        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1871    }
1872}
1873
1874static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
1875                                   TCGReg base, MemOp opc)
1876{
1877    /* Byte swapping is left to middle-end expansion. */
1878    tcg_debug_assert((opc & MO_BSWAP) == 0);
1879
1880    switch (opc & (MO_SSIZE)) {
1881    case MO_8:
1882        tcg_out_opc_store(s, OPC_SB, base, val, 0);
1883        break;
1884    case MO_16:
1885        tcg_out_opc_store(s, OPC_SH, base, val, 0);
1886        break;
1887    case MO_32:
1888        tcg_out_opc_store(s, OPC_SW, base, val, 0);
1889        break;
1890    case MO_64:
1891        tcg_out_opc_store(s, OPC_SD, base, val, 0);
1892        break;
1893    default:
1894        g_assert_not_reached();
1895    }
1896}
1897
1898static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1899                            MemOpIdx oi, TCGType data_type)
1900{
1901    TCGLabelQemuLdst *ldst;
1902    TCGReg base;
1903
1904    ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
1905    tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
1906
1907    if (ldst) {
1908        ldst->type = data_type;
1909        ldst->datalo_reg = data_reg;
1910        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1911    }
1912}
1913
1914static const tcg_insn_unit *tb_ret_addr;
1915
1916static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1917{
1918    /* Reuse the zeroing that exists for goto_ptr.  */
1919    if (a0 == 0) {
1920        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
1921    } else {
1922        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
1923        tcg_out_call_int(s, tb_ret_addr, true);
1924    }
1925}
1926
/*
 * Emit the goto_tb sequence for jump slot @which.
 *
 * The sequence is one patchable instruction followed by an indirect
 * fallback: the target is loaded into TMP0 from the address returned
 * by get_jmp_target_addr() and reached with a JALR.
 * tb_target_set_jmp_target() later rewrites the first instruction to
 * either a direct JAL or a NOP that falls through to the fallback.
 */
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /* Direct branch will be patched by tb_target_set_jmp_target. */
    set_jmp_insn_offset(s, which);
    /* Placeholder word: the bare opcode, with no operand fields filled in. */
    tcg_out32(s, OPC_JAL);

    /* When branch is out of range, fall through to indirect. */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
               get_jmp_target_addr(s, which));
    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
    /* Recorded only after the whole sequence has been emitted. */
    set_jmp_reset_offset(s, which);
}
1939
1940void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1941                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1942{
1943    uintptr_t addr = tb->jmp_target_addr[n];
1944    ptrdiff_t offset = addr - jmp_rx;
1945    tcg_insn_unit insn;
1946
1947    /* Either directly branch, or fall through to indirect branch. */
1948    if (offset == sextreg(offset, 0, 20)) {
1949        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
1950    } else {
1951        insn = OPC_NOP;
1952    }
1953    qatomic_set((uint32_t *)jmp_rw, insn);
1954    flush_idcache_range(jmp_rx, jmp_rw, 4);
1955}
1956
/*
 * Emit host code for one integer TCG opcode.
 *
 * @opc selects the operation and @args holds its operands.  A nonzero
 * @const_args[i] means args[i] is a constant rather than a register,
 * which selects the immediate form of the instruction where one is
 * used.  Opcodes that are emitted through dedicated hooks (mov, call,
 * exit_tb, goto_tb and the extension ops listed at the end) must never
 * reach this function; they share the asserting default case.
 */
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_goto_ptr:
        /* Indirect jump through a0 without linking. */
        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        /* Register a relocation for the label, then emit a JAL with offset 0. */
        tcg_out_reloc(s, s->code_ptr, R_RISCV_JAL, arg_label(a0), 0);
        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
        break;

    /* Host loads: a0 = destination, a1 = base, a2 = offset. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LBU, a0, a1, a2);
        break;
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LB, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LHU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LH, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LWU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LW, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD, a0, a1, a2);
        break;

    /* Host stores: a0 = value, a1 = base, a2 = offset. */
    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_SB, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_SH, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_SW, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_SD, a0, a1, a2);
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_ADDW, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_ADD, a0, a1, a2);
        }
        break;

    /* Subtracting a constant is emitted as adding its negation. */
    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDIW, a0, a1, -a2);
        } else {
            tcg_out_opc_reg(s, OPC_SUBW, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ADDI, a0, a1, -a2);
        } else {
            tcg_out_opc_reg(s, OPC_SUB, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_AND, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ORI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_OR, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_XORI, a0, a1, a2);
        } else {
            tcg_out_opc_reg(s, OPC_XOR, a0, a1, a2);
        }
        break;

    /*
     * Logic ops with a complemented second operand: for a constant the
     * complement is folded into the immediate of the plain operation.
     */
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2);
        }
        break;
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2);
        }
        break;
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2);
        } else {
            tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2);
        }
        break;

    /* NOT is XOR with an all-ones immediate. */
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1);
        break;

    /* Negation is subtraction from the zero register. */
    case INDEX_op_neg_i32:
        tcg_out_opc_reg(s, OPC_SUBW, a0, TCG_REG_ZERO, a1);
        break;
    case INDEX_op_neg_i64:
        tcg_out_opc_reg(s, OPC_SUB, a0, TCG_REG_ZERO, a1);
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_reg(s, OPC_MULW, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_reg(s, OPC_MUL, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_reg(s, OPC_DIVW, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_reg(s, OPC_DIV, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_reg(s, OPC_DIVUW, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_reg(s, OPC_DIVU, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_reg(s, OPC_REMW, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_reg(s, OPC_REM, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_reg(s, OPC_REMUW, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_reg(s, OPC_REMU, a0, a1, a2);
        break;

    /*
     * Shifts: a constant count is masked to the operand width
     * (0x1f for 32-bit, 0x3f for 64-bit) before being encoded.
     */
    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
        }
        break;

    /*
     * A constant rotate-left is emitted as a rotate-right by the
     * negated count, masked to the operand width.
     */
    case INDEX_op_rotl_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2);
        }
        break;
    case INDEX_op_rotl_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2);
        }
        break;

    /*
     * Byte swaps: REV8 is applied to the whole register, then the
     * 32-bit and 16-bit variants shift the result down by 32 or 48 bits.
     * The shift is logical when the TCG_BSWAP_OZ flag is set in a2 and
     * arithmetic otherwise.
     */
    case INDEX_op_bswap64_i64:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        break;
    case INDEX_op_bswap32_i32:
        /* Clear the flags so that the arithmetic shift below is used. */
        a2 = 0;
        /* fall through */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        if (a2 & TCG_BSWAP_OZ) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32);
        }
        break;
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap16_i32:
        tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0);
        if (a2 & TCG_BSWAP_OZ) {
            tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48);
        } else {
            tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48);
        }
        break;

    case INDEX_op_ctpop_i32:
        tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
        break;
    case INDEX_op_ctpop_i64:
        tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
        break;

    /* Count leading/trailing zeros; a2/c2 are forwarded to tcg_out_cltz(). */
    case INDEX_op_clz_i32:
        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2);
        break;
    case INDEX_op_clz_i64:
        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2);
        break;
    case INDEX_op_ctz_i32:
        tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2);
        break;

    /*
     * Double-word add/sub.  The last two arguments of tcg_out_addsub2()
     * differ per case: the first is true for the sub2 ops and the
     * second is true for the 32-bit ops.
     */
    case INDEX_op_add2_i32:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], false, true);
        break;
    case INDEX_op_add2_i64:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], false, false);
        break;
    case INDEX_op_sub2_i32:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], true, true);
        break;
    case INDEX_op_sub2_i64:
        tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
                        const_args[4], const_args[5], true, false);
        break;

    /* brcond: a0/a1 are the compared values, a2 the condition, args[3] the label. */
    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    /* setcond family: args[3] is the condition. */
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        tcg_out_negsetcond(s, args[3], a0, a1, a2, c2);
        break;

    /* movcond: args[5] is the condition, args[3]/args[4] the two values. */
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        tcg_out_movcond(s, args[5], a0, a1, a2, c2,
                        args[3], const_args[3], args[4], const_args[4]);
        break;

    /* Guest memory accesses: a0 = data, a1 = address, a2 = MemOpIdx. */
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
        break;

    /* High half of a 64-bit value: arithmetic shift right by 32. */
    case INDEX_op_extrh_i64_i32:
        tcg_out_opc_imm(s, OPC_SRAI, a0, a1, 32);
        break;

    case INDEX_op_mulsh_i32:
    case INDEX_op_mulsh_i64:
        tcg_out_opc_reg(s, OPC_MULH, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
    case INDEX_op_muluh_i64:
        tcg_out_opc_reg(s, OPC_MULHU, a0, a1, a2);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}
2362
/*
 * Emit host code for one vector TCG opcode.
 *
 * @vecl is the vector type expressed as an offset from TCG_TYPE_V64 and
 * @vece is the element size, with an element being (8 << vece) bits
 * wide.  @args and @const_args have the same meaning as in tcg_out_op().
 *
 * Apart from the memory ops and the compare ops, every case first calls
 * set_vtype_len_sew(), or set_vtype_len() for the bitwise ops that pass
 * no element size, before emitting the vector instruction(s).  Those
 * helpers are defined outside this chunk; presumably they select the
 * vector configuration.
 *
 * The rotate cases build the result from two shifts and an OR, using
 * TCG_REG_V0 as a temporary.
 *
 * NOTE(review): the *_VI opcodes and tcg_out_opc_vv_vi() are defined
 * outside this chunk.  The RVV specification has no immediate form of
 * vssub[u], vmin[u] or vmax[u]; confirm how a constant a2 is encoded
 * for those cases.
 */
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS])
{
    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0, a1, a2;
    int c2;

    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    c2 = const_args[2];

    switch (opc) {
    case INDEX_op_dupm_vec:
        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
        break;
    case INDEX_op_ld_vec:
        tcg_out_ld(s, type, a0, a1, a2);
        break;
    case INDEX_op_st_vec:
        tcg_out_st(s, type, a0, a1, a2);
        break;
    case INDEX_op_add_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VADD_VV, OPC_VADD_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_sub_vec:
        set_vtype_len_sew(s, type, vece);
        /* A constant first operand uses reverse-subtract: a0 = a1 - a2. */
        if (const_args[1]) {
            tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a2, a1);
        } else {
            tcg_out_opc_vv(s, OPC_VSUB_VV, a0, a1, a2);
        }
        break;
    case INDEX_op_and_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VAND_VV, OPC_VAND_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_or_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VOR_VV, OPC_VOR_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_xor_vec:
        set_vtype_len(s, type);
        tcg_out_opc_vv_vi(s, OPC_VXOR_VV, OPC_VXOR_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_not_vec:
        set_vtype_len(s, type);
        /* NOT is XOR with an all-ones immediate. */
        tcg_out_opc_vi(s, OPC_VXOR_VI, a0, a1, -1);
        break;
    case INDEX_op_neg_vec:
        set_vtype_len_sew(s, type, vece);
        /* Negation is reverse-subtract from an immediate zero. */
        tcg_out_opc_vi(s, OPC_VRSUB_VI, a0, a1, 0);
        break;
    case INDEX_op_mul_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VMUL_VV, a0, a1, a2);
        break;
    case INDEX_op_ssadd_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSADD_VV, OPC_VSADD_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_sssub_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSSUB_VV, OPC_VSSUB_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_usadd_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSADDU_VV, OPC_VSADDU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_ussub_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VSSUBU_VV, OPC_VSSUBU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_smax_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMAX_VV, OPC_VMAX_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_smin_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMIN_VV, OPC_VMIN_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_umax_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMAXU_VV, OPC_VMAXU_VI, a0, a1, a2, c2);
        break;
    case INDEX_op_umin_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv_vi(s, OPC_VMINU_VV, OPC_VMINU_VI, a0, a1, a2, c2);
        break;
    /* Shifts by a scalar register count (a2). */
    case INDEX_op_shls_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSLL_VX, a0, a1, a2);
        break;
    case INDEX_op_shrs_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, a2);
        break;
    case INDEX_op_sars_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vx(s, OPC_VSRA_VX, a0, a1, a2);
        break;
    /* Shifts by a per-element vector count (a2). */
    case INDEX_op_shlv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
        break;
    case INDEX_op_shrv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
        break;
    case INDEX_op_sarv_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_opc_vv(s, OPC_VSRA_VV, a0, a1, a2);
        break;
    /* Shifts by a constant count (a2), via tcg_out_vshifti(). */
    case INDEX_op_shli_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, a0, a1, a2);
        break;
    case INDEX_op_shri_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1, a2);
        break;
    case INDEX_op_sari_vec:
        set_vtype_len_sew(s, type, vece);
        tcg_out_vshifti(s, OPC_VSRA_VI, OPC_VSRA_VX, a0, a1, a2);
        break;
    case INDEX_op_rotli_vec:
        set_vtype_len_sew(s, type, vece);
        /* V0 = a1 << a2; a0 = a1 >> (-a2 mod element bits); a0 |= V0. */
        tcg_out_vshifti(s, OPC_VSLL_VI, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
        tcg_out_vshifti(s, OPC_VSRL_VI, OPC_VSRL_VX, a0, a1,
                        -a2 & ((8 << vece) - 1));
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotls_vec:
        set_vtype_len_sew(s, type, vece);
        /*
         * Same scheme with a scalar count: the negated count is built
         * in TMP0.  No explicit masking is done here, so this presumably
         * relies on the shift using only the low bits of the count.
         */
        tcg_out_opc_vx(s, OPC_VSLL_VX, TCG_REG_V0, a1, a2);
        tcg_out_opc_reg(s, OPC_SUBW, TCG_REG_TMP0, TCG_REG_ZERO, a2);
        tcg_out_opc_vx(s, OPC_VSRL_VX, a0, a1, TCG_REG_TMP0);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotlv_vec:
        set_vtype_len_sew(s, type, vece);
        /* V0 = -a2; V0 = a1 >> V0; a0 = a1 << a2; a0 |= V0. */
        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
        tcg_out_opc_vv(s, OPC_VSRL_VV, TCG_REG_V0, a1, TCG_REG_V0);
        tcg_out_opc_vv(s, OPC_VSLL_VV, a0, a1, a2);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_rotrv_vec:
        set_vtype_len_sew(s, type, vece);
        /* V0 = -a2; V0 = a1 << V0; a0 = a1 >> a2; a0 |= V0. */
        tcg_out_opc_vi(s, OPC_VRSUB_VI, TCG_REG_V0, a2, 0);
        tcg_out_opc_vv(s, OPC_VSLL_VV, TCG_REG_V0, a1, TCG_REG_V0);
        tcg_out_opc_vv(s, OPC_VSRL_VV, a0, a1, a2);
        tcg_out_opc_vv(s, OPC_VOR_VV, a0, a0, TCG_REG_V0);
        break;
    case INDEX_op_cmp_vec:
        /* A compare is a compare-select between the constants -1 and 0. */
        tcg_out_cmpsel(s, type, vece, args[3], a0, a1, a2, c2,
                       -1, true, 0, true);
        break;
    case INDEX_op_cmpsel_vec:
        /* args[5] is the condition; args[3]/args[4] are the selected values. */
        tcg_out_cmpsel(s, type, vece, args[5], a0, a1, a2, c2,
                       args[3], const_args[3], args[4], const_args[4]);
        break;
    case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov.  */
    case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec.  */
    default:
        g_assert_not_reached();
    }
}
2533
/*
 * Vector-op expansion hook; unconditionally asserts.
 *
 * tcg_can_emit_vec_op() in this file only ever returns 0 or 1, so
 * presumably the common code never requests an expansion from this
 * backend and reaching this function indicates a bug.  None of the
 * arguments are examined.
 */
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
                       TCGArg a0, ...)
{
    g_assert_not_reached();
}
2539
2540int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2541{
2542    switch (opc) {
2543    case INDEX_op_add_vec:
2544    case INDEX_op_sub_vec:
2545    case INDEX_op_and_vec:
2546    case INDEX_op_or_vec:
2547    case INDEX_op_xor_vec:
2548    case INDEX_op_not_vec:
2549    case INDEX_op_neg_vec:
2550    case INDEX_op_mul_vec:
2551    case INDEX_op_ssadd_vec:
2552    case INDEX_op_sssub_vec:
2553    case INDEX_op_usadd_vec:
2554    case INDEX_op_ussub_vec:
2555    case INDEX_op_smax_vec:
2556    case INDEX_op_smin_vec:
2557    case INDEX_op_umax_vec:
2558    case INDEX_op_umin_vec:
2559    case INDEX_op_shls_vec:
2560    case INDEX_op_shrs_vec:
2561    case INDEX_op_sars_vec:
2562    case INDEX_op_shlv_vec:
2563    case INDEX_op_shrv_vec:
2564    case INDEX_op_sarv_vec:
2565    case INDEX_op_shri_vec:
2566    case INDEX_op_shli_vec:
2567    case INDEX_op_sari_vec:
2568    case INDEX_op_rotls_vec:
2569    case INDEX_op_rotlv_vec:
2570    case INDEX_op_rotrv_vec:
2571    case INDEX_op_rotli_vec:
2572    case INDEX_op_cmp_vec:
2573    case INDEX_op_cmpsel_vec:
2574        return 1;
2575    default:
2576        return 0;
2577    }
2578}
2579
/*
 * Return the operand-constraint set for opcode @op.
 *
 * Opcodes that share a register/constant constraint pattern are grouped
 * under one return.  The C_On_Im() names appear to encode n outputs and
 * m inputs; the constraint letters themselves (r, rZ, rI, rJ, rN, rM,
 * ri, v, vK, vL) are defined outside this chunk.  An opcode that is not
 * listed reaches the asserting default case.
 */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    /* Host loads and unary ops. */
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_not_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_not_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return C_O1_I1(r, r);

    /* Host stores. */
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    /* Binary ops whose constant operand is emitted as an immediate unchanged. */
    case INDEX_op_add_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_add_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
    case INDEX_op_negsetcond_i32:
    case INDEX_op_negsetcond_i64:
        return C_O1_I2(r, r, rI);

    /* tcg_out_op() emits a constant operand of these ops complemented. */
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
    case INDEX_op_eqv_i32:
    case INDEX_op_eqv_i64:
        return C_O1_I2(r, r, rJ);

    /* tcg_out_op() emits a constant subtrahend negated. */
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rZ, rN);

    /* Multiply, divide and remainder. */
    case INDEX_op_mul_i32:
    case INDEX_op_mulsh_i32:
    case INDEX_op_muluh_i32:
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i64:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return C_O1_I2(r, rZ, rZ);

    /* Shifts and rotates. */
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    /* Count leading/trailing zeros; the only C_N1_* set used in this function. */
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        return C_N1_I2(r, r, rM);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(rZ, rZ);

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        return C_O1_I4(r, r, rI, rM, rM);

    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        return C_O2_I4(r, r, rZ, rZ, rM, rM);

    /* Guest memory accesses. */
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        return C_O1_I1(r, r);
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return C_O0_I2(rZ, r);

    /* Vector ops. */
    case INDEX_op_st_vec:
        return C_O0_I2(v, r);
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
        return C_O1_I1(v, r);
    case INDEX_op_neg_vec:
    case INDEX_op_not_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_rotli_vec:
        return C_O1_I1(v, v);
    case INDEX_op_add_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_ssadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
        return C_O1_I2(v, v, vK);
    /* For sub_vec the constant is allowed on the first input instead. */
    case INDEX_op_sub_vec:
        return C_O1_I2(v, vK, v);
    case INDEX_op_mul_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return C_O1_I2(v, v, v);
    /* Shift/rotate by a scalar register count. */
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
    case INDEX_op_rotls_vec:
        return C_O1_I2(v, v, r);
    case INDEX_op_cmp_vec:
        return C_O1_I2(v, v, vL);
    case INDEX_op_cmpsel_vec:
        return C_O1_I4(v, v, vL, vK, vK);
    default:
        g_assert_not_reached();
    }
}
2767
/*
 * Registers stored by the prologue and reloaded by the epilogue in
 * tcg_target_qemu_prologue().  Entry i is kept at offset
 * SAVE_OFS + i * REG_SIZE from SP, and the number of entries
 * determines SAVE_SIZE.
 */
static const int tcg_target_callee_save_regs[] = {
    TCG_REG_S0,       /* used for the global env (TCG_AREG0) */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_S10,
    TCG_REG_S11,
    TCG_REG_RA,       /* should be last for ABI compliance */
};
2783
/*
 * Stack frame parameters.
 *
 * From SP upwards the frame holds the static call-argument area
 * (TCG_STATIC_CALL_ARGS_SIZE bytes), the TCG temporary buffer
 * (TEMP_SIZE bytes) and then the register save area (SAVE_SIZE bytes).
 */
/* Size in bytes of one saved register slot. */
#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
/* Bytes needed to save every entry of tcg_target_callee_save_regs. */
#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
/* Bytes reserved for the temporary buffer handed to tcg_set_frame(). */
#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
/* Sum of the three areas, rounded up to TCG_TARGET_STACK_ALIGN. */
#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
                     + TCG_TARGET_STACK_ALIGN - 1) \
                    & -TCG_TARGET_STACK_ALIGN)
/* Offset from SP of the first saved register. */
#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)

/*
 * We're expecting to be able to use an immediate for frame allocation.
 * The prologue and epilogue each adjust SP with a single ADDI, whose
 * signed 12-bit immediate cannot exceed 0x7ff.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2795
2796/* Generate global QEMU prologue and epilogue code */
2797static void tcg_target_qemu_prologue(TCGContext *s)
2798{
2799    int i;
2800
2801    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2802
2803    /* TB prologue */
2804    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2805    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2806        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2807                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2808    }
2809
2810    if (!tcg_use_softmmu && guest_base) {
2811        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
2812        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2813    }
2814
2815    /* Call generated code */
2816    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2817    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
2818
2819    /* Return path for goto_ptr. Set return value to 0 */
2820    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2821    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
2822
2823    /* TB epilogue */
2824    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2825    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2826        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2827                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2828    }
2829
2830    tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
2831    tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0);
2832}
2833
/*
 * Hook run at the start of a translation block.
 *
 * It only calls init_setting_vtype(), which is defined outside this
 * chunk; presumably that resets the tracked vector configuration so
 * that the first vector op of the block sets it afresh — confirm
 * against its definition.
 */
static void tcg_out_tb_start(TCGContext *s)
{
    init_setting_vtype(s);
}
2838
/*
 * Probe whether the host accepts the vector type setting @vtype.
 *
 * Executes a vsetvl on the host with @vtype as the type operand, the
 * zero register as the length operand and a temporary as destination,
 * and returns true when the value written to the destination is
 * nonzero.  NOTE(review): this relies on the hardware reporting a zero
 * vector length for an unsupported vtype; confirm against the RVV
 * specification.  The instruction also changes the host's current
 * vector configuration as a side effect.
 */
static bool vtype_check(unsigned vtype)
{
    unsigned long tmp;

    /* vsetvl tmp, zero, vtype */
    asm(".insn r 0x57, 7, 0x40, %0, zero, %1" : "=r"(tmp) : "r"(vtype));
    return tmp != 0;
}
2847
2848static void probe_frac_lmul_1(TCGType type, MemOp vsew)
2849{
2850    VsetCache *p = &riscv_vset_cache[type - TCG_TYPE_V64][vsew];
2851    unsigned avl = tcg_type_size(type) >> vsew;
2852    int lmul = type - riscv_lg2_vlenb;
2853    unsigned vtype = encode_vtype(true, true, vsew, lmul & 7);
2854    bool lmul_eq_avl = true;
2855
2856    /* Guaranteed by Zve64x. */
2857    assert(lmul < 3);
2858
2859    /*
2860     * For LMUL < -3, the host vector size is so large that TYPE
2861     * is smaller than the minimum 1/8 fraction.
2862     *
2863     * For other fractional LMUL settings, implementations must
2864     * support SEW settings between SEW_MIN and LMUL * ELEN, inclusive.
2865     * So if ELEN = 64, LMUL = 1/2, then SEW will support e8, e16, e32,
2866     * but e64 may not be supported. In other words, the hardware only
2867     * guarantees SEW_MIN <= SEW <= LMUL * ELEN.  Check.
2868     */
2869    if (lmul < 0 && (lmul < -3 || !vtype_check(vtype))) {
2870        vtype = encode_vtype(true, true, vsew, VLMUL_M1);
2871        lmul_eq_avl = false;
2872    }
2873
2874    if (avl < 32) {
2875        p->vset_insn = encode_vseti(OPC_VSETIVLI, TCG_REG_ZERO, avl, vtype);
2876    } else if (lmul_eq_avl) {
2877        /* rd != 0 and rs1 == 0 uses vlmax */
2878        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_TMP0, TCG_REG_ZERO, vtype);
2879    } else {
2880        p->movi_insn = encode_i(OPC_ADDI, TCG_REG_TMP0, TCG_REG_ZERO, avl);
2881        p->vset_insn = encode_vset(OPC_VSETVLI, TCG_REG_ZERO, TCG_REG_TMP0, vtype);
2882    }
2883}
2884
2885static void probe_frac_lmul(void)
2886{
2887    /* Match riscv_lg2_vlenb to TCG_TYPE_V64. */
2888    QEMU_BUILD_BUG_ON(TCG_TYPE_V64 != 3);
2889
2890    for (TCGType t = TCG_TYPE_V64; t <= TCG_TYPE_V256; t++) {
2891        for (MemOp e = MO_8; e <= MO_64; e++) {
2892            probe_frac_lmul_1(t, e);
2893        }
2894    }
2895}
2896
2897static void tcg_target_init(TCGContext *s)
2898{
2899    tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff;
2900    tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff;
2901
2902    tcg_target_call_clobber_regs = -1;
2903    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
2904    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
2905    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
2906    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
2907    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
2908    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
2909    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
2910    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
2911    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
2912    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
2913    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S10);
2914    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S11);
2915
2916    s->reserved_regs = 0;
2917    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
2918    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
2919    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
2920    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
2921    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2922    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);
2923    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
2924
2925    if (cpuinfo & CPUINFO_ZVE64X) {
2926        switch (riscv_lg2_vlenb) {
2927        case TCG_TYPE_V64:
2928            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2929            tcg_target_available_regs[TCG_TYPE_V128] = ALL_DVECTOR_REG_GROUPS;
2930            tcg_target_available_regs[TCG_TYPE_V256] = ALL_QVECTOR_REG_GROUPS;
2931            s->reserved_regs |= (~ALL_QVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2932            break;
2933        case TCG_TYPE_V128:
2934            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2935            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2936            tcg_target_available_regs[TCG_TYPE_V256] = ALL_DVECTOR_REG_GROUPS;
2937            s->reserved_regs |= (~ALL_DVECTOR_REG_GROUPS & ALL_VECTOR_REGS);
2938            break;
2939        default:
2940            /* Guaranteed by Zve64x. */
2941            tcg_debug_assert(riscv_lg2_vlenb >= TCG_TYPE_V256);
2942            tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2943            tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2944            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
2945            break;
2946        }
2947        tcg_regset_set_reg(s->reserved_regs, TCG_REG_V0);
2948        probe_frac_lmul();
2949    }
2950}
2951
2952typedef struct {
2953    DebugFrameHeader h;
2954    uint8_t fde_def_cfa[4];
2955    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
2956} DebugFrame;
2957
2958#define ELF_HOST_MACHINE EM_RISCV
2959
2960static const DebugFrame debug_frame = {
2961    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
2962    .h.cie.id = -1,
2963    .h.cie.version = 1,
2964    .h.cie.code_align = 1,
2965    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
2966    .h.cie.return_column = TCG_REG_RA,
2967
2968    /* Total FDE size does not include the "len" member.  */
2969    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2970
2971    .fde_def_cfa = {
2972        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
2973        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2974        (FRAME_SIZE >> 7)
2975    },
2976    .fde_reg_ofs = {
2977        0x80 + 9,  12,                  /* DW_CFA_offset, s1,  -96 */
2978        0x80 + 18, 11,                  /* DW_CFA_offset, s2,  -88 */
2979        0x80 + 19, 10,                  /* DW_CFA_offset, s3,  -80 */
2980        0x80 + 20, 9,                   /* DW_CFA_offset, s4,  -72 */
2981        0x80 + 21, 8,                   /* DW_CFA_offset, s5,  -64 */
2982        0x80 + 22, 7,                   /* DW_CFA_offset, s6,  -56 */
2983        0x80 + 23, 6,                   /* DW_CFA_offset, s7,  -48 */
2984        0x80 + 24, 5,                   /* DW_CFA_offset, s8,  -40 */
2985        0x80 + 25, 4,                   /* DW_CFA_offset, s9,  -32 */
2986        0x80 + 26, 3,                   /* DW_CFA_offset, s10, -24 */
2987        0x80 + 27, 2,                   /* DW_CFA_offset, s11, -16 */
2988        0x80 + 1 , 1,                   /* DW_CFA_offset, ra,  -8 */
2989    }
2990};
2991
2992void tcg_register_jit(const void *buf, size_t buf_size)
2993{
2994    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2995}
2996