/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
 *
 * Based on tcg/riscv/tcg-target.c.inc
 *
 * Copyright (c) 2018 SiFive, Inc
 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "zero",
    "ra",
    "tp",
    "sp",
    "a0",
    "a1",
    "a2",
    "a3",
    "a4",
    "a5",
    "a6",
    "a7",
    "t0",
    "t1",
    "t2",
    "t3",
    "t4",
    "t5",
    "t6",
    "t7",
    "t8",
    "r21", /* reserved in the LP64* ABI, hence no ABI name */
    "s9",
    "s0",
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
    "s6",
    "s7",
    "s8"
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    /* Registers preserved across calls */
    /* TCG_REG_S0 reserved for TCG_AREG0 */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,

    /* Registers (potentially) clobbered across calls */
    TCG_REG_T0,
    TCG_REG_T1,
    TCG_REG_T2,
    TCG_REG_T3,
    TCG_REG_T4,
    TCG_REG_T5,
    TCG_REG_T6,
    TCG_REG_T7,
    TCG_REG_T8,

    /* Argument registers, opposite order of allocation.  */
    TCG_REG_A7,
    TCG_REG_A6,
    TCG_REG_A5,
    TCG_REG_A4,
    TCG_REG_A3,
    TCG_REG_A2,
    TCG_REG_A1,
    TCG_REG_A0,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
};

#ifndef CONFIG_SOFTMMU
#define USE_GUEST_BASE     (guest_base != 0)
#define TCG_GUEST_BASE_REG TCG_REG_S1
#endif

#define TCG_CT_CONST_ZERO  0x100
#define TCG_CT_CONST_S12   0x200
#define TCG_CT_CONST_N12   0x400
#define TCG_CT_CONST_U12   0x800
#define TCG_CT_CONST_C12   0x1000
#define TCG_CT_CONST_WSZ   0x2000

#define ALL_GENERAL_REGS      MAKE_64BIT_MASK(0, 32)
/*
 * For softmmu, we need to avoid conflicts with the first 5
 * argument registers to call the helper.  Some of these are
 * also used for the tlb lookup.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS  MAKE_64BIT_MASK(TCG_REG_A0, 5)
#else
#define SOFTMMU_RESERVE_REGS  0
#endif


static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
    return sextract64(val, pos, len);
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return true;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return true;
    }
    if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
        return true;
    }
    if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
        return true;
    }
    if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
        return true;
    }
    if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) {
        return true;
    }
    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return true;
    }
    return false;
}
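
/*
 * How the constant constraints above are used by the ops below:
 * S12 feeds addi.w/addi.d immediates, N12 matches constants whose
 * negation fits simm12 so sub can become addi, U12 feeds the
 * zero-extended andi/ori/xori immediates, C12 matches constants whose
 * complement fits uimm12 for andc/orc, and WSZ matches exactly the
 * word size for the single-insn clz/ctz fast path.
 */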

/*
 * Relocations
 */

/*
 * The relocation records defined in the LoongArch ELF psABI v1.00 are way
 * too complicated: a whopping stack machine is needed to stuff the fields,
 * requiring at the very least one SOP_PUSH and one SOP_POP (of the correct
 * format).
 *
 * Hence, define our own simpler relocation types.  Numbers are chosen so
 * as not to collide with potential future additions to the true ELF
 * relocation type enum.
 */

/* Field Sk16, shifted right by 2; suitable for conditional jumps */
#define R_LOONGARCH_BR_SK16     256
/* Field Sd10k16, shifted right by 2; suitable for B and BL */
#define R_LOONGARCH_BR_SD10K16  257
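
/*
 * Field placement, as implemented by the two patchers below: the Sk16
 * field of conditional branches sits in instruction bits [25:10]; the
 * Sd10k16 field of B/BL is split, with its high 10 bits in [9:0] and
 * its low 16 bits in [25:10].  Both encode (target - pc) >> 2.
 */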

static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 3) == 0);
    offset >>= 2;
    if (offset == sextreg(offset, 0, 16)) {
        *src_rw = deposit64(*src_rw, 10, 16, offset);
        return true;
    }

    return false;
}

static bool reloc_br_sd10k16(tcg_insn_unit *src_rw,
                             const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 3) == 0);
    offset >>= 2;
    if (offset == sextreg(offset, 0, 26)) {
        *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */
        *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */
        return true;
    }

    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_LOONGARCH_BR_SK16:
        return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value);
    case R_LOONGARCH_BR_SD10K16:
        return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#include "tcg-insn-defs.c.inc"

/*
 * TCG intrinsics
 */

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Baseline LoongArch only has the full barrier, unfortunately.  */
    tcg_out_opc_dbar(s, 0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        /*
         * Conventional register-register move used in LoongArch is
         * `or dst, src, zero`.
         */
        tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

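/*
 * Decide whether the 20- or 12-bit part about to be deposited differs
 * from what the destination register already holds in those bits:
 * all-ones if the previous step left a negative (sign-extending)
 * value, all-zeros otherwise.  Parts that already match make the
 * corresponding cu32i.d/cu52i.d redundant.
 */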
static bool imm_part_needs_loading(bool high_bits_are_ones,
                                   tcg_target_long part)
{
    if (high_bits_are_ones) {
        return part != -1;
    } else {
        return part != 0;
    }
}

/* Loads a 32-bit immediate into rd, sign-extended.  */
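/*
 * For example, tcg_out_movi_i32(s, rd, 0x12345) emits
 *   lu12i.w  rd, 0x12        # rd = 0x12000
 *   ori      rd, rd, 0x345   # rd = 0x12345
 */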
static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
{
    tcg_target_long lo = sextreg(val, 0, 12);
    tcg_target_long hi12 = sextreg(val, 12, 20);

    /* Single-instruction cases.  */
    if (lo == val) {
        /* val fits in simm12: addi.w rd, zero, val */
        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
        return;
    }
    if (0x800 <= val && val <= 0xfff) {
        /* val fits in uimm12: ori rd, zero, val */
        tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
        return;
    }

    /* High bits must be set; load with lu12i.w + optional ori.  */
    tcg_out_opc_lu12i_w(s, rd, hi12);
    if (lo != 0) {
        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long val)
{
    /*
     * LoongArch conventionally loads 64-bit immediates in at most 4 steps,
     * with dedicated instructions for filling the respective bitfields
     * below:
     *
     *        6                   5                   4               3
     *  3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
     * +-----------------------+---------------------------------------+...
     * |          hi52         |                  hi32                 |
     * +-----------------------+---------------------------------------+...
     *       3                   2                   1
     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
     * ...+-------------------------------------+-------------------------+
     *    |                 hi12                |            lo           |
     * ...+-------------------------------------+-------------------------+
     *
     * Check if val belongs to one of the several fast cases, before falling
     * back to the slow path.
     */
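    /*
     * Worked example (ignoring the pc-relative shortcuts below):
     * val = 0x123456789abcdef0 needs the full four-insn sequence
     *   lu12i.w  rd, 0x9abcd     # bits 31..12, sign-extends upwards
     *   ori      rd, rd, 0xef0   # bits 11..0
     *   cu32i.d  rd, 0x45678     # bits 51..32
     *   cu52i.d  rd, rd, 0x123   # bits 63..52
     */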

    intptr_t pc_offset;
    tcg_target_long val_lo, val_hi, pc_hi, offset_hi;
    tcg_target_long hi32, hi52;
    bool rd_high_bits_are_ones;

    /* Value fits in signed i32.  */
    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
        tcg_out_movi_i32(s, rd, val);
        return;
    }

    /* PC-relative cases.  */
    pc_offset = tcg_pcrel_diff(s, (void *)val);
    if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
        /* Single pcaddu2i.  */
        tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
        return;
    }

    if (pc_offset == (int32_t)pc_offset) {
        /* Offset within 32 bits; load with pcalau12i + ori.  */
        val_lo = sextreg(val, 0, 12);
        val_hi = val >> 12;
        pc_hi = (val - pc_offset) >> 12;
        offset_hi = val_hi - pc_hi;

        tcg_debug_assert(offset_hi == sextreg(offset_hi, 0, 20));
        tcg_out_opc_pcalau12i(s, rd, offset_hi);
        if (val_lo != 0) {
            tcg_out_opc_ori(s, rd, rd, val_lo & 0xfff);
        }
        return;
    }

    hi32 = sextreg(val, 32, 20);
    hi52 = sextreg(val, 52, 12);

    /* Single cu52i.d case.  */
    if (ctz64(val) >= 52) {
        tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
        return;
    }

    /* Slow path.  Initialize the low 32 bits, then concat high bits.  */
    tcg_out_movi_i32(s, rd, val);
    rd_high_bits_are_ones = (int32_t)val < 0;

    if (imm_part_needs_loading(rd_high_bits_are_ones, hi32)) {
        tcg_out_opc_cu32i_d(s, rd, hi32);
        rd_high_bits_are_ones = hi32 < 0;
    }

    if (imm_part_needs_loading(rd_high_bits_are_ones, hi52)) {
        tcg_out_opc_cu52i_d(s, rd, rd, hi52);
    }
}

static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_andi(s, ret, arg, 0xff);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31);
}

static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_sext_b(s, ret, arg);
}

static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_sext_h(s, ret, arg);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_addi_w(s, ret, arg, 0);
}

419
420static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
421                           TCGReg a0, TCGReg a1, TCGReg a2,
422                           bool c2, bool is_32bit)
423{
424    if (c2) {
425        /*
426         * Fast path: semantics already satisfied due to constraint and
427         * insn behavior, single instruction is enough.
428         */
429        tcg_debug_assert(a2 == (is_32bit ? 32 : 64));
430        /* all clz/ctz insns belong to DJ-format */
431        tcg_out32(s, encode_dj_insn(opc, a0, a1));
432        return;
433    }
434
435    tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1));
436    /* a0 = a1 ? REG_TMP0 : a2 */
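    /*
     * maskeqz rd, rj, rk: rd = (rk == 0) ? 0 : rj
     * masknez rd, rj, rk: rd = (rk != 0) ? 0 : rj
     * OR-ing the two partial results yields a branchless select on a1.
     */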
    tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1);
    tcg_out_opc_masknez(s, a0, a2, a1);
    tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
}

static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, TCGReg arg2, bool c2)
{
    TCGReg tmp;

    if (c2) {
        tcg_debug_assert(arg2 == 0);
    }

    switch (cond) {
    case TCG_COND_EQ:
        if (c2) {
            tmp = arg1;
        } else {
            tcg_out_opc_sub_d(s, ret, arg1, arg2);
            tmp = ret;
        }
        tcg_out_opc_sltui(s, ret, tmp, 1);
        break;
    case TCG_COND_NE:
        if (c2) {
            tmp = arg1;
        } else {
            tcg_out_opc_sub_d(s, ret, arg1, arg2);
            tmp = ret;
        }
        tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
        break;
    case TCG_COND_LT:
        tcg_out_opc_slt(s, ret, arg1, arg2);
        break;
    case TCG_COND_GE:
        tcg_out_opc_slt(s, ret, arg1, arg2);
        tcg_out_opc_xori(s, ret, ret, 1);
        break;
    case TCG_COND_LE:
        tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, false);
        break;
    case TCG_COND_GT:
        tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, false);
        break;
    case TCG_COND_LTU:
        tcg_out_opc_sltu(s, ret, arg1, arg2);
        break;
    case TCG_COND_GEU:
        tcg_out_opc_sltu(s, ret, arg1, arg2);
        tcg_out_opc_xori(s, ret, ret, 1);
        break;
    case TCG_COND_LEU:
        tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, false);
        break;
    case TCG_COND_GTU:
        tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, false);
        break;
    default:
        g_assert_not_reached();
        break;
    }
}

/*
 * Branch helpers
 */

static const struct {
    LoongArchInsn op;
    bool swap;
} tcg_brcond_to_loongarch[] = {
    [TCG_COND_EQ] =  { OPC_BEQ,  false },
    [TCG_COND_NE] =  { OPC_BNE,  false },
    [TCG_COND_LT] =  { OPC_BGT,  true  },
    [TCG_COND_GE] =  { OPC_BLE,  true  },
    [TCG_COND_LE] =  { OPC_BLE,  false },
    [TCG_COND_GT] =  { OPC_BGT,  false },
    [TCG_COND_LTU] = { OPC_BGTU, true  },
    [TCG_COND_GEU] = { OPC_BLEU, true  },
    [TCG_COND_LEU] = { OPC_BLEU, false },
    [TCG_COND_GTU] = { OPC_BGTU, false }
};

static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                           TCGReg arg2, TCGLabel *l)
{
    LoongArchInsn op = tcg_brcond_to_loongarch[cond].op;

    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_loongarch[cond].swap) {
        TCGReg t = arg1;
        arg1 = arg2;
        arg2 = t;
    }

    /* all conditional branch insns belong to DJSk16-format */
    tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0);
    tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0));
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
    ptrdiff_t offset = tcg_pcrel_diff(s, arg);

    tcg_debug_assert((offset & 3) == 0);
    if (offset == sextreg(offset, 0, 28)) {
        /* short jump: within +/- 128MiB (28-bit signed byte offset) */
        if (tail) {
            tcg_out_opc_b(s, offset >> 2);
        } else {
            tcg_out_opc_bl(s, offset >> 2);
        }
    } else if (offset == sextreg(offset, 0, 38)) {
        /* long jump: within +/- 128GiB (38-bit signed byte offset) */
        tcg_target_long lo = sextreg(offset, 0, 18);
        tcg_target_long hi = offset - lo;
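        /*
         * pcaddu18i computes pc + (si20 << 18); jirl then adds the
         * remaining signed 18-bit displacement `lo`, so the pair can
         * reach anywhere in the 38-bit range in two instructions.
         */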
        tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18);
        tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
    } else {
        /* far jump: 64-bit */
        tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18);
        tcg_target_long hi = (tcg_target_long)arg - lo;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi);
        tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
{
    tcg_out_call_int(s, arg, false);
}

/*
 * Load/store helpers
 */

static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
                         TCGReg addr, intptr_t offset)
{
    intptr_t imm12 = sextreg(offset, 0, 12);

    if (offset != imm12) {
        intptr_t diff = offset - (uintptr_t)s->code_ptr;

        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
            imm12 = sextreg(diff, 0, 12);
            tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
            if (addr != TCG_REG_ZERO) {
                tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr);
            }
        }
        addr = TCG_REG_TMP2;
    }

    switch (opc) {
    case OPC_LD_B:
    case OPC_LD_BU:
    case OPC_LD_H:
    case OPC_LD_HU:
    case OPC_LD_W:
    case OPC_LD_WU:
    case OPC_LD_D:
    case OPC_ST_B:
    case OPC_ST_H:
    case OPC_ST_W:
    case OPC_ST_D:
        tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12));
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    bool is_32bit = type == TCG_TYPE_I32;
    tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                       TCGReg arg1, intptr_t arg2)
{
    bool is_32bit = type == TCG_TYPE_I32;
    tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
        return true;
    }
    return false;
}

/*
 * Load/store helpers for SoftMMU, and qemu_ld/st implementations
 */

#if defined(CONFIG_SOFTMMU)
#include "../tcg-ldst.c.inc"

/*
 * helper signature: helper_ret_ld_mmu(CPUArchState *env, target_ulong addr,
 *                                     MemOpIdx oi, uintptr_t ra)
 */
static void * const qemu_ld_helpers[4] = {
    [MO_8]  = helper_ret_ldub_mmu,
    [MO_16] = helper_le_lduw_mmu,
    [MO_32] = helper_le_ldul_mmu,
    [MO_64] = helper_le_ldq_mmu,
};

/*
 * helper signature: helper_ret_st_mmu(CPUArchState *env, target_ulong addr,
 *                                     uintxx_t val, MemOpIdx oi,
 *                                     uintptr_t ra)
 */
static void * const qemu_st_helpers[4] = {
    [MO_8]  = helper_ret_stb_mmu,
    [MO_16] = helper_le_stw_mmu,
    [MO_32] = helper_le_stl_mmu,
    [MO_64] = helper_le_stq_mmu,
};

/* We expect to use a 12-bit negative offset from ENV.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));

static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_opc_b(s, 0);
    return reloc_br_sd10k16(s->code_ptr - 1, target);
}

/*
 * Emit common code for the TLB addend lookup, which eventually loads the
 * addend into TCG_REG_TMP2.
 */
static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
                             tcg_insn_unit **label_ptr, bool is_load)
{
    MemOp opc = get_memop(oi);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_bits = get_alignment_bits(opc);
    tcg_target_long compare_mask;
    int mem_index = get_mmuidx(oi);
    int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
    int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
    int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

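    /*
     * TMP2 = ((addrl >> TARGET_PAGE_BITS) << CPU_TLB_ENTRY_BITS) & mask
     * is the byte offset of this page's entry within the fast table;
     * adding the table base yields the CPUTLBEntry address.
     */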
    tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
    tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
    tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);

    /* Load the tlb comparator and the addend.  */
    tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
               is_load ? offsetof(CPUTLBEntry, addr_read)
               : offsetof(CPUTLBEntry, addr_write));
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
               offsetof(CPUTLBEntry, addend));

    /* We don't support unaligned accesses.  */
    if (a_bits < s_bits) {
        a_bits = s_bits;
    }
    /* Clear the non-page, non-alignment bits from the address.  */
    compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
    tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
    tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);

    /* Compare masked address with the TLB entry.  */
    label_ptr[0] = s->code_ptr;
    tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);

    /* TLB Hit - addend in TCG_REG_TMP2, ready for use.  */
}

static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
                                TCGType type,
                                TCGReg datalo, TCGReg addrlo,
                                void *raddr, tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->type = type;
    label->datalo_reg = datalo;
    label->datahi_reg = 0; /* unused */
    label->addrlo_reg = addrlo;
    label->addrhi_reg = 0; /* unused */
    label->raddr = tcg_splitwx_to_rx(raddr);
    label->label_ptr[0] = label_ptr[0];
}

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOpIdx oi = l->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;
    TCGType type = l->type;

    /* resolve label address */
    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* call load helper */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);

    tcg_out_call(s, qemu_ld_helpers[size]);

    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, l->datalo_reg, TCG_REG_A0);
        break;
    case MO_SW:
        tcg_out_ext16s(s, l->datalo_reg, TCG_REG_A0);
        break;
    case MO_SL:
        tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0);
        break;
    case MO_UL:
        if (type == TCG_TYPE_I32) {
            /* MO_UL loads of i32 should be sign-extended too */
            tcg_out_ext32s(s, l->datalo_reg, TCG_REG_A0);
            break;
        }
        /* fallthrough */
    default:
        tcg_out_mov(s, type, l->datalo_reg, TCG_REG_A0);
        break;
    }

    return tcg_out_goto(s, l->raddr);
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOpIdx oi = l->oi;
    MemOp opc = get_memop(oi);
    MemOp size = opc & MO_SIZE;

    /* resolve label address */
    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    /* call store helper */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
    switch (size) {
    case MO_8:
        tcg_out_ext8u(s, TCG_REG_A2, l->datalo_reg);
        break;
    case MO_16:
        tcg_out_ext16u(s, TCG_REG_A2, l->datalo_reg);
        break;
    case MO_32:
        tcg_out_ext32u(s, TCG_REG_A2, l->datalo_reg);
        break;
    case MO_64:
        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_A2, l->datalo_reg);
        break;
    default:
        g_assert_not_reached();
        break;
    }
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);

    tcg_out_call(s, qemu_st_helpers[size]);

    return tcg_out_goto(s, l->raddr);
}
#endif /* CONFIG_SOFTMMU */

/*
 * Zero-extend (`ext32u`) the address register into the given temp
 * register if the guest addresses are 32 bits wide; no-op otherwise.
 *
 * Returns the register holding the address, ready for use with a TLB
 * addend.
 */
static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
                                          TCGReg addr, TCGReg tmp)
{
    if (TARGET_LONG_BITS == 32) {
        tcg_out_ext32u(s, tmp, addr);
        return tmp;
    }
    return addr;
}

static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
                                    TCGReg rk, MemOp opc, TCGType type)
{
    /* Byte swapping is left to middle-end expansion.  */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_opc_ldx_bu(s, rd, rj, rk);
        break;
    case MO_SB:
        tcg_out_opc_ldx_b(s, rd, rj, rk);
        break;
    case MO_UW:
        tcg_out_opc_ldx_hu(s, rd, rj, rk);
        break;
    case MO_SW:
        tcg_out_opc_ldx_h(s, rd, rj, rk);
        break;
    case MO_UL:
        if (type == TCG_TYPE_I64) {
            tcg_out_opc_ldx_wu(s, rd, rj, rk);
            break;
        }
        /* fallthrough */
    case MO_SL:
        tcg_out_opc_ldx_w(s, rd, rj, rk);
        break;
    case MO_Q:
        tcg_out_opc_ldx_d(s, rd, rj, rk);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
{
    TCGReg addr_regl;
    TCGReg data_regl;
    MemOpIdx oi;
    MemOp opc;
#if defined(CONFIG_SOFTMMU)
    tcg_insn_unit *label_ptr[1];
#endif
    TCGReg base;

    data_regl = *args++;
    addr_regl = *args++;
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
    tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type);
    add_qemu_ldst_label(s, 1, oi, type,
                        data_regl, addr_regl,
                        s->code_ptr, label_ptr);
#else
    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
    TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
    tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type);
#endif
}

static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data,
                                    TCGReg rj, TCGReg rk, MemOp opc)
{
    /* Byte swapping is left to middle-end expansion.  */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_opc_stx_b(s, data, rj, rk);
        break;
    case MO_16:
        tcg_out_opc_stx_h(s, data, rj, rk);
        break;
    case MO_32:
        tcg_out_opc_stx_w(s, data, rj, rk);
        break;
    case MO_64:
        tcg_out_opc_stx_d(s, data, rj, rk);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
{
    TCGReg addr_regl;
    TCGReg data_regl;
    MemOpIdx oi;
    MemOp opc;
#if defined(CONFIG_SOFTMMU)
    tcg_insn_unit *label_ptr[1];
#endif
    TCGReg base;

    data_regl = *args++;
    addr_regl = *args++;
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
    tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc);
    add_qemu_ldst_label(s, 0, oi,
                        0, /* type param is unused for stores */
                        data_regl, addr_regl,
                        s->code_ptr, label_ptr);
#else
    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
    TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
    tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc);
#endif
}

/*
 * Entry-points
 */

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_call_int(s, tcg_code_gen_epilogue, true);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
            tcg_out_call_int(s, tb_ret_addr, true);
        }
        break;

    case INDEX_op_goto_tb:
        assert(s->tb_jmp_insn_offset == 0);
        /* indirect jump method */
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
                   (uintptr_t)(s->tb_jmp_target_addr + a0));
        tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
        set_jmp_reset_offset(s, a0);
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0),
                      0);
        tcg_out_opc_b(s, 0);
        break;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, a0, a1);
        break;

    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, a0, a1);
        break;

    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, a0, a1);
        break;

    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, a0, a1);
        break;

    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
        tcg_out_ext32u(s, a0, a1);
        break;

    case INDEX_op_ext32s_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext_i32_i64:
        tcg_out_ext32s(s, a0, a1);
        break;

    case INDEX_op_extrh_i64_i32:
        tcg_out_opc_srai_d(s, a0, a1, 32);
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
        break;

    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
            tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO);
        } else {
            tcg_out_opc_nor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_andi(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_andn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_ori(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_orn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_andi(s, a0, a1, a2);
        } else {
            tcg_out_opc_and(s, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
        } else {
            tcg_out_opc_or(s, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_xori(s, a0, a1, a2);
        } else {
            tcg_out_opc_xor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_extract_i32:
        tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
        break;
    case INDEX_op_extract_i64:
        tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
        break;

    case INDEX_op_deposit_i32:
        tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;
    case INDEX_op_deposit_i64:
        tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        tcg_out_opc_revb_2h(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap32_i32:
        /* All 32-bit values are computed sign-extended in the register.  */
        a2 = TCG_BSWAP_OS;
        /* fallthrough */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_revb_2w(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_opc_revb_d(s, a0, a1);
        break;

    case INDEX_op_clz_i32:
        tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_clz_i64:
        tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_ctz_i32:
        tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sll_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sll_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_srl_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_srl_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sra_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sra_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i32:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
        } else {
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
        }
        break;
    case INDEX_op_rotl_i64:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
        } else {
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_rotr_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_rotr_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_opc_addi_w(s, a0, a1, a2);
        } else {
            tcg_out_opc_add_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_opc_addi_d(s, a0, a1, a2);
        } else {
            tcg_out_opc_add_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_opc_addi_w(s, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_opc_addi_d(s, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_mul_w(s, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_mul_d(s, a0, a1, a2);
        break;

    case INDEX_op_mulsh_i32:
        tcg_out_opc_mulh_w(s, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_opc_mulh_d(s, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
        tcg_out_opc_mulh_wu(s, a0, a1, a2);
        break;
    case INDEX_op_muluh_i64:
        tcg_out_opc_mulh_du(s, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_div_w(s, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_div_d(s, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_div_wu(s, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_div_du(s, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_mod_w(s, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_mod_d(s, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_mod_wu(s, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_mod_du(s, a0, a1, a2);
        break;

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LD_H, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LD_W, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD_D, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_ST_B, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_ST_H, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_ST_W, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        g_assert_not_reached();
    }
}

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
{
    switch (op) {
    case INDEX_op_goto_ptr:
        return C_O0_I1(r);

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st_i64:
        return C_O0_I2(rZ, r);

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return C_O0_I2(rZ, rZ);

    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_st_i64:
        return C_O0_I2(LZ, L);

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld_i64:
        return C_O1_I1(r, r);

    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_ld_i64:
        return C_O1_I1(r, L);

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        /*
         * LoongArch insns for these ops don't have reg-imm forms, but we
         * can express using andi/ori if ~constant satisfies
         * TCG_CT_CONST_U12.
         */
        return C_O1_I2(r, r, rC);

    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return C_O1_I2(r, r, ri);

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return C_O1_I2(r, r, rI);

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        /* LoongArch reg-imm bitops have their imms ZERO-extended */
        return C_O1_I2(r, r, rU);

    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        return C_O1_I2(r, r, rW);

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        return C_O1_I2(r, r, rZ);

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        /* Must deposit into the same register as input */
        return C_O1_I2(r, 0, rZ);

    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
        return C_O1_I2(r, rZ, rN);

    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_mulsh_i32:
    case INDEX_op_mulsh_i64:
    case INDEX_op_muluh_i32:
    case INDEX_op_muluh_i64:
    case INDEX_op_div_i32:
    case INDEX_op_div_i64:
    case INDEX_op_divu_i32:
    case INDEX_op_divu_i64:
    case INDEX_op_rem_i32:
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i32:
    case INDEX_op_remu_i64:
        return C_O1_I2(r, rZ, rZ);

    default:
        g_assert_not_reached();
    }
}

static const int tcg_target_callee_save_regs[] = {
    TCG_REG_S0,     /* used for the global env (TCG_AREG0) */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,
    TCG_REG_RA,     /* should be last for ABI compliance */
};

/* Stack frame parameters.  */
#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
                     + TCG_TARGET_STACK_ALIGN - 1) \
                    & -TCG_TARGET_STACK_ALIGN)
#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
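
/*
 * Resulting frame layout, sp-relative:
 *   [0, TCG_STATIC_CALL_ARGS_SIZE)        outgoing arguments for helpers
 *   [TCG_STATIC_CALL_ARGS_SIZE, SAVE_OFS) TCG temporary buffer
 *   [SAVE_OFS, SAVE_OFS + SAVE_SIZE)      callee-saved registers and ra
 */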

/* We're expecting to be able to use an immediate for frame allocation.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i;

    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);

    /* TB prologue */
    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

#if !defined(CONFIG_SOFTMMU)
    if (USE_GUEST_BASE) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* Call generated code */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);

    /* Return path for goto_ptr. Set return value to 0 */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
    }

    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
}

static void tcg_target_init(TCGContext *s)
{
    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;

    tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
} DebugFrame;

#define ELF_HOST_MACHINE EM_LOONGARCH

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
    .h.cie.return_column = TCG_REG_RA,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ...  */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x80 + 23, 11,                  /* DW_CFA_offset, s0, -88 */
        0x80 + 24, 10,                  /* DW_CFA_offset, s1, -80 */
        0x80 + 25, 9,                   /* DW_CFA_offset, s2, -72 */
        0x80 + 26, 8,                   /* DW_CFA_offset, s3, -64 */
        0x80 + 27, 7,                   /* DW_CFA_offset, s4, -56 */
        0x80 + 28, 6,                   /* DW_CFA_offset, s5, -48 */
        0x80 + 29, 5,                   /* DW_CFA_offset, s6, -40 */
        0x80 + 30, 4,                   /* DW_CFA_offset, s7, -32 */
        0x80 + 31, 3,                   /* DW_CFA_offset, s8, -24 */
        0x80 + 22, 2,                   /* DW_CFA_offset, s9, -16 */
        0x80 + 1 , 1,                   /* DW_CFA_offset, ra, -8 */
    }
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}