xref: /openbmc/qemu/tcg/loongarch64/tcg-target.c.inc (revision ba26f1477735a5ad7dd40a3227ac2a54cf82014d)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2021 WANG Xuerui <git@xen0n.name>
 *
 * Based on tcg/riscv/tcg-target.c.inc
 *
 * Copyright (c) 2018 SiFive, Inc
 * Copyright (c) 2008-2009 Arnaud Patard <arnaud.patard@rtp-net.org>
 * Copyright (c) 2009 Aurelien Jarno <aurelien@aurel32.net>
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <asm/hwcap.h>

/* used for function call generation */
#define TCG_REG_CALL_STACK              TCG_REG_SP
#define TCG_TARGET_STACK_ALIGN          16
#define TCG_TARGET_CALL_STACK_OFFSET    0
#define TCG_TARGET_CALL_ARG_I32         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I64         TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_ARG_I128        TCG_CALL_ARG_NORMAL
#define TCG_TARGET_CALL_RET_I128        TCG_CALL_RET_NORMAL

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "zero",
    "ra",
    "tp",
    "sp",
    "a0",
    "a1",
    "a2",
    "a3",
    "a4",
    "a5",
    "a6",
    "a7",
    "t0",
    "t1",
    "t2",
    "t3",
    "t4",
    "t5",
    "t6",
    "t7",
    "t8",
    "r21", /* reserved in the LP64* ABI, hence no ABI name */
    "s9",
    "s0",
    "s1",
    "s2",
    "s3",
    "s4",
    "s5",
    "s6",
    "s7",
    "s8",
    "vr0",
    "vr1",
    "vr2",
    "vr3",
    "vr4",
    "vr5",
    "vr6",
    "vr7",
    "vr8",
    "vr9",
    "vr10",
    "vr11",
    "vr12",
    "vr13",
    "vr14",
    "vr15",
    "vr16",
    "vr17",
    "vr18",
    "vr19",
    "vr20",
    "vr21",
    "vr22",
    "vr23",
    "vr24",
    "vr25",
    "vr26",
    "vr27",
    "vr28",
    "vr29",
    "vr30",
    "vr31",
};
#endif

static const int tcg_target_reg_alloc_order[] = {
    /* Registers preserved across calls */
    /* TCG_REG_S0 reserved for TCG_AREG0 */
    TCG_REG_S1,
    TCG_REG_S2,
    TCG_REG_S3,
    TCG_REG_S4,
    TCG_REG_S5,
    TCG_REG_S6,
    TCG_REG_S7,
    TCG_REG_S8,
    TCG_REG_S9,

    /* Registers (potentially) clobbered across calls */
    TCG_REG_T0,
    TCG_REG_T1,
    TCG_REG_T2,
    TCG_REG_T3,
    TCG_REG_T4,
    TCG_REG_T5,
    TCG_REG_T6,
    TCG_REG_T7,
    TCG_REG_T8,

    /* Argument registers, opposite order of allocation.  */
    TCG_REG_A7,
    TCG_REG_A6,
    TCG_REG_A5,
    TCG_REG_A4,
    TCG_REG_A3,
    TCG_REG_A2,
    TCG_REG_A1,
    TCG_REG_A0,

    /* Vector registers */
    TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
    TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
    TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
    TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
    TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
    TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
    /* V24 - V31 are caller-saved, and skipped.  */
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_A0,
    TCG_REG_A1,
    TCG_REG_A2,
    TCG_REG_A3,
    TCG_REG_A4,
    TCG_REG_A5,
    TCG_REG_A6,
    TCG_REG_A7,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot >= 0 && slot <= 1);
    return TCG_REG_A0 + slot;
}

#define TCG_GUEST_BASE_REG TCG_REG_S1

#define TCG_CT_CONST_ZERO  0x100
#define TCG_CT_CONST_S12   0x200
#define TCG_CT_CONST_S32   0x400
#define TCG_CT_CONST_U12   0x800
#define TCG_CT_CONST_C12   0x1000
#define TCG_CT_CONST_WSZ   0x2000
#define TCG_CT_CONST_VCMP  0x4000
#define TCG_CT_CONST_VADD  0x8000

#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
#define ALL_VECTOR_REGS    MAKE_64BIT_MASK(32, 32)

static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
{
    return sextract64(val, pos, len);
}

/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    if (ct & TCG_CT_CONST) {
        return true;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return true;
    }
    if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
        return true;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return true;
    }
    if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
        return true;
    }
    if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) {
        return true;
    }
    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return true;
    }
    int64_t vec_val = sextract64(val, 0, 8 << vece);
    if ((ct & TCG_CT_CONST_VCMP) && -0x10 <= vec_val && vec_val <= 0x1f) {
        return true;
    }
    if ((ct & TCG_CT_CONST_VADD) && -0x1f <= vec_val && vec_val <= 0x1f) {
        return true;
    }
    return false;
}
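
/*
 * Illustrative examples (not part of the original source): TCG_CT_CONST_C12
 * accepts values whose bitwise complement fits in an unsigned 12-bit field,
 * e.g. val = -1 (~val = 0) or val = ~0xff (~val = 0xff), so that the
 * inverted immediate can feed andi-style insns.  TCG_CT_CONST_VCMP accepts
 * per-element constants in [-0x10, 0x1f], presumably the union of the si5
 * and ui5 immediate forms of the vector compares, while TCG_CT_CONST_VADD
 * accepts [-0x1f, 0x1f] so the magnitude fits vaddi/vsubi's ui5 field.
 */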

/*
 * Relocations
 */

/*
 * The relocation records defined in the LoongArch ELF psABI v1.00 are way
 * too complicated: a whopping stack machine is needed to stuff the fields,
 * and at the very least one SOP_PUSH and one SOP_POP (of the correct
 * format) are needed.
 *
 * Hence, define our own simpler relocation types.  Numbers are chosen so
 * as not to collide with potential future additions to the true ELF
 * relocation type enum.
 */

/* Field Sk16, shifted right by 2; suitable for conditional jumps */
#define R_LOONGARCH_BR_SK16     256
/* Field Sd10k16, shifted right by 2; suitable for B and BL */
#define R_LOONGARCH_BR_SD10K16  257
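
/*
 * Worked example (illustrative, not from the original source): patching an
 * Sk16 field.  For a conditional branch at src_rx targeting src_rx + 0x40,
 * the byte offset 0x40 becomes 0x10 after the >> 2, and reloc_br_sk16
 * below deposits 0x10 into bits [25:10] of the branch insn.  An offset
 * that does not fit a signed 16-bit field makes the relocation return
 * false instead of emitting a truncated encoding.
 */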

static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 3) == 0);
    offset >>= 2;
    if (offset == sextreg(offset, 0, 16)) {
        *src_rw = deposit64(*src_rw, 10, 16, offset);
        return true;
    }

    return false;
}

static bool reloc_br_sd10k16(tcg_insn_unit *src_rw,
                             const tcg_insn_unit *target)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t offset = (intptr_t)target - (intptr_t)src_rx;

    tcg_debug_assert((offset & 3) == 0);
    offset >>= 2;
    if (offset == sextreg(offset, 0, 26)) {
        *src_rw = deposit64(*src_rw, 0, 10, offset >> 16); /* slot d10 */
        *src_rw = deposit64(*src_rw, 10, 16, offset); /* slot k16 */
        return true;
    }

    return false;
}

static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    tcg_debug_assert(addend == 0);
    switch (type) {
    case R_LOONGARCH_BR_SK16:
        return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value);
    case R_LOONGARCH_BR_SD10K16:
        return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value);
    default:
        g_assert_not_reached();
    }
}

#include "tcg-insn-defs.c.inc"

/*
 * TCG intrinsics
 */

static void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Baseline LoongArch only has the full barrier, unfortunately.  */
    tcg_out_opc_dbar(s, 0);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    if (ret == arg) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_I64:
        if (ret < TCG_REG_V0) {
            if (arg < TCG_REG_V0) {
                /*
                 * Conventional register-register move used in LoongArch is
                 * `or dst, src, zero`.
                 */
                tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
            } else {
                tcg_out_opc_movfr2gr_d(s, ret, arg);
            }
        } else {
            if (arg < TCG_REG_V0) {
                tcg_out_opc_movgr2fr_d(s, ret, arg);
            } else {
                tcg_out_opc_fmov_d(s, ret, arg);
            }
        }
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_opc_vori_b(s, ret, arg, 0);
        break;
    case TCG_TYPE_V256:
        tcg_out_opc_xvori_b(s, ret, arg, 0);
        break;
    default:
        g_assert_not_reached();
    }
    return true;
}

/* Loads a 32-bit immediate into rd, sign-extended.  */
static void tcg_out_movi_i32(TCGContext *s, TCGReg rd, int32_t val)
{
    tcg_target_long lo = sextreg(val, 0, 12);
    tcg_target_long hi12 = sextreg(val, 12, 20);

    /* Single-instruction cases.  */
    if (hi12 == 0) {
        /* val fits in uimm12: ori rd, zero, val */
        tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
        return;
    }
    if (hi12 == sextreg(lo, 12, 20)) {
        /* val fits in simm12: addi.w rd, zero, val */
        tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
        return;
    }

    /* High bits must be set; load with lu12i.w + optional ori.  */
    tcg_out_opc_lu12i_w(s, rd, hi12);
    if (lo != 0) {
        tcg_out_opc_ori(s, rd, rd, lo & 0xfff);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                         tcg_target_long val)
{
    /*
     * LoongArch conventionally loads 64-bit immediates in at most 4 steps,
     * with dedicated instructions for filling the respective bitfields
     * below:
     *
     *        6                   5                   4               3
     *  3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2
     * +-----------------------+---------------------------------------+...
     * |          hi52         |                  hi32                 |
     * +-----------------------+---------------------------------------+...
     *       3                   2                   1
     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
     * ...+-------------------------------------+-------------------------+
     *    |                 hi12                |            lo           |
     * ...+-------------------------------------+-------------------------+
     *
     * Check if val belongs to one of the several fast cases, before falling
     * back to the slow path.
     */

    intptr_t src_rx, pc_offset;
    tcg_target_long hi12, hi32, hi52;

    /* Value fits in signed i32.  */
    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
        tcg_out_movi_i32(s, rd, val);
        return;
    }

    /* PC-relative cases.  */
    src_rx = (intptr_t)tcg_splitwx_to_rx(s->code_ptr);
    if ((val & 3) == 0) {
        pc_offset = val - src_rx;
        if (pc_offset == sextreg(pc_offset, 0, 22)) {
            /* Single pcaddu2i.  */
            tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
            return;
        }
    }

    pc_offset = (val >> 12) - (src_rx >> 12);
    if (pc_offset == sextreg(pc_offset, 0, 20)) {
        /* Load with pcalau12i + ori.  */
        tcg_target_long val_lo = val & 0xfff;
        tcg_out_opc_pcalau12i(s, rd, pc_offset);
        if (val_lo != 0) {
            tcg_out_opc_ori(s, rd, rd, val_lo);
        }
        return;
    }

    hi12 = sextreg(val, 12, 20);
    hi32 = sextreg(val, 32, 20);
    hi52 = sextreg(val, 52, 12);

    /* Single cu52i.d case.  */
    if ((hi52 != 0) && (ctz64(val) >= 52)) {
        tcg_out_opc_cu52i_d(s, rd, TCG_REG_ZERO, hi52);
        return;
    }
    /* Slow path.  Initialize the low 32 bits, then concatenate high bits.  */
    tcg_out_movi_i32(s, rd, val);

    /* Load hi32 and hi52 explicitly when they are not just sign extensions. */
    if (hi32 != sextreg(hi12, 20, 20)) {
        tcg_out_opc_cu32i_d(s, rd, hi32);
    }

    if (hi52 != sextreg(hi32, 20, 12)) {
        tcg_out_opc_cu52i_d(s, rd, rd, hi52);
    }
}
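
/*
 * Worked example (illustrative, not from the original source): assuming
 * none of the PC-relative fast cases hit, val = 0x123456789abcdef0 emits
 * the full four-insn sequence:
 *   lu12i.w  rd, 0x9abcd       # bits 31..12, sign-extended to 64 bits
 *   ori      rd, rd, 0xef0     # bits 11..0
 *   cu32i.d  rd, 0x45678       # bits 51..32
 *   cu52i.d  rd, rd, 0x123     # bits 63..52
 * whereas val = 0xfff0000000000000 hits the single cu52i.d case, since
 * ctz64(val) >= 52 leaves all lower bits zero.
 */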

static void tcg_out_addi(TCGContext *s, TCGType type, TCGReg rd,
                         TCGReg rs, tcg_target_long imm)
{
    tcg_target_long lo12 = sextreg(imm, 0, 12);
    tcg_target_long hi16 = sextreg(imm - lo12, 16, 16);

    /*
     * Note that there's a hole in between hi16 and lo12:
     *
     *       3                   2                   1                   0
     *     1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
     * ...+-------------------------------+-------+-----------------------+
     *    |             hi16              |       |          lo12         |
     * ...+-------------------------------+-------+-----------------------+
     *
     * For bits within that hole, it's more efficient to use LU12I and ADD.
     */
    if (imm == (hi16 << 16) + lo12) {
        if (hi16) {
            tcg_out_opc_addu16i_d(s, rd, rs, hi16);
            rs = rd;
        }
        if (type == TCG_TYPE_I32) {
            tcg_out_opc_addi_w(s, rd, rs, lo12);
        } else if (lo12) {
            tcg_out_opc_addi_d(s, rd, rs, lo12);
        } else {
            tcg_out_mov(s, type, rd, rs);
        }
    } else {
        tcg_out_movi(s, type, TCG_REG_TMP0, imm);
        if (type == TCG_TYPE_I32) {
            tcg_out_opc_add_w(s, rd, rs, TCG_REG_TMP0);
        } else {
            tcg_out_opc_add_d(s, rd, rs, TCG_REG_TMP0);
        }
    }
}
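
/*
 * Worked example (illustrative): imm = 0x307ff splits cleanly, since
 * lo12 = 0x7ff and hi16 = 3 give (3 << 16) + 0x7ff == imm, yielding
 * addu16i.d + addi.d.  imm = 0x2345 does not: lo12 = 0x345 leaves 0x2000
 * sitting in the hole between the two fields, so the generic
 * movi + add path is taken instead.
 */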

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    g_assert_not_reached();
}

static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_andi(s, ret, arg, 0xff);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31);
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_sext_b(s, ret, arg);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_sext_h(s, ret, arg);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_opc_addi_w(s, ret, arg, 0);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    if (ret != arg) {
        tcg_out_ext32s(s, ret, arg);
    }
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32u(s, ret, arg);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
{
    tcg_out_ext32s(s, ret, arg);
}

static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
                           TCGReg a0, TCGReg a1, TCGReg a2,
                           bool c2, bool is_32bit)
{
    if (c2) {
        /*
         * Fast path: the semantics are already satisfied by the constraint
         * and the insn behavior, so a single instruction is enough.
         */
        tcg_debug_assert(a2 == (is_32bit ? 32 : 64));
        /* all clz/ctz insns belong to DJ-format */
        tcg_out32(s, encode_dj_insn(opc, a0, a1));
        return;
    }

    tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1));
    /* a0 = a1 ? REG_TMP0 : a2 */
    tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1);
    tcg_out_opc_masknez(s, a0, a2, a1);
    tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
}

#define SETCOND_INV    TCG_TARGET_NB_REGS
#define SETCOND_NEZ    (SETCOND_INV << 1)
#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
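
/*
 * Note (illustrative, not from the original source): tcg_out_setcond_int
 * returns the result register with these flags OR'ed in.  E.g. for EQ
 * between two registers, the condition is first inverted to NE, a XOR
 * produces a zero/non-zero value, and the return is
 * ret | SETCOND_NEZ | SETCOND_INV; tcg_out_setcond then materializes the
 * boolean with sltui ret, tmp, 1, i.e. (tmp == 0).
 */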

static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int flags = 0;

    switch (cond) {
    case TCG_COND_EQ:    /* -> NE  */
    case TCG_COND_GE:    /* -> LT  */
    case TCG_COND_GEU:   /* -> LTU */
    case TCG_COND_GT:    /* -> LE  */
    case TCG_COND_GTU:   /* -> LEU */
        cond = tcg_invert_cond(cond);
        flags ^= SETCOND_INV;
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_LE:
    case TCG_COND_LEU:
        /*
         * If we have a constant input, the most efficient way to implement
         * LE is by adding 1 and using LT.  Watch out for wraparound for LEU.
         * We don't need to worry about this for LE because the constant
         * input is still constrained to int32_t, and INT32_MAX+1 is
         * representable in the 64-bit temporary register.
         */
        if (c2) {
            if (cond == TCG_COND_LEU) {
                /* unsigned <= -1 is true */
                if (arg2 == -1) {
                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
                    return ret;
                }
                cond = TCG_COND_LTU;
            } else {
                cond = TCG_COND_LT;
            }
            arg2 += 1;
        } else {
            TCGReg tmp = arg2;
            arg2 = arg1;
            arg1 = tmp;
            cond = tcg_swap_cond(cond);    /* LE -> GE */
            cond = tcg_invert_cond(cond);  /* GE -> LT */
            flags ^= SETCOND_INV;
        }
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_NE:
        flags |= SETCOND_NEZ;
        if (!c2) {
            tcg_out_opc_xor(s, ret, arg1, arg2);
        } else if (arg2 == 0) {
            ret = arg1;
        } else if (arg2 >= 0 && arg2 <= 0xfff) {
            tcg_out_opc_xori(s, ret, arg1, arg2);
        } else {
            tcg_out_addi(s, TCG_TYPE_REG, ret, arg1, -arg2);
        }
        break;

    case TCG_COND_LT:
    case TCG_COND_LTU:
        if (c2) {
            if (arg2 >= -0x800 && arg2 <= 0x7ff) {
                if (cond == TCG_COND_LT) {
                    tcg_out_opc_slti(s, ret, arg1, arg2);
                } else {
                    tcg_out_opc_sltui(s, ret, arg1, arg2);
                }
                break;
            }
            tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
            arg2 = TCG_REG_TMP0;
        }
        if (cond == TCG_COND_LT) {
            tcg_out_opc_slt(s, ret, arg1, arg2);
        } else {
            tcg_out_opc_sltu(s, ret, arg1, arg2);
        }
        break;

    default:
        g_assert_not_reached();
    }

    return ret | flags;
}

static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg arg1, tcg_target_long arg2, bool c2)
{
    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);

    if (tmpflags != ret) {
        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;

        switch (tmpflags & SETCOND_FLAGS) {
        case SETCOND_INV:
            /* Intermediate result is boolean: simply invert. */
            tcg_out_opc_xori(s, ret, tmp, 1);
            break;
        case SETCOND_NEZ:
            /* Intermediate result is zero/non-zero: test != 0. */
            tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
            break;
        case SETCOND_NEZ | SETCOND_INV:
            /* Intermediate result is zero/non-zero: test == 0. */
            tcg_out_opc_sltui(s, ret, tmp, 1);
            break;
        default:
            g_assert_not_reached();
        }
    }
}

static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
                            TCGReg c1, tcg_target_long c2, bool const2,
                            TCGReg v1, TCGReg v2)
{
    int tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, c1, c2, const2);
    TCGReg t;

    /* Standardize the test below to t != 0. */
    if (tmpflags & SETCOND_INV) {
        t = v1, v1 = v2, v2 = t;
    }

    t = tmpflags & ~SETCOND_FLAGS;
    if (v1 == TCG_REG_ZERO) {
        tcg_out_opc_masknez(s, ret, v2, t);
    } else if (v2 == TCG_REG_ZERO) {
        tcg_out_opc_maskeqz(s, ret, v1, t);
    } else {
        tcg_out_opc_masknez(s, TCG_REG_TMP2, v2, t); /* t ? 0 : v2 */
        tcg_out_opc_maskeqz(s, TCG_REG_TMP1, v1, t); /* t ? v1 : 0 */
        tcg_out_opc_or(s, ret, TCG_REG_TMP1, TCG_REG_TMP2);
    }
}

/*
 * Branch helpers
 */

static const struct {
    LoongArchInsn op;
    bool swap;
} tcg_brcond_to_loongarch[] = {
    [TCG_COND_EQ] =  { OPC_BEQ,  false },
    [TCG_COND_NE] =  { OPC_BNE,  false },
    [TCG_COND_LT] =  { OPC_BGT,  true  },
    [TCG_COND_GE] =  { OPC_BLE,  true  },
    [TCG_COND_LE] =  { OPC_BLE,  false },
    [TCG_COND_GT] =  { OPC_BGT,  false },
    [TCG_COND_LTU] = { OPC_BGTU, true  },
    [TCG_COND_GEU] = { OPC_BLEU, true  },
    [TCG_COND_LEU] = { OPC_BLEU, false },
    [TCG_COND_GTU] = { OPC_BGTU, false }
};

static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
                           TCGReg arg2, TCGLabel *l)
{
    LoongArchInsn op = tcg_brcond_to_loongarch[cond].op;

    tcg_debug_assert(op != 0);

    if (tcg_brcond_to_loongarch[cond].swap) {
        TCGReg t = arg1;
        arg1 = arg2;
        arg2 = t;
    }

    /* all conditional branch insns belong to DJSk16-format */
    tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0);
    tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0));
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
{
    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
    ptrdiff_t offset = tcg_pcrel_diff(s, arg);

    tcg_debug_assert((offset & 3) == 0);
    if (offset == sextreg(offset, 0, 28)) {
        /* short jump: +/- 128MiB (signed 28-bit byte offset) */
        if (tail) {
            tcg_out_opc_b(s, offset >> 2);
        } else {
            tcg_out_opc_bl(s, offset >> 2);
        }
    } else if (offset == sextreg(offset, 0, 38)) {
        /* long jump: +/- 128GiB (signed 38-bit byte offset) */
        tcg_target_long lo = sextreg(offset, 0, 18);
        tcg_target_long hi = offset - lo;
        tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18);
        tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
    } else {
        /* far jump: 64-bit */
        tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18);
        tcg_target_long hi = (tcg_target_long)arg - lo;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi);
        tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
    }
}
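
/*
 * Worked example (illustrative) of the long-jump split above: for
 * offset = 0x10030000, lo = sextreg(offset, 0, 18) = -0x10000 and
 * hi = 0x10040000, so we emit pcaddu18i tmp0, 0x401 followed by
 * jirl link, tmp0, -0x4000.  Splitting with a sign-extended lo is what
 * lets the 20-bit pcaddu18i field cover the full +/- 128GiB reach.
 */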

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, arg, false);
}

/*
 * Load/store helpers
 */

static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
                         TCGReg addr, intptr_t offset)
{
    intptr_t imm12 = sextreg(offset, 0, 12);

    if (offset != imm12) {
        intptr_t diff = tcg_pcrel_diff(s, (void *)offset);

        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
            imm12 = sextreg(diff, 0, 12);
            tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
            if (addr != TCG_REG_ZERO) {
                tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr);
            }
        }
        addr = TCG_REG_TMP2;
    }

    switch (opc) {
    case OPC_LD_B:
    case OPC_LD_BU:
    case OPC_LD_H:
    case OPC_LD_HU:
    case OPC_LD_W:
    case OPC_LD_WU:
    case OPC_LD_D:
    case OPC_ST_B:
    case OPC_ST_H:
    case OPC_ST_W:
    case OPC_ST_D:
        tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12));
        break;
    case OPC_FLD_S:
    case OPC_FLD_D:
    case OPC_FST_S:
    case OPC_FST_D:
        tcg_out32(s, encode_fdjsk12_insn(opc, data, addr, imm12));
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg dest,
                       TCGReg base, intptr_t offset)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (dest < TCG_REG_V0) {
            tcg_out_ldst(s, OPC_LD_W, dest, base, offset);
        } else {
            tcg_out_ldst(s, OPC_FLD_S, dest, base, offset);
        }
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        if (dest < TCG_REG_V0) {
            tcg_out_ldst(s, OPC_LD_D, dest, base, offset);
        } else {
            tcg_out_ldst(s, OPC_FLD_D, dest, base, offset);
        }
        break;
    case TCG_TYPE_V128:
        if (-0x800 <= offset && offset <= 0x7ff) {
            tcg_out_opc_vld(s, dest, base, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_vldx(s, dest, base, TCG_REG_TMP0);
        }
        break;
    case TCG_TYPE_V256:
        if (-0x800 <= offset && offset <= 0x7ff) {
            tcg_out_opc_xvld(s, dest, base, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_xvldx(s, dest, base, TCG_REG_TMP0);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg src,
                       TCGReg base, intptr_t offset)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (src < TCG_REG_V0) {
            tcg_out_ldst(s, OPC_ST_W, src, base, offset);
        } else {
            tcg_out_ldst(s, OPC_FST_S, src, base, offset);
        }
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        if (src < TCG_REG_V0) {
            tcg_out_ldst(s, OPC_ST_D, src, base, offset);
        } else {
            tcg_out_ldst(s, OPC_FST_D, src, base, offset);
        }
        break;
    case TCG_TYPE_V128:
        if (-0x800 <= offset && offset <= 0x7ff) {
            tcg_out_opc_vst(s, src, base, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_vstx(s, src, base, TCG_REG_TMP0);
        }
        break;
    case TCG_TYPE_V256:
        if (-0x800 <= offset && offset <= 0x7ff) {
            tcg_out_opc_xvst(s, src, base, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_xvstx(s, src, base, TCG_REG_TMP0);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    if (val == 0) {
        tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
        return true;
    }
    return false;
}

/*
 * Load/store helpers for SoftMMU, and qemu_ld/st implementations
 */

static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
{
    tcg_out_opc_b(s, 0);
    return reloc_br_sd10k16(s->code_ptr - 1, target);
}

static const TCGLdstHelperParam ldst_helper_param = {
    .ntmp = 1, .tmp = { TCG_REG_TMP0 }
};

static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOp opc = get_memop(l->oi);

    /* resolve label address */
    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_ld_helper_args(s, l, &ldst_helper_param);
    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
    tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
    return tcg_out_goto(s, l->raddr);
}

static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    MemOp opc = get_memop(l->oi);

    /* resolve label address */
    if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
        return false;
    }

    tcg_out_st_helper_args(s, l, &ldst_helper_param);
    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
    return tcg_out_goto(s, l->raddr);
}

typedef struct {
    TCGReg base;
    TCGReg index;
    TCGAtomAlign aa;
} HostAddress;

bool tcg_target_has_memory_bswap(MemOp memop)
{
    return false;
}

/* We expect to use a 12-bit negative offset from ENV.  */
#define MIN_TLB_MASK_TABLE_OFS  -(1 << 11)

/*
 * For system-mode, perform the TLB load and compare.
 * For user-mode, perform any required alignment tests.
 * In both cases, return a TCGLabelQemuLdst structure if the slow path
 * is required and fill in @h with the host address for the fast path.
 */
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
                                           TCGReg addr_reg, MemOpIdx oi,
                                           bool is_ld)
{
    TCGType addr_type = s->addr_type;
    TCGLabelQemuLdst *ldst = NULL;
    MemOp opc = get_memop(oi);
    MemOp a_bits;

    h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
    a_bits = h->aa.align;

    if (tcg_use_softmmu) {
        unsigned s_bits = opc & MO_SIZE;
        int mem_index = get_mmuidx(oi);
        int fast_ofs = tlb_mask_table_ofs(s, mem_index);
        int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
        int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);

        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addr_reg;

        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

        tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
                           s->page_bits - CPU_TLB_ENTRY_BITS);
        tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
        tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);

        /* Load the tlb comparator and the addend.  */
        QEMU_BUILD_BUG_ON(HOST_BIG_ENDIAN);
        tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
                   is_ld ? offsetof(CPUTLBEntry, addr_read)
                         : offsetof(CPUTLBEntry, addr_write));
        tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
                   offsetof(CPUTLBEntry, addend));

        /*
         * For aligned accesses, we check the first byte and include the
         * alignment bits within the address.  For unaligned access, we
         * check that we don't cross pages using the address of the last
         * byte of the access.
         */
        if (a_bits < s_bits) {
            unsigned a_mask = (1u << a_bits) - 1;
            unsigned s_mask = (1u << s_bits) - 1;
            tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
        } else {
            tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
        }
        tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
                              a_bits, s->page_bits - 1);

        /* Compare masked address with the TLB entry.  */
        ldst->label_ptr[0] = s->code_ptr;
        tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);

        h->index = TCG_REG_TMP2;
    } else {
        if (a_bits) {
            ldst = new_ldst_label(s);

            ldst->is_ld = is_ld;
            ldst->oi = oi;
            ldst->addrlo_reg = addr_reg;

            /*
             * Without micro-architecture details, we don't know which of
             * bstrpick or andi is faster, so use bstrpick as it's not
             * constrained by imm field width.  Not that alignments >= 2^12
             * are going to happen any time soon.
             */
            tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);

            ldst->label_ptr[0] = s->code_ptr;
            tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
        }

        h->index = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
    }

    if (addr_type == TCG_TYPE_I32) {
        h->base = TCG_REG_TMP0;
        tcg_out_ext32u(s, h->base, addr_reg);
    } else {
        h->base = addr_reg;
    }

    return ldst;
}
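
/*
 * For reference (a summary of the code above, not in the original source),
 * the softmmu fast path boils down to:
 *   srli.d     tmp2, addr, page_bits - CPU_TLB_ENTRY_BITS
 *   and        tmp2, tmp2, tmp0          # tmp0 = fast[mmu_idx].mask
 *   add.d      tmp2, tmp2, tmp1          # tmp1 = fast[mmu_idx].table
 *   ld         tmp0, tmp2, addr_read/addr_write   # tlb comparator
 *   ld.d       tmp2, tmp2, addend
 *   addi/or    tmp1, addr (+ s_mask - a_mask)
 *   bstrins.d  tmp1, zero, a_bits, page_bits - 1  # mask page offset
 *   bne        tmp0, tmp1, slow_path
 * leaving h->index = tmp2 as the host addend for the fast-path access.
 */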

static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
                                    TCGReg rd, HostAddress h)
{
    /* Byte swapping is left to middle-end expansion.  */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & MO_SSIZE) {
    case MO_UB:
        tcg_out_opc_ldx_bu(s, rd, h.base, h.index);
        break;
    case MO_SB:
        tcg_out_opc_ldx_b(s, rd, h.base, h.index);
        break;
    case MO_UW:
        tcg_out_opc_ldx_hu(s, rd, h.base, h.index);
        break;
    case MO_SW:
        tcg_out_opc_ldx_h(s, rd, h.base, h.index);
        break;
    case MO_UL:
        if (type == TCG_TYPE_I64) {
            tcg_out_opc_ldx_wu(s, rd, h.base, h.index);
            break;
        }
        /* fallthrough */
    case MO_SL:
        tcg_out_opc_ldx_w(s, rd, h.base, h.index);
        break;
    case MO_UQ:
        tcg_out_opc_ldx_d(s, rd, h.base, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
    tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
                                    TCGReg rd, HostAddress h)
{
    /* Byte swapping is left to middle-end expansion.  */
    tcg_debug_assert((opc & MO_BSWAP) == 0);

    switch (opc & MO_SIZE) {
    case MO_8:
        tcg_out_opc_stx_b(s, rd, h.base, h.index);
        break;
    case MO_16:
        tcg_out_opc_stx_h(s, rd, h.base, h.index);
        break;
    case MO_32:
        tcg_out_opc_stx_w(s, rd, h.base, h.index);
        break;
    case MO_64:
        tcg_out_opc_stx_d(s, rd, h.base, h.index);
        break;
    default:
        g_assert_not_reached();
    }
}

static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                            MemOpIdx oi, TCGType data_type)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
    tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);

    if (ldst) {
        ldst->type = data_type;
        ldst->datalo_reg = data_reg;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg data_lo, TCGReg data_hi,
                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
{
    TCGLabelQemuLdst *ldst;
    HostAddress h;

    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);

    if (h.aa.atom == MO_128) {
        /*
         * Use VLDX/VSTX when 128-bit atomicity is required.
         * If the address is 16-byte aligned, the 128-bit load/store is atomic.
         */
        if (is_ld) {
            tcg_out_opc_vldx(s, TCG_VEC_TMP0, h.base, h.index);
            tcg_out_opc_vpickve2gr_d(s, data_lo, TCG_VEC_TMP0, 0);
            tcg_out_opc_vpickve2gr_d(s, data_hi, TCG_VEC_TMP0, 1);
        } else {
            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_lo, 0);
            tcg_out_opc_vinsgr2vr_d(s, TCG_VEC_TMP0, data_hi, 1);
            tcg_out_opc_vstx(s, TCG_VEC_TMP0, h.base, h.index);
        }
    } else {
        /* Otherwise use a pair of LD/ST. */
        TCGReg base = h.base;
        if (h.index != TCG_REG_ZERO) {
            base = TCG_REG_TMP0;
            tcg_out_opc_add_d(s, base, h.base, h.index);
        }
        if (is_ld) {
            tcg_debug_assert(base != data_lo);
            tcg_out_opc_ld_d(s, data_lo, base, 0);
            tcg_out_opc_ld_d(s, data_hi, base, 8);
        } else {
            tcg_out_opc_st_d(s, data_lo, base, 0);
            tcg_out_opc_st_d(s, data_hi, base, 8);
        }
    }

    if (ldst) {
        ldst->type = TCG_TYPE_I128;
        ldst->datalo_reg = data_lo;
        ldst->datahi_reg = data_hi;
        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
    }
}

/*
 * Entry-points
 */

static const tcg_insn_unit *tb_ret_addr;

static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
{
    /* Reuse the zeroing that exists for goto_ptr.  */
    if (a0 == 0) {
        tcg_out_call_int(s, tcg_code_gen_epilogue, true);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
        tcg_out_call_int(s, tb_ret_addr, true);
    }
}
static void tcg_out_goto_tb(TCGContext *s, int which)
{
    /*
     * Direct branch, or load of the indirect address, to be patched
     * by tb_target_set_jmp_target.  Check early that the indirect load
     * offset is in range, regardless of the direct branch distance,
     * via the assert within tcg_out_opc_pcaddu2i.
     */
    uintptr_t i_addr = get_jmp_target_addr(s, which);
    intptr_t i_disp = tcg_pcrel_diff(s, (void *)i_addr);

    set_jmp_insn_offset(s, which);
    tcg_out_opc_pcaddu2i(s, TCG_REG_TMP0, i_disp >> 2);

    /* Finish the load and indirect branch. */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_TMP0, 0);
    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
    set_jmp_reset_offset(s, which);
}

void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                              uintptr_t jmp_rx, uintptr_t jmp_rw)
{
    uintptr_t d_addr = tb->jmp_target_addr[n];
    ptrdiff_t d_disp = (ptrdiff_t)(d_addr - jmp_rx) >> 2;
    tcg_insn_unit insn;

    /* Either directly branch, or load slot address for indirect branch. */
    if (d_disp == sextreg(d_disp, 0, 26)) {
        insn = encode_sd10k16_insn(OPC_B, d_disp);
    } else {
        uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n];
        intptr_t i_disp = i_addr - jmp_rx;
        insn = encode_dsj20_insn(OPC_PCADDU2I, TCG_REG_TMP0, i_disp >> 2);
    }

    qatomic_set((tcg_insn_unit *)jmp_rw, insn);
    flush_idcache_range(jmp_rx, jmp_rw, 4);
}
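
/*
 * Example (illustrative): if the new destination is within the signed
 * 26-bit instruction-count range (+/- 128MiB), the patched slot becomes a
 * direct B; otherwise it becomes pcaddu2i tmp0, <slot offset>, and the
 * ld.d + jirl already emitted by tcg_out_goto_tb perform the indirect
 * branch through the jmp_target_addr slot.
 */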

static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS])
{
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    TCGArg a3 = args[3];
    int c2 = const_args[2];

    switch (opc) {
    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;

    case INDEX_op_goto_ptr:
        tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
        break;

    case INDEX_op_br:
        tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0),
                      0);
        tcg_out_opc_b(s, 0);
        break;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
        break;

    case INDEX_op_extrh_i64_i32:
        tcg_out_opc_srai_d(s, a0, a1, 32);
        break;

    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
        break;

    case INDEX_op_nor_i32:
    case INDEX_op_nor_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
            tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO);
        } else {
            tcg_out_opc_nor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_andi(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_andn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_orc_i32:
    case INDEX_op_orc_i64:
        if (c2) {
            /* guaranteed to fit due to constraint */
            tcg_out_opc_ori(s, a0, a1, ~a2);
        } else {
            tcg_out_opc_orn(s, a0, a1, a2);
        }
        break;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        if (c2) {
            tcg_out_opc_andi(s, a0, a1, a2);
        } else {
            tcg_out_opc_and(s, a0, a1, a2);
        }
        break;

    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
        if (c2) {
            tcg_out_opc_ori(s, a0, a1, a2);
        } else {
            tcg_out_opc_or(s, a0, a1, a2);
        }
        break;

    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        if (c2) {
            tcg_out_opc_xori(s, a0, a1, a2);
        } else {
            tcg_out_opc_xor(s, a0, a1, a2);
        }
        break;

    case INDEX_op_extract_i32:
        if (a2 == 0 && args[3] <= 12) {
            tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
        } else {
            tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
        }
        break;
    case INDEX_op_extract_i64:
        if (a2 == 0 && args[3] <= 12) {
            tcg_out_opc_andi(s, a0, a1, (1 << args[3]) - 1);
        } else {
            tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
        }
        break;

    case INDEX_op_sextract_i64:
        if (a2 + args[3] == 32) {
            if (a2 == 0) {
                tcg_out_ext32s(s, a0, a1);
            } else {
                tcg_out_opc_srai_w(s, a0, a1, a2);
            }
            break;
        }
        /* FALLTHRU */
    case INDEX_op_sextract_i32:
        if (a2 == 0 && args[3] == 8) {
            tcg_out_ext8s(s, TCG_TYPE_REG, a0, a1);
        } else if (a2 == 0 && args[3] == 16) {
            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a1);
        } else {
            g_assert_not_reached();
        }
        break;

    case INDEX_op_deposit_i32:
        tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;
    case INDEX_op_deposit_i64:
        tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
        break;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
        tcg_out_opc_revb_2h(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext16s(s, TCG_TYPE_REG, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext16u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap32_i32:
        /* All 32-bit values are computed sign-extended in the register.  */
        a2 = TCG_BSWAP_OS;
        /* fallthrough */
    case INDEX_op_bswap32_i64:
        tcg_out_opc_revb_2w(s, a0, a1);
        if (a2 & TCG_BSWAP_OS) {
            tcg_out_ext32s(s, a0, a0);
        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
            tcg_out_ext32u(s, a0, a0);
        }
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_opc_revb_d(s, a0, a1);
        break;

    case INDEX_op_clz_i32:
        tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_clz_i64:
        tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_ctz_i32:
        tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true);
        break;
    case INDEX_op_ctz_i64:
        tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
        break;

    case INDEX_op_shl_i32:
        if (c2) {
            tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sll_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shl_i64:
        if (c2) {
            tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sll_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_shr_i32:
        if (c2) {
            tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_srl_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_shr_i64:
        if (c2) {
            tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_srl_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sar_i32:
        if (c2) {
            tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_sra_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sar_i64:
        if (c2) {
            tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_sra_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_rotl_i32:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
        } else {
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
        }
        break;
    case INDEX_op_rotl_i64:
        /* transform into equivalent rotr/rotri */
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
        } else {
            tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
            tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
        }
        break;

    case INDEX_op_rotr_i32:
        if (c2) {
            tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
        } else {
            tcg_out_opc_rotr_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_rotr_i64:
        if (c2) {
            tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
        } else {
            tcg_out_opc_rotr_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_add_i32:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, a2);
        } else {
            tcg_out_opc_add_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_add_i64:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, a2);
        } else {
            tcg_out_opc_add_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_sub_i32:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I32, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_w(s, a0, a1, a2);
        }
        break;
    case INDEX_op_sub_i64:
        if (c2) {
            tcg_out_addi(s, TCG_TYPE_I64, a0, a1, -a2);
        } else {
            tcg_out_opc_sub_d(s, a0, a1, a2);
        }
        break;

    case INDEX_op_neg_i32:
        tcg_out_opc_sub_w(s, a0, TCG_REG_ZERO, a1);
        break;
    case INDEX_op_neg_i64:
        tcg_out_opc_sub_d(s, a0, TCG_REG_ZERO, a1);
        break;

    case INDEX_op_mul_i32:
        tcg_out_opc_mul_w(s, a0, a1, a2);
        break;
    case INDEX_op_mul_i64:
        tcg_out_opc_mul_d(s, a0, a1, a2);
        break;

    case INDEX_op_mulsh_i32:
        tcg_out_opc_mulh_w(s, a0, a1, a2);
        break;
    case INDEX_op_mulsh_i64:
        tcg_out_opc_mulh_d(s, a0, a1, a2);
        break;

    case INDEX_op_muluh_i32:
        tcg_out_opc_mulh_wu(s, a0, a1, a2);
        break;
    case INDEX_op_muluh_i64:
        tcg_out_opc_mulh_du(s, a0, a1, a2);
        break;

    case INDEX_op_div_i32:
        tcg_out_opc_div_w(s, a0, a1, a2);
        break;
    case INDEX_op_div_i64:
        tcg_out_opc_div_d(s, a0, a1, a2);
        break;

    case INDEX_op_divu_i32:
        tcg_out_opc_div_wu(s, a0, a1, a2);
        break;
    case INDEX_op_divu_i64:
        tcg_out_opc_div_du(s, a0, a1, a2);
        break;

    case INDEX_op_rem_i32:
        tcg_out_opc_mod_w(s, a0, a1, a2);
        break;
    case INDEX_op_rem_i64:
        tcg_out_opc_mod_d(s, a0, a1, a2);
        break;

    case INDEX_op_remu_i32:
        tcg_out_opc_mod_wu(s, a0, a1, a2);
        break;
    case INDEX_op_remu_i64:
        tcg_out_opc_mod_du(s, a0, a1, a2);
        break;

    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
        break;

    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        tcg_out_movcond(s, args[5], a0, a1, a2, c2, args[3], args[4]);
        break;

    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
        tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
        break;
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
        tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2);
        break;
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
        tcg_out_ldst(s, OPC_LD_H, a0, a1, a2);
        break;
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
        tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2);
        break;
    case INDEX_op_ld_i32:
    case INDEX_op_ld32s_i64:
        tcg_out_ldst(s, OPC_LD_W, a0, a1, a2);
        break;
    case INDEX_op_ld32u_i64:
        tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ldst(s, OPC_LD_D, a0, a1, a2);
        break;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        tcg_out_ldst(s, OPC_ST_B, a0, a1, a2);
        break;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
        tcg_out_ldst(s, OPC_ST_H, a0, a1, a2);
        break;
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        tcg_out_ldst(s, OPC_ST_W, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
        break;

    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
        tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, true);
        break;
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
        break;
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
        break;
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        tcg_out_qemu_ldst_i128(s, a0, a1, a2, a3, false);
        break;

    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    default:
        g_assert_not_reached();
    }
}

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg rd, TCGReg rs)
{
    static const LoongArchInsn repl_insn[2][4] = {
        { OPC_VREPLGR2VR_B, OPC_VREPLGR2VR_H,
          OPC_VREPLGR2VR_W, OPC_VREPLGR2VR_D },
        { OPC_XVREPLGR2VR_B, OPC_XVREPLGR2VR_H,
          OPC_XVREPLGR2VR_W, OPC_XVREPLGR2VR_D },
    };
    bool lasx = type == TCG_TYPE_V256;

    tcg_debug_assert(vece <= MO_64);
    tcg_out32(s, encode_vdj_insn(repl_insn[lasx][vece], rd, rs));
    return true;
}
1742static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
1743                             TCGReg r, TCGReg base, intptr_t offset)
1744{
1745    bool lasx = type == TCG_TYPE_V256;
1746
1747    /* Handle imm overflow and scaling (the vldrepl.d imm is the offset divided by 8). */
1748    if (offset < -0x800 || offset > 0x7ff ||
1749        (offset & ((1 << vece) - 1)) != 0) {
1750        tcg_out_addi(s, TCG_TYPE_I64, TCG_REG_TMP0, base, offset);
1751        base = TCG_REG_TMP0;
1752        offset = 0;
1753    }
1754    offset >>= vece;
1755
1756    switch (vece) {
1757    case MO_8:
1758        if (lasx) {
1759            tcg_out_opc_xvldrepl_b(s, r, base, offset);
1760        } else {
1761            tcg_out_opc_vldrepl_b(s, r, base, offset);
1762        }
1763        break;
1764    case MO_16:
1765        if (lasx) {
1766            tcg_out_opc_xvldrepl_h(s, r, base, offset);
1767        } else {
1768            tcg_out_opc_vldrepl_h(s, r, base, offset);
1769        }
1770        break;
1771    case MO_32:
1772        if (lasx) {
1773            tcg_out_opc_xvldrepl_w(s, r, base, offset);
1774        } else {
1775            tcg_out_opc_vldrepl_w(s, r, base, offset);
1776        }
1777        break;
1778    case MO_64:
1779        if (lasx) {
1780            tcg_out_opc_xvldrepl_d(s, r, base, offset);
1781        } else {
1782            tcg_out_opc_vldrepl_d(s, r, base, offset);
1783        }
1784        break;
1785    default:
1786        g_assert_not_reached();
1787    }
1788    return true;
1789}
1790
1791static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
1792                             TCGReg rd, int64_t v64)
1793{
1794    /* Try vldi if imm can fit */
1795    int64_t value = sextract64(v64, 0, 8 << vece);
1796    if (-0x200 <= value && value <= 0x1FF) {
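        /*
         * Bit 12 of the vldi immediate is clear here: bits 11:10 select
         * the element width, bits 9:0 hold the signed replicated value.
         */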
1797        uint32_t imm = (vece << 10) | ((uint32_t)v64 & 0x3FF);
1798
1799        if (type == TCG_TYPE_V256) {
1800            tcg_out_opc_xvldi(s, rd, imm);
1801        } else {
1802            tcg_out_opc_vldi(s, rd, imm);
1803        }
1804        return;
1805    }
1806
1807    /* TODO: use the extra vldi patterns available when bit 12 of the imm is set */
1808
1809    tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, value);
1810    tcg_out_dup_vec(s, type, vece, rd, TCG_REG_TMP0);
1811}
1812
1813static void tcg_out_addsub_vec(TCGContext *s, bool lasx, unsigned vece,
1814                               TCGArg a0, TCGArg a1, TCGArg a2,
1815                               bool a2_is_const, bool is_add)
1816{
1817    static const LoongArchInsn add_vec_insn[2][4] = {
1818        { OPC_VADD_B, OPC_VADD_H, OPC_VADD_W, OPC_VADD_D },
1819        { OPC_XVADD_B, OPC_XVADD_H, OPC_XVADD_W, OPC_XVADD_D },
1820    };
1821    static const LoongArchInsn add_vec_imm_insn[2][4] = {
1822        { OPC_VADDI_BU, OPC_VADDI_HU, OPC_VADDI_WU, OPC_VADDI_DU },
1823        { OPC_XVADDI_BU, OPC_XVADDI_HU, OPC_XVADDI_WU, OPC_XVADDI_DU },
1824    };
1825    static const LoongArchInsn sub_vec_insn[2][4] = {
1826        { OPC_VSUB_B, OPC_VSUB_H, OPC_VSUB_W, OPC_VSUB_D },
1827        { OPC_XVSUB_B, OPC_XVSUB_H, OPC_XVSUB_W, OPC_XVSUB_D },
1828    };
1829    static const LoongArchInsn sub_vec_imm_insn[2][4] = {
1830        { OPC_VSUBI_BU, OPC_VSUBI_HU, OPC_VSUBI_WU, OPC_VSUBI_DU },
1831        { OPC_XVSUBI_BU, OPC_XVSUBI_HU, OPC_XVSUBI_WU, OPC_XVSUBI_DU },
1832    };
1833    LoongArchInsn insn;
1834
1835    if (a2_is_const) {
1836        int64_t value = sextract64(a2, 0, 8 << vece);
1837
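        /*
         * Only the unsigned-immediate forms v{add,sub}i.*u exist, so fold
         * a negative addend into the opposite operation instead.
         */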
1838        if (!is_add) {
1839            value = -value;
1840        }
1841        if (value < 0) {
1842            insn = sub_vec_imm_insn[lasx][vece];
1843            value = -value;
1844        } else {
1845            insn = add_vec_imm_insn[lasx][vece];
1846        }
1847
1848        /* Constraint TCG_CT_CONST_VADD ensures validity. */
1849        tcg_debug_assert(0 <= value && value <= 0x1f);
1850
1851        tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
1852        return;
1853    }
1854
1855    if (is_add) {
1856        insn = add_vec_insn[lasx][vece];
1857    } else {
1858        insn = sub_vec_insn[lasx][vece];
1859    }
1860    tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
1861}
1862
1863static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
1864                           unsigned vecl, unsigned vece,
1865                           const TCGArg args[TCG_MAX_OP_ARGS],
1866                           const int const_args[TCG_MAX_OP_ARGS])
1867{
1868    TCGType type = vecl + TCG_TYPE_V64;
1869    bool lasx = type == TCG_TYPE_V256;
1870    TCGArg a0, a1, a2, a3;
1871    LoongArchInsn insn;
1872
1873    static const LoongArchInsn cmp_vec_insn[16][2][4] = {
1874        [TCG_COND_EQ] = {
1875            { OPC_VSEQ_B, OPC_VSEQ_H, OPC_VSEQ_W, OPC_VSEQ_D },
1876            { OPC_XVSEQ_B, OPC_XVSEQ_H, OPC_XVSEQ_W, OPC_XVSEQ_D },
1877        },
1878        [TCG_COND_LE] = {
1879            { OPC_VSLE_B, OPC_VSLE_H, OPC_VSLE_W, OPC_VSLE_D },
1880            { OPC_XVSLE_B, OPC_XVSLE_H, OPC_XVSLE_W, OPC_XVSLE_D },
1881        },
1882        [TCG_COND_LEU] = {
1883            { OPC_VSLE_BU, OPC_VSLE_HU, OPC_VSLE_WU, OPC_VSLE_DU },
1884            { OPC_XVSLE_BU, OPC_XVSLE_HU, OPC_XVSLE_WU, OPC_XVSLE_DU },
1885        },
1886        [TCG_COND_LT] = {
1887            { OPC_VSLT_B, OPC_VSLT_H, OPC_VSLT_W, OPC_VSLT_D },
1888            { OPC_XVSLT_B, OPC_XVSLT_H, OPC_XVSLT_W, OPC_XVSLT_D },
1889        },
1890        [TCG_COND_LTU] = {
1891            { OPC_VSLT_BU, OPC_VSLT_HU, OPC_VSLT_WU, OPC_VSLT_DU },
1892            { OPC_XVSLT_BU, OPC_XVSLT_HU, OPC_XVSLT_WU, OPC_XVSLT_DU },
1893        }
1894    };
1895    static const LoongArchInsn cmp_vec_imm_insn[16][2][4] = {
1896        [TCG_COND_EQ] = {
1897            { OPC_VSEQI_B, OPC_VSEQI_H, OPC_VSEQI_W, OPC_VSEQI_D },
1898            { OPC_XVSEQI_B, OPC_XVSEQI_H, OPC_XVSEQI_W, OPC_XVSEQI_D },
1899        },
1900        [TCG_COND_LE] = {
1901            { OPC_VSLEI_B, OPC_VSLEI_H, OPC_VSLEI_W, OPC_VSLEI_D },
1902            { OPC_XVSLEI_B, OPC_XVSLEI_H, OPC_XVSLEI_W, OPC_XVSLEI_D },
1903        },
1904        [TCG_COND_LEU] = {
1905            { OPC_VSLEI_BU, OPC_VSLEI_HU, OPC_VSLEI_WU, OPC_VSLEI_DU },
1906            { OPC_XVSLEI_BU, OPC_XVSLEI_HU, OPC_XVSLEI_WU, OPC_XVSLEI_DU },
1907        },
1908        [TCG_COND_LT] = {
1909            { OPC_VSLTI_B, OPC_VSLTI_H, OPC_VSLTI_W, OPC_VSLTI_D },
1910            { OPC_XVSLTI_B, OPC_XVSLTI_H, OPC_XVSLTI_W, OPC_XVSLTI_D },
1911        },
1912        [TCG_COND_LTU] = {
1913            { OPC_VSLTI_BU, OPC_VSLTI_HU, OPC_VSLTI_WU, OPC_VSLTI_DU },
1914            { OPC_XVSLTI_BU, OPC_XVSLTI_HU, OPC_XVSLTI_WU, OPC_XVSLTI_DU },
1915        }
1916    };
1917    static const LoongArchInsn neg_vec_insn[2][4] = {
1918        { OPC_VNEG_B, OPC_VNEG_H, OPC_VNEG_W, OPC_VNEG_D },
1919        { OPC_XVNEG_B, OPC_XVNEG_H, OPC_XVNEG_W, OPC_XVNEG_D },
1920    };
1921    static const LoongArchInsn mul_vec_insn[2][4] = {
1922        { OPC_VMUL_B, OPC_VMUL_H, OPC_VMUL_W, OPC_VMUL_D },
1923        { OPC_XVMUL_B, OPC_XVMUL_H, OPC_XVMUL_W, OPC_XVMUL_D },
1924    };
1925    static const LoongArchInsn smin_vec_insn[2][4] = {
1926        { OPC_VMIN_B, OPC_VMIN_H, OPC_VMIN_W, OPC_VMIN_D },
1927        { OPC_XVMIN_B, OPC_XVMIN_H, OPC_XVMIN_W, OPC_XVMIN_D },
1928    };
1929    static const LoongArchInsn umin_vec_insn[2][4] = {
1930        { OPC_VMIN_BU, OPC_VMIN_HU, OPC_VMIN_WU, OPC_VMIN_DU },
1931        { OPC_XVMIN_BU, OPC_XVMIN_HU, OPC_XVMIN_WU, OPC_XVMIN_DU },
1932    };
1933    static const LoongArchInsn smax_vec_insn[2][4] = {
1934        { OPC_VMAX_B, OPC_VMAX_H, OPC_VMAX_W, OPC_VMAX_D },
1935        { OPC_XVMAX_B, OPC_XVMAX_H, OPC_XVMAX_W, OPC_XVMAX_D },
1936    };
1937    static const LoongArchInsn umax_vec_insn[2][4] = {
1938        { OPC_VMAX_BU, OPC_VMAX_HU, OPC_VMAX_WU, OPC_VMAX_DU },
1939        { OPC_XVMAX_BU, OPC_XVMAX_HU, OPC_XVMAX_WU, OPC_XVMAX_DU },
1940    };
1941    static const LoongArchInsn ssadd_vec_insn[2][4] = {
1942        { OPC_VSADD_B, OPC_VSADD_H, OPC_VSADD_W, OPC_VSADD_D },
1943        { OPC_XVSADD_B, OPC_XVSADD_H, OPC_XVSADD_W, OPC_XVSADD_D },
1944    };
1945    static const LoongArchInsn usadd_vec_insn[2][4] = {
1946        { OPC_VSADD_BU, OPC_VSADD_HU, OPC_VSADD_WU, OPC_VSADD_DU },
1947        { OPC_XVSADD_BU, OPC_XVSADD_HU, OPC_XVSADD_WU, OPC_XVSADD_DU },
1948    };
1949    static const LoongArchInsn sssub_vec_insn[2][4] = {
1950        { OPC_VSSUB_B, OPC_VSSUB_H, OPC_VSSUB_W, OPC_VSSUB_D },
1951        { OPC_XVSSUB_B, OPC_XVSSUB_H, OPC_XVSSUB_W, OPC_XVSSUB_D },
1952    };
1953    static const LoongArchInsn ussub_vec_insn[2][4] = {
1954        { OPC_VSSUB_BU, OPC_VSSUB_HU, OPC_VSSUB_WU, OPC_VSSUB_DU },
1955        { OPC_XVSSUB_BU, OPC_XVSSUB_HU, OPC_XVSSUB_WU, OPC_XVSSUB_DU },
1956    };
1957    static const LoongArchInsn shlv_vec_insn[2][4] = {
1958        { OPC_VSLL_B, OPC_VSLL_H, OPC_VSLL_W, OPC_VSLL_D },
1959        { OPC_XVSLL_B, OPC_XVSLL_H, OPC_XVSLL_W, OPC_XVSLL_D },
1960    };
1961    static const LoongArchInsn shrv_vec_insn[2][4] = {
1962        { OPC_VSRL_B, OPC_VSRL_H, OPC_VSRL_W, OPC_VSRL_D },
1963        { OPC_XVSRL_B, OPC_XVSRL_H, OPC_XVSRL_W, OPC_XVSRL_D },
1964    };
1965    static const LoongArchInsn sarv_vec_insn[2][4] = {
1966        { OPC_VSRA_B, OPC_VSRA_H, OPC_VSRA_W, OPC_VSRA_D },
1967        { OPC_XVSRA_B, OPC_XVSRA_H, OPC_XVSRA_W, OPC_XVSRA_D },
1968    };
1969    static const LoongArchInsn shli_vec_insn[2][4] = {
1970        { OPC_VSLLI_B, OPC_VSLLI_H, OPC_VSLLI_W, OPC_VSLLI_D },
1971        { OPC_XVSLLI_B, OPC_XVSLLI_H, OPC_XVSLLI_W, OPC_XVSLLI_D },
1972    };
1973    static const LoongArchInsn shri_vec_insn[2][4] = {
1974        { OPC_VSRLI_B, OPC_VSRLI_H, OPC_VSRLI_W, OPC_VSRLI_D },
1975        { OPC_XVSRLI_B, OPC_XVSRLI_H, OPC_XVSRLI_W, OPC_XVSRLI_D },
1976    };
1977    static const LoongArchInsn sari_vec_insn[2][4] = {
1978        { OPC_VSRAI_B, OPC_VSRAI_H, OPC_VSRAI_W, OPC_VSRAI_D },
1979        { OPC_XVSRAI_B, OPC_XVSRAI_H, OPC_XVSRAI_W, OPC_XVSRAI_D },
1980    };
1981    static const LoongArchInsn rotrv_vec_insn[2][4] = {
1982        { OPC_VROTR_B, OPC_VROTR_H, OPC_VROTR_W, OPC_VROTR_D },
1983        { OPC_XVROTR_B, OPC_XVROTR_H, OPC_XVROTR_W, OPC_XVROTR_D },
1984    };
1985    static const LoongArchInsn rotri_vec_insn[2][4] = {
1986        { OPC_VROTRI_B, OPC_VROTRI_H, OPC_VROTRI_W, OPC_VROTRI_D },
1987        { OPC_XVROTRI_B, OPC_XVROTRI_H, OPC_XVROTRI_W, OPC_XVROTRI_D },
1988    };
1989
1990    a0 = args[0];
1991    a1 = args[1];
1992    a2 = args[2];
1993    a3 = args[3];
1994
1995    switch (opc) {
1996    case INDEX_op_st_vec:
1997        tcg_out_st(s, type, a0, a1, a2);
1998        break;
1999    case INDEX_op_ld_vec:
2000        tcg_out_ld(s, type, a0, a1, a2);
2001        break;
2002    case INDEX_op_and_vec:
2003        insn = lasx ? OPC_XVAND_V : OPC_VAND_V;
2004        goto vdvjvk;
2005    case INDEX_op_andc_vec:
2006        /*
2007         * vandn vd, vj, vk: vd = vk & ~vj
2008         * andc_vec vd, vj, vk: vd = vj & ~vk
2009         * hence vj and vk must be swapped
2010         */
2011        a1 = a2;
2012        a2 = args[1];
2013        insn = lasx ? OPC_XVANDN_V : OPC_VANDN_V;
2014        goto vdvjvk;
2015    case INDEX_op_or_vec:
2016        insn = lasx ? OPC_XVOR_V : OPC_VOR_V;
2017        goto vdvjvk;
2018    case INDEX_op_orc_vec:
2019        insn = lasx ? OPC_XVORN_V : OPC_VORN_V;
2020        goto vdvjvk;
2021    case INDEX_op_xor_vec:
2022        insn = lasx ? OPC_XVXOR_V : OPC_VXOR_V;
2023        goto vdvjvk;
2024    case INDEX_op_not_vec:
2025        a2 = a1;
2026        /* fall through */
2027    case INDEX_op_nor_vec:
2028        insn = lasx ? OPC_XVNOR_V : OPC_VNOR_V;
2029        goto vdvjvk;
2030    case INDEX_op_cmp_vec:
2031        {
2032            TCGCond cond = args[3];
2033
2034            if (const_args[2]) {
2035                /*
2036                 * cmp_vec dest, src, value
2037                 * Try vseqi/vslei/vslti
2038                 */
2039                int64_t value = sextract64(a2, 0, 8 << vece);
2040                if ((cond == TCG_COND_EQ ||
2041                     cond == TCG_COND_LE ||
2042                     cond == TCG_COND_LT) &&
2043                    (-0x10 <= value && value <= 0x0f)) {
2044                    insn = cmp_vec_imm_insn[cond][lasx][vece];
2045                    tcg_out32(s, encode_vdvjsk5_insn(insn, a0, a1, value));
2046                    break;
2047                } else if ((cond == TCG_COND_LEU ||
2048                            cond == TCG_COND_LTU) &&
2049                           (0x00 <= value && value <= 0x1f)) {
2050                    insn = cmp_vec_imm_insn[cond][lasx][vece];
2051                    tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, value));
2052                    break;
2053                }
2054
2055                /*
2056                 * Fallback to:
2057                 * dupi_vec temp, a2
2058                 * cmp_vec a0, a1, temp, cond
2059                 */
2060                tcg_out_dupi_vec(s, type, vece, TCG_VEC_TMP0, a2);
2061                a2 = TCG_VEC_TMP0;
2062            }
2063
2064            insn = cmp_vec_insn[cond][lasx][vece];
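            /*
             * Conditions without a direct insn (e.g. GT/GE) are handled
             * by swapping the operands and mirroring the condition.
             */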
2065            if (insn == 0) {
2066                TCGArg t;
2067                t = a1, a1 = a2, a2 = t;
2068                cond = tcg_swap_cond(cond);
2069                insn = cmp_vec_insn[cond][lasx][vece];
2070                tcg_debug_assert(insn != 0);
2071            }
2072        }
2073        goto vdvjvk;
2074    case INDEX_op_add_vec:
2075        tcg_out_addsub_vec(s, lasx, vece, a0, a1, a2, const_args[2], true);
2076        break;
2077    case INDEX_op_sub_vec:
2078        tcg_out_addsub_vec(s, lasx, vece, a0, a1, a2, const_args[2], false);
2079        break;
2080    case INDEX_op_neg_vec:
2081        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[lasx][vece], a0, a1));
2082        break;
2083    case INDEX_op_mul_vec:
2084        insn = mul_vec_insn[lasx][vece];
2085        goto vdvjvk;
2086    case INDEX_op_smin_vec:
2087        insn = smin_vec_insn[lasx][vece];
2088        goto vdvjvk;
2089    case INDEX_op_smax_vec:
2090        insn = smax_vec_insn[lasx][vece];
2091        goto vdvjvk;
2092    case INDEX_op_umin_vec:
2093        insn = umin_vec_insn[lasx][vece];
2094        goto vdvjvk;
2095    case INDEX_op_umax_vec:
2096        insn = umax_vec_insn[lasx][vece];
2097        goto vdvjvk;
2098    case INDEX_op_ssadd_vec:
2099        insn = ssadd_vec_insn[lasx][vece];
2100        goto vdvjvk;
2101    case INDEX_op_usadd_vec:
2102        insn = usadd_vec_insn[lasx][vece];
2103        goto vdvjvk;
2104    case INDEX_op_sssub_vec:
2105        insn = sssub_vec_insn[lasx][vece];
2106        goto vdvjvk;
2107    case INDEX_op_ussub_vec:
2108        insn = ussub_vec_insn[lasx][vece];
2109        goto vdvjvk;
2110    case INDEX_op_shlv_vec:
2111        insn = shlv_vec_insn[lasx][vece];
2112        goto vdvjvk;
2113    case INDEX_op_shrv_vec:
2114        insn = shrv_vec_insn[lasx][vece];
2115        goto vdvjvk;
2116    case INDEX_op_sarv_vec:
2117        insn = sarv_vec_insn[lasx][vece];
2118        goto vdvjvk;
2119    case INDEX_op_rotlv_vec:
2120        /* rotlv_vec a1, a2 = rotrv_vec a1, -a2 */
2121        tcg_out32(s, encode_vdvj_insn(neg_vec_insn[lasx][vece],
2122                                      TCG_VEC_TMP0, a2));
2123        a2 = TCG_VEC_TMP0;
2124        /* fall through */
2125    case INDEX_op_rotrv_vec:
2126        insn = rotrv_vec_insn[lasx][vece];
2127        goto vdvjvk;
2128    case INDEX_op_shli_vec:
2129        insn = shli_vec_insn[lasx][vece];
2130        goto vdvjukN;
2131    case INDEX_op_shri_vec:
2132        insn = shri_vec_insn[lasx][vece];
2133        goto vdvjukN;
2134    case INDEX_op_sari_vec:
2135        insn = sari_vec_insn[lasx][vece];
2136        goto vdvjukN;
2137    case INDEX_op_rotli_vec:
2138        /* rotli_vec a1, a2 = rotri_vec a1, -a2 */
2139        a2 = extract32(-a2, 0, 3 + vece);
2140        insn = rotri_vec_insn[lasx][vece];
2141        goto vdvjukN;
2142    case INDEX_op_bitsel_vec:
2143        /* vbitsel vd, vj, vk, va = bitsel_vec vd, va, vk, vj */
2144        if (lasx) {
2145            tcg_out_opc_xvbitsel_v(s, a0, a3, a2, a1);
2146        } else {
2147            tcg_out_opc_vbitsel_v(s, a0, a3, a2, a1);
2148        }
2149        break;
2150    case INDEX_op_dupm_vec:
2151        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2152        break;
2153    default:
2154        g_assert_not_reached();
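    /* Shared tails: vd,vj,vk register form and vd,vj,ukN immediate form. */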
2155    vdvjvk:
2156        tcg_out32(s, encode_vdvjvk_insn(insn, a0, a1, a2));
2157        break;
2158    vdvjukN:
2159        switch (vece) {
2160        case MO_8:
2161            tcg_out32(s, encode_vdvjuk3_insn(insn, a0, a1, a2));
2162            break;
2163        case MO_16:
2164            tcg_out32(s, encode_vdvjuk4_insn(insn, a0, a1, a2));
2165            break;
2166        case MO_32:
2167            tcg_out32(s, encode_vdvjuk5_insn(insn, a0, a1, a2));
2168            break;
2169        case MO_64:
2170            tcg_out32(s, encode_vdvjuk6_insn(insn, a0, a1, a2));
2171            break;
2172        default:
2173            g_assert_not_reached();
2174        }
2175        break;
2176    }
2177}
2178
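/* Report whether a vector op is supported directly (1) or not at all (0). */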
2179int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2180{
2181    switch (opc) {
2182    case INDEX_op_ld_vec:
2183    case INDEX_op_st_vec:
2184    case INDEX_op_dup_vec:
2185    case INDEX_op_dupm_vec:
2186    case INDEX_op_cmp_vec:
2187    case INDEX_op_add_vec:
2188    case INDEX_op_sub_vec:
2189    case INDEX_op_and_vec:
2190    case INDEX_op_andc_vec:
2191    case INDEX_op_or_vec:
2192    case INDEX_op_orc_vec:
2193    case INDEX_op_xor_vec:
2194    case INDEX_op_nor_vec:
2195    case INDEX_op_not_vec:
2196    case INDEX_op_neg_vec:
2197    case INDEX_op_mul_vec:
2198    case INDEX_op_smin_vec:
2199    case INDEX_op_smax_vec:
2200    case INDEX_op_umin_vec:
2201    case INDEX_op_umax_vec:
2202    case INDEX_op_ssadd_vec:
2203    case INDEX_op_usadd_vec:
2204    case INDEX_op_sssub_vec:
2205    case INDEX_op_ussub_vec:
2206    case INDEX_op_shlv_vec:
2207    case INDEX_op_shrv_vec:
2208    case INDEX_op_sarv_vec:
2209    case INDEX_op_bitsel_vec:
2210        return 1;
2211    default:
2212        return 0;
2213    }
2214}
2215
2216void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2217                       TCGArg a0, ...)
2218{
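    /* tcg_can_emit_vec_op never returns -1, so this hook is unreachable. */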
2219    g_assert_not_reached();
2220}
2221
2222static TCGConstraintSetIndex
2223tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
2224{
2225    switch (op) {
2226    case INDEX_op_goto_ptr:
2227        return C_O0_I1(r);
2228
2229    case INDEX_op_st8_i32:
2230    case INDEX_op_st8_i64:
2231    case INDEX_op_st16_i32:
2232    case INDEX_op_st16_i64:
2233    case INDEX_op_st32_i64:
2234    case INDEX_op_st_i32:
2235    case INDEX_op_st_i64:
2236    case INDEX_op_qemu_st_a32_i32:
2237    case INDEX_op_qemu_st_a64_i32:
2238    case INDEX_op_qemu_st_a32_i64:
2239    case INDEX_op_qemu_st_a64_i64:
2240        return C_O0_I2(rZ, r);
2241
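    /*
     * Both halves of the result are written before the address dies,
     * so the outputs must not overlap the input (C_N2_I1).
     */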
2242    case INDEX_op_qemu_ld_a32_i128:
2243    case INDEX_op_qemu_ld_a64_i128:
2244        return C_N2_I1(r, r, r);
2245
2246    case INDEX_op_qemu_st_a32_i128:
2247    case INDEX_op_qemu_st_a64_i128:
2248        return C_O0_I3(r, r, r);
2249
2250    case INDEX_op_brcond_i32:
2251    case INDEX_op_brcond_i64:
2252        return C_O0_I2(rZ, rZ);
2253
2254    case INDEX_op_ext8s_i32:
2255    case INDEX_op_ext8s_i64:
2256    case INDEX_op_ext8u_i32:
2257    case INDEX_op_ext8u_i64:
2258    case INDEX_op_ext16s_i32:
2259    case INDEX_op_ext16s_i64:
2260    case INDEX_op_ext16u_i32:
2261    case INDEX_op_ext16u_i64:
2262    case INDEX_op_ext32s_i64:
2263    case INDEX_op_ext32u_i64:
2264    case INDEX_op_extu_i32_i64:
2265    case INDEX_op_extrl_i64_i32:
2266    case INDEX_op_extrh_i64_i32:
2267    case INDEX_op_ext_i32_i64:
2268    case INDEX_op_neg_i32:
2269    case INDEX_op_neg_i64:
2270    case INDEX_op_not_i32:
2271    case INDEX_op_not_i64:
2272    case INDEX_op_extract_i32:
2273    case INDEX_op_extract_i64:
2274    case INDEX_op_sextract_i32:
2275    case INDEX_op_sextract_i64:
2276    case INDEX_op_bswap16_i32:
2277    case INDEX_op_bswap16_i64:
2278    case INDEX_op_bswap32_i32:
2279    case INDEX_op_bswap32_i64:
2280    case INDEX_op_bswap64_i64:
2281    case INDEX_op_ld8s_i32:
2282    case INDEX_op_ld8s_i64:
2283    case INDEX_op_ld8u_i32:
2284    case INDEX_op_ld8u_i64:
2285    case INDEX_op_ld16s_i32:
2286    case INDEX_op_ld16s_i64:
2287    case INDEX_op_ld16u_i32:
2288    case INDEX_op_ld16u_i64:
2289    case INDEX_op_ld32s_i64:
2290    case INDEX_op_ld32u_i64:
2291    case INDEX_op_ld_i32:
2292    case INDEX_op_ld_i64:
2293    case INDEX_op_qemu_ld_a32_i32:
2294    case INDEX_op_qemu_ld_a64_i32:
2295    case INDEX_op_qemu_ld_a32_i64:
2296    case INDEX_op_qemu_ld_a64_i64:
2297        return C_O1_I1(r, r);
2298
2299    case INDEX_op_andc_i32:
2300    case INDEX_op_andc_i64:
2301    case INDEX_op_orc_i32:
2302    case INDEX_op_orc_i64:
2303        /*
2304         * LoongArch andn/orn have no reg-imm forms, but we can express
2305         * the operation with andi/ori whenever ~constant satisfies
2306         * TCG_CT_CONST_U12.
2307         */
2308        return C_O1_I2(r, r, rC);
2309
2310    case INDEX_op_shl_i32:
2311    case INDEX_op_shl_i64:
2312    case INDEX_op_shr_i32:
2313    case INDEX_op_shr_i64:
2314    case INDEX_op_sar_i32:
2315    case INDEX_op_sar_i64:
2316    case INDEX_op_rotl_i32:
2317    case INDEX_op_rotl_i64:
2318    case INDEX_op_rotr_i32:
2319    case INDEX_op_rotr_i64:
2320        return C_O1_I2(r, r, ri);
2321
2322    case INDEX_op_add_i32:
2323        return C_O1_I2(r, r, ri);
2324    case INDEX_op_add_i64:
2325        return C_O1_I2(r, r, rJ);
2326
2327    case INDEX_op_and_i32:
2328    case INDEX_op_and_i64:
2329    case INDEX_op_nor_i32:
2330    case INDEX_op_nor_i64:
2331    case INDEX_op_or_i32:
2332    case INDEX_op_or_i64:
2333    case INDEX_op_xor_i32:
2334    case INDEX_op_xor_i64:
2335        /* LoongArch reg-imm bitops have their imms ZERO-extended */
2336        return C_O1_I2(r, r, rU);
2337
2338    case INDEX_op_clz_i32:
2339    case INDEX_op_clz_i64:
2340    case INDEX_op_ctz_i32:
2341    case INDEX_op_ctz_i64:
2342        return C_O1_I2(r, r, rW);
2343
2344    case INDEX_op_deposit_i32:
2345    case INDEX_op_deposit_i64:
2346        /* Must deposit into the same register as the first input */
2347        return C_O1_I2(r, 0, rZ);
2348
2349    case INDEX_op_sub_i32:
2350    case INDEX_op_setcond_i32:
2351        return C_O1_I2(r, rZ, ri);
2352    case INDEX_op_sub_i64:
2353    case INDEX_op_setcond_i64:
2354        return C_O1_I2(r, rZ, rJ);
2355
2356    case INDEX_op_mul_i32:
2357    case INDEX_op_mul_i64:
2358    case INDEX_op_mulsh_i32:
2359    case INDEX_op_mulsh_i64:
2360    case INDEX_op_muluh_i32:
2361    case INDEX_op_muluh_i64:
2362    case INDEX_op_div_i32:
2363    case INDEX_op_div_i64:
2364    case INDEX_op_divu_i32:
2365    case INDEX_op_divu_i64:
2366    case INDEX_op_rem_i32:
2367    case INDEX_op_rem_i64:
2368    case INDEX_op_remu_i32:
2369    case INDEX_op_remu_i64:
2370        return C_O1_I2(r, rZ, rZ);
2371
2372    case INDEX_op_movcond_i32:
2373    case INDEX_op_movcond_i64:
2374        return C_O1_I4(r, rZ, rJ, rZ, rZ);
2375
2376    case INDEX_op_ld_vec:
2377    case INDEX_op_dupm_vec:
2378    case INDEX_op_dup_vec:
2379        return C_O1_I1(w, r);
2380
2381    case INDEX_op_st_vec:
2382        return C_O0_I2(w, r);
2383
2384    case INDEX_op_cmp_vec:
2385        return C_O1_I2(w, w, wM);
2386
2387    case INDEX_op_add_vec:
2388    case INDEX_op_sub_vec:
2389        return C_O1_I2(w, w, wA);
2390
2391    case INDEX_op_and_vec:
2392    case INDEX_op_andc_vec:
2393    case INDEX_op_or_vec:
2394    case INDEX_op_orc_vec:
2395    case INDEX_op_xor_vec:
2396    case INDEX_op_nor_vec:
2397    case INDEX_op_mul_vec:
2398    case INDEX_op_smin_vec:
2399    case INDEX_op_smax_vec:
2400    case INDEX_op_umin_vec:
2401    case INDEX_op_umax_vec:
2402    case INDEX_op_ssadd_vec:
2403    case INDEX_op_usadd_vec:
2404    case INDEX_op_sssub_vec:
2405    case INDEX_op_ussub_vec:
2406    case INDEX_op_shlv_vec:
2407    case INDEX_op_shrv_vec:
2408    case INDEX_op_sarv_vec:
2409    case INDEX_op_rotrv_vec:
2410    case INDEX_op_rotlv_vec:
2411        return C_O1_I2(w, w, w);
2412
2413    case INDEX_op_not_vec:
2414    case INDEX_op_neg_vec:
2415    case INDEX_op_shli_vec:
2416    case INDEX_op_shri_vec:
2417    case INDEX_op_sari_vec:
2418    case INDEX_op_rotli_vec:
2419        return C_O1_I1(w, w);
2420
2421    case INDEX_op_bitsel_vec:
2422        return C_O1_I3(w, w, w, w);
2423
2424    default:
2425        return C_NotImplemented;
2426    }
2427}
2428
2429static const int tcg_target_callee_save_regs[] = {
2430    TCG_REG_S0,     /* used for the global env (TCG_AREG0) */
2431    TCG_REG_S1,
2432    TCG_REG_S2,
2433    TCG_REG_S3,
2434    TCG_REG_S4,
2435    TCG_REG_S5,
2436    TCG_REG_S6,
2437    TCG_REG_S7,
2438    TCG_REG_S8,
2439    TCG_REG_S9,
2440    TCG_REG_RA,     /* should be last for ABI compliance */
2441};
2442
2443/* Stack frame parameters.  */
2444#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
2445#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
2446#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
2447#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
2448                     + TCG_TARGET_STACK_ALIGN - 1) \
2449                    & -TCG_TARGET_STACK_ALIGN)
2450#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
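
/*
 * Frame layout, from low to high addresses:
 *   sp + 0                          outgoing call arguments
 *   sp + TCG_STATIC_CALL_ARGS_SIZE  TCG temporaries (TEMP_SIZE bytes)
 *   sp + SAVE_OFS                   callee-saved registers and RA
 */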
2451
2452/* We expect the frame size to fit the signed 12-bit immediate of addi.d.  */
2453QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
2454
2455/* Generate global QEMU prologue and epilogue code */
2456static void tcg_target_qemu_prologue(TCGContext *s)
2457{
2458    int i;
2459
2460    tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
2461
2462    /* TB prologue */
2463    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
2464    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2465        tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2466                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2467    }
2468
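    /*
     * Without a softmmu, guest addresses are offset by guest_base;
     * keep it resident in a reserved register.
     */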
2469    if (!tcg_use_softmmu && guest_base) {
2470        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
2471        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
2472    }
2473
2474    /* Call generated code */
2475    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2476    tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
2477
2478    /* Return path for goto_ptr. Set return value to 0 */
2479    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
2480    tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
2481
2482    /* TB epilogue */
2483    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
2484    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2485        tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
2486                   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
2487    }
2488
2489    tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
2490    tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
2491}
2492
2493static void tcg_out_tb_start(TCGContext *s)
2494{
2495    /* nothing to do */
2496}
2497
2498static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
2499{
2500    for (int i = 0; i < count; ++i) {
2501        /* Canonical nop is andi r0,r0,0 */
2502        p[i] = OPC_ANDI;
2503    }
2504}
2505
2506static void tcg_target_init(TCGContext *s)
2507{
2508    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2509
2510    /* Server and desktop class cpus have UAL; embedded cpus do not. */
2511    if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
2512        error_report("TCG: unaligned access support required; exiting");
2513        exit(EXIT_FAILURE);
2514    }
2515
2516    tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
2517    tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
2518
2519    tcg_target_call_clobber_regs = ALL_GENERAL_REGS | ALL_VECTOR_REGS;
2520    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
2521    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
2522    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
2523    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
2524    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
2525    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
2526    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
2527    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
2528    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
2529    tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
2530
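    /* Vector types and registers are only usable when the host has LSX/LASX. */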
2531    if (cpuinfo & CPUINFO_LSX) {
2532        tcg_target_available_regs[TCG_TYPE_V64] = ALL_VECTOR_REGS;
2533        tcg_target_available_regs[TCG_TYPE_V128] = ALL_VECTOR_REGS;
2534        if (cpuinfo & CPUINFO_LASX) {
2535            tcg_target_available_regs[TCG_TYPE_V256] = ALL_VECTOR_REGS;
2536        }
2537        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
2538        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
2539        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
2540        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
2541        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
2542        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
2543        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
2544        tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
2545    }
2546
2547    s->reserved_regs = 0;
2548    tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
2549    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
2550    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
2551    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
2552    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
2553    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
2554    tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
2555    tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
2556}
2557
2558typedef struct {
2559    DebugFrameHeader h;
2560    uint8_t fde_def_cfa[4];
2561    uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
2562} DebugFrame;
2563
2564#define ELF_HOST_MACHINE EM_LOONGARCH
2565
2566static const DebugFrame debug_frame = {
2567    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
2568    .h.cie.id = -1,
2569    .h.cie.version = 1,
2570    .h.cie.code_align = 1,
2571    .h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
2572    .h.cie.return_column = TCG_REG_RA,
2573
2574    /* Total FDE size does not include the "len" member.  */
2575    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2576
2577    .fde_def_cfa = {
2578        12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ...  */
2579        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
2580        (FRAME_SIZE >> 7)
2581    },
2582    .fde_reg_ofs = {
2583        0x80 + 23, 11,                  /* DW_CFA_offset, s0, -88 */
2584        0x80 + 24, 10,                  /* DW_CFA_offset, s1, -80 */
2585        0x80 + 25, 9,                   /* DW_CFA_offset, s2, -72 */
2586        0x80 + 26, 8,                   /* DW_CFA_offset, s3, -64 */
2587        0x80 + 27, 7,                   /* DW_CFA_offset, s4, -56 */
2588        0x80 + 28, 6,                   /* DW_CFA_offset, s5, -48 */
2589        0x80 + 29, 5,                   /* DW_CFA_offset, s6, -40 */
2590        0x80 + 30, 4,                   /* DW_CFA_offset, s7, -32 */
2591        0x80 + 31, 3,                   /* DW_CFA_offset, s8, -24 */
2592        0x80 + 22, 2,                   /* DW_CFA_offset, s9, -16 */
2593        0x80 + 1,  1,                   /* DW_CFA_offset, ra, -8 */
2594    }
2595};
2596
2597void tcg_register_jit(const void *buf, size_t buf_size)
2598{
2599    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
2600}
2601