xref: /openbmc/qemu/tcg/tcg.c (revision 9ea2e69f)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/tcg-op-common.h"
40 
41 #if UINTPTR_MAX == UINT32_MAX
42 # define ELF_CLASS  ELFCLASS32
43 #else
44 # define ELF_CLASS  ELFCLASS64
45 #endif
46 #if HOST_BIG_ENDIAN
47 # define ELF_DATA   ELFDATA2MSB
48 #else
49 # define ELF_DATA   ELFDATA2LSB
50 #endif
51 
52 #include "elf.h"
53 #include "exec/log.h"
54 #include "tcg/tcg-ldst.h"
55 #include "tcg/tcg-temp-internal.h"
56 #include "tcg-internal.h"
57 #include "accel/tcg/perf.h"
58 #ifdef CONFIG_USER_ONLY
59 #include "exec/user/guest-base.h"
60 #endif
61 
/*
 * Forward declarations for functions declared in tcg-target.c.inc and
 * used here.
 */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
/* A false return from patch_reloc makes tcg_resolve_relocs() fail. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
68 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 *
 * DebugFrameCIE holds the CIE fields.  Note that, unlike the FDE header
 * and the combined DebugFrameHeader, it is not declared QEMU_PACKED.
 */
typedef struct {
    /* Length word, explicitly aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];   /* one-character array */
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
79 
/*
 * FDE header: a pointer-aligned length word, the offset of the CIE, and
 * the start address and length of the described function, both stored as
 * pointer-sized integers.  Declared QEMU_PACKED.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
86 
/* A CIE immediately followed by one FDE header, declared QEMU_PACKED. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
91 
/*
 * Record describing one qemu_ld or qemu_st operation: the memory operation
 * index, the registers holding the address and data, the return address and
 * up to two code locations to be patched.  Records are chained through
 * @next as a QSIMPLEQ.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
104 
/*
 * Forward declaration; marked unused so that a configuration which never
 * calls it does not trigger an unused-function warning.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
109 
/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
/* Extension primitives; these are dispatched from tcg_out_movext(). */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
/*
 * When tcg_out_xchg returns false, the tcg_out_movext{2,3} callers fall
 * back to copying through a scratch register.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector emitters.  When TCG_TARGET_MAYBE_vec is non-zero they are only
 * declared here; otherwise each one is an inline stub that asserts it is
 * never reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
/* Further forward declarations of backend-provided functions. */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
/* Only declared when the backend defines TCG_TARGET_NEED_LDST_LABELS. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
178 
/*
 * Parameters passed to the tcg_out_{ld,st}_helper_args and
 * tcg_out_ld_helper_ret routines declared below.
 */
typedef struct TCGLdstHelperParam {
    /* Callback that yields a register for the given label and arg_reg. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    /* NOTE(review): presumably the number of valid entries in tmp[] - confirm */
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;
184 
/*
 * Forward declarations of the load/store helper call routines.  Each is
 * marked unused so that a configuration which does not call one of them
 * does not trigger an unused-function warning.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
194 
/*
 * Load helper entry points, indexed by the MO_* size/sign value of the
 * access.  The MO_SL and MO_128 slots are only populated when
 * TCG_TARGET_REG_BITS is 64; slots without an initializer are NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
207 
/*
 * Store helper entry points, indexed by the MO_* size value of the access.
 * The MO_128 slot is only populated when TCG_TARGET_REG_BITS is 64.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
217 
/* Result type of atom_and_align_for_opc(): a pair of log2 values. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
222 
/* Forward declaration; marked unused in case no caller is compiled in. */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));
226 
/* Initial context; softmmu tcg_register_thread() copies it per thread. */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context in use, set by tcg_register_thread(). */
__thread TCGContext *tcg_ctx;

/*
 * Table of registered contexts.  tcg_cur_ctxs counts the entries claimed so
 * far and tcg_register_thread() asserts it stays below tcg_max_ctxs.
 */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

/* Only present when not building the TCG interpreter. */
#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Register sets; the first is indexed by TCGType. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
243 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store one byte at the current output position and step past it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *out = s->code_ptr;

    *out = v;
    s->code_ptr = out + 1;
}

/* Overwrite the single insn unit at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
256 
257 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
258 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
259 {
260     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
261         *s->code_ptr++ = v;
262     } else {
263         tcg_insn_unit *p = s->code_ptr;
264         memcpy(p, &v, sizeof(v));
265         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
266     }
267 }
268 
269 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
270                                                        uint16_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
273         *p = v;
274     } else {
275         memcpy(p, &v, sizeof(v));
276     }
277 }
278 #endif
279 
280 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
281 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
284         *s->code_ptr++ = v;
285     } else {
286         tcg_insn_unit *p = s->code_ptr;
287         memcpy(p, &v, sizeof(v));
288         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
289     }
290 }
291 
292 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
293                                                        uint32_t v)
294 {
295     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
296         *p = v;
297     } else {
298         memcpy(p, &v, sizeof(v));
299     }
300 }
301 #endif
302 
303 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
304 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
305 {
306     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
307         *s->code_ptr++ = v;
308     } else {
309         tcg_insn_unit *p = s->code_ptr;
310         memcpy(p, &v, sizeof(v));
311         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
312     }
313 }
314 
315 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
316                                                        uint64_t v)
317 {
318     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
319         *p = v;
320     } else {
321         memcpy(p, &v, sizeof(v));
322     }
323 }
324 #endif
325 
326 /* label relocation processing */
327 
328 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
329                           TCGLabel *l, intptr_t addend)
330 {
331     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
332 
333     r->type = type;
334     r->ptr = code_ptr;
335     r->addend = addend;
336     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
337 }
338 
339 static void tcg_out_label(TCGContext *s, TCGLabel *l)
340 {
341     tcg_debug_assert(!l->has_value);
342     l->has_value = 1;
343     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
344 }
345 
346 TCGLabel *gen_new_label(void)
347 {
348     TCGContext *s = tcg_ctx;
349     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
350 
351     memset(l, 0, sizeof(TCGLabel));
352     l->id = s->nb_labels++;
353     QSIMPLEQ_INIT(&l->branches);
354     QSIMPLEQ_INIT(&l->relocs);
355 
356     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
357 
358     return l;
359 }
360 
361 static bool tcg_resolve_relocs(TCGContext *s)
362 {
363     TCGLabel *l;
364 
365     QSIMPLEQ_FOREACH(l, &s->labels, next) {
366         TCGRelocation *r;
367         uintptr_t value = l->u.value;
368 
369         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
370             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
371                 return false;
372             }
373         }
374     }
375     return true;
376 }
377 
378 static void set_jmp_reset_offset(TCGContext *s, int which)
379 {
380     /*
381      * We will check for overflow at the end of the opcode loop in
382      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
383      */
384     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
385 }
386 
387 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
388 {
389     /*
390      * We will check for overflow at the end of the opcode loop in
391      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
392      */
393     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
394 }
395 
396 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
397 {
398     /*
399      * Return the read-execute version of the pointer, for the benefit
400      * of any pc-relative addressing mode.
401      */
402     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
403 }
404 
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
/*
 * Offset of the CPUTLBDescFast entry number @which, counted from
 * s->tlb_fast_offset.
 */
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    const size_t entry_delta = which * sizeof(CPUTLBDescFast);

    return s->tlb_fast_offset + entry_delta;
}
#endif
411 
/*
 * Signal overflow, starting over with fewer guest insns.
 *
 * Does not return: control is transferred with siglongjmp() to the jump
 * buffer s->jmp_trans, passing the value -2.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
418 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register (or slot number, see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type for destination */
    TCGType src_type;   /* integral type for source */
    MemOp src_ext;      /* extension to apply to source */
} TCGMovExtend;
435 
436 /**
437  * tcg_out_movext -- move and extend
438  * @s: tcg context
439  * @dst_type: integral type for destination
440  * @dst: destination register
441  * @src_type: integral type for source
442  * @src_ext: extension to apply to source
443  * @src: source register
444  *
445  * Move or extend @src into @dst, depending on @src_ext and the types.
446  */
447 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
448                            TCGType src_type, MemOp src_ext, TCGReg src)
449 {
450     switch (src_ext) {
451     case MO_UB:
452         tcg_out_ext8u(s, dst, src);
453         break;
454     case MO_SB:
455         tcg_out_ext8s(s, dst_type, dst, src);
456         break;
457     case MO_UW:
458         tcg_out_ext16u(s, dst, src);
459         break;
460     case MO_SW:
461         tcg_out_ext16s(s, dst_type, dst, src);
462         break;
463     case MO_UL:
464     case MO_SL:
465         if (dst_type == TCG_TYPE_I32) {
466             if (src_type == TCG_TYPE_I32) {
467                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
468             } else {
469                 tcg_out_extrl_i64_i32(s, dst, src);
470             }
471         } else if (src_type == TCG_TYPE_I32) {
472             if (src_ext & MO_SIGN) {
473                 tcg_out_exts_i32_i64(s, dst, src);
474             } else {
475                 tcg_out_extu_i32_i64(s, dst, src);
476             }
477         } else {
478             if (src_ext & MO_SIGN) {
479                 tcg_out_ext32s(s, dst, src);
480             } else {
481                 tcg_out_ext32u(s, dst, src);
482             }
483         }
484         break;
485     case MO_UQ:
486         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
487         if (dst_type == TCG_TYPE_I32) {
488             tcg_out_extrl_i64_i32(s, dst, src);
489         } else {
490             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
491         }
492         break;
493     default:
494         g_assert_not_reached();
495     }
496 }
497 
498 /* Minor variations on a theme, using a structure. */
499 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
500                                     TCGReg src)
501 {
502     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
503 }
504 
505 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
506 {
507     tcg_out_movext1_new_src(s, i, i->src);
508 }
509 
/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 *
 * @scratch must be non-negative when the two moves form a swap and
 * tcg_out_xchg() reports that it cannot exchange the registers.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* Writing i1's destination does not clobber i2's source: go in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /* Here i1->dst == src2.  If i2->dst == src1 as well, it is a swap. */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No exchange available: park src1 in the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i2 first, so that src2 is consumed before i1 overwrites it. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
549 
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 *
 * @scratch must be non-negative when the three moves form a cycle and
 * tcg_out_xchg() reports that it cannot exchange the registers.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If one move's destination is not a source of either other move,
     * emit it first and let tcg_out_movext2 order the remaining two.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* Save src1, then emit i3, i2, and finally i1 from scratch. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* Save src1, then emit i2, i3, and finally i1 from scratch. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
625 
/*
 * Token-pasting helpers: glue prefix P onto one to six names, with an
 * underscore between consecutive names.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
632 
/*
 * Define an enumeration for the various combinations.
 *
 * In this pass each C_Ox_Iy() line of tcg-target-con-set.h expands to one
 * enumerator followed by a comma; the macros are undefined again afterwards
 * so that the header can be re-included with different definitions.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Maps an opcode to one of the enumerators defined above. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
671 
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * In this pass each C_Ox_Iy() line expands to an initializer whose
 * args_ct_str lists the stringified constraints, outputs first.
 * C_N1_I2 additionally prefixes its output constraint with "&".
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
709 
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * These definitions have no trailing comma, so a C_Ox_Iy() invocation is a
 * plain expression naming a TCGConstraintSetIndex enumerator.  They stay
 * defined while tcg-target.c.inc is included below.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"
729 #include "tcg-target.c.inc"
730 
731 static void alloc_tcg_plugin_context(TCGContext *s)
732 {
733 #ifdef CONFIG_PLUGIN
734     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
735     s->plugin_tb->insns =
736         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
737 #endif
738 }
739 
740 /*
741  * All TCG threads except the parent (i.e. the one that called tcg_context_init
742  * and registered the target's TCG globals) must register with this function
743  * before initiating translation.
744  *
745  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
746  * of tcg_region_init() for the reasoning behind this.
747  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
754  */
755 #ifdef CONFIG_USER_ONLY
756 void tcg_register_thread(void)
757 {
758     tcg_ctx = &tcg_init_ctx;
759 }
760 #else
761 void tcg_register_thread(void)
762 {
763     TCGContext *s = g_malloc(sizeof(*s));
764     unsigned int i, n;
765 
766     *s = tcg_init_ctx;
767 
768     /* Relink mem_base.  */
769     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
770         if (tcg_init_ctx.temps[i].mem_base) {
771             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
772             tcg_debug_assert(b >= 0 && b < n);
773             s->temps[i].mem_base = &s->temps[b];
774         }
775     }
776 
777     /* Claim an entry in tcg_ctxs */
778     n = qatomic_fetch_inc(&tcg_cur_ctxs);
779     g_assert(n < tcg_max_ctxs);
780     qatomic_set(&tcg_ctxs[n], s);
781 
782     if (n > 0) {
783         alloc_tcg_plugin_context(s);
784         tcg_region_initial_alloc(s);
785     }
786 
787     tcg_ctx = s;
788 }
789 #endif /* !CONFIG_USER_ONLY */
790 
791 /* pool based memory allocation */
792 void *tcg_malloc_internal(TCGContext *s, int size)
793 {
794     TCGPool *p;
795     int pool_size;
796 
797     if (size > TCG_POOL_CHUNK_SIZE) {
798         /* big malloc: insert a new pool (XXX: could optimize) */
799         p = g_malloc(sizeof(TCGPool) + size);
800         p->size = size;
801         p->next = s->pool_first_large;
802         s->pool_first_large = p;
803         return p->data;
804     } else {
805         p = s->pool_current;
806         if (!p) {
807             p = s->pool_first;
808             if (!p)
809                 goto new_pool;
810         } else {
811             if (!p->next) {
812             new_pool:
813                 pool_size = TCG_POOL_CHUNK_SIZE;
814                 p = g_malloc(sizeof(TCGPool) + pool_size);
815                 p->size = pool_size;
816                 p->next = NULL;
817                 if (s->pool_current) {
818                     s->pool_current->next = p;
819                 } else {
820                     s->pool_first = p;
821                 }
822             } else {
823                 p = p->next;
824             }
825         }
826     }
827     s->pool_current = p;
828     s->pool_cur = p->data + size;
829     s->pool_end = p->data + p->size;
830     return p->data;
831 }
832 
833 void tcg_pool_reset(TCGContext *s)
834 {
835     TCGPool *p, *t;
836     for (p = s->pool_first_large; p; p = t) {
837         t = p->next;
838         g_free(p);
839     }
840     s->pool_first_large = NULL;
841     s->pool_cur = s->pool_end = NULL;
842     s->pool_current = NULL;
843 }
844 
845 /*
846  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
847  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
848  * We only use these for layout in tcg_out_ld_helper_ret and
849  * tcg_out_st_helper_args, and share them between several of
850  * the helpers, with the end result that it's easier to build manually.
851  */
852 
853 #if TCG_TARGET_REG_BITS == 32
854 # define dh_typecode_ttl  dh_typecode_i32
855 #else
856 # define dh_typecode_ttl  dh_typecode_i64
857 #endif
858 
/*
 * Call description for a load helper returning tcg_target_ulong:
 * slot 0 is the return type, slots 1-4 are (env, addr, oi, ra).
 */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
867 
868 static TCGHelperInfo info_helper_ld64_mmu = {
869     .flags = TCG_CALL_NO_WG,
870     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
871               | dh_typemask(env, 1)
872               | dh_typemask(i64, 2)  /* uint64_t addr */
873               | dh_typemask(i32, 3)  /* unsigned oi */
874               | dh_typemask(ptr, 4)  /* uintptr_t ra */
875 };
876 
877 static TCGHelperInfo info_helper_ld128_mmu = {
878     .flags = TCG_CALL_NO_WG,
879     .typemask = dh_typemask(i128, 0) /* return Int128 */
880               | dh_typemask(env, 1)
881               | dh_typemask(i64, 2)  /* uint64_t addr */
882               | dh_typemask(i32, 3)  /* unsigned oi */
883               | dh_typemask(ptr, 4)  /* uintptr_t ra */
884 };
885 
886 static TCGHelperInfo info_helper_st32_mmu = {
887     .flags = TCG_CALL_NO_WG,
888     .typemask = dh_typemask(void, 0)
889               | dh_typemask(env, 1)
890               | dh_typemask(i64, 2)  /* uint64_t addr */
891               | dh_typemask(i32, 3)  /* uint32_t data */
892               | dh_typemask(i32, 4)  /* unsigned oi */
893               | dh_typemask(ptr, 5)  /* uintptr_t ra */
894 };
895 
896 static TCGHelperInfo info_helper_st64_mmu = {
897     .flags = TCG_CALL_NO_WG,
898     .typemask = dh_typemask(void, 0)
899               | dh_typemask(env, 1)
900               | dh_typemask(i64, 2)  /* uint64_t addr */
901               | dh_typemask(i64, 3)  /* uint64_t data */
902               | dh_typemask(i32, 4)  /* unsigned oi */
903               | dh_typemask(ptr, 5)  /* uintptr_t ra */
904 };
905 
906 static TCGHelperInfo info_helper_st128_mmu = {
907     .flags = TCG_CALL_NO_WG,
908     .typemask = dh_typemask(void, 0)
909               | dh_typemask(env, 1)
910               | dh_typemask(i64, 2)  /* uint64_t addr */
911               | dh_typemask(i128, 3) /* Int128 data */
912               | dh_typemask(i32, 4)  /* unsigned oi */
913               | dh_typemask(ptr, 5)  /* uintptr_t ra */
914 };
915 
916 #ifdef CONFIG_TCG_INTERPRETER
917 static ffi_type *typecode_to_ffi(int argmask)
918 {
919     /*
920      * libffi does not support __int128_t, so we have forced Int128
921      * to use the structure definition instead of the builtin type.
922      */
923     static ffi_type *ffi_type_i128_elements[3] = {
924         &ffi_type_uint64,
925         &ffi_type_uint64,
926         NULL
927     };
928     static ffi_type ffi_type_i128 = {
929         .size = 16,
930         .alignment = __alignof__(Int128),
931         .type = FFI_TYPE_STRUCT,
932         .elements = ffi_type_i128_elements,
933     };
934 
935     switch (argmask) {
936     case dh_typecode_void:
937         return &ffi_type_void;
938     case dh_typecode_i32:
939         return &ffi_type_uint32;
940     case dh_typecode_s32:
941         return &ffi_type_sint32;
942     case dh_typecode_i64:
943         return &ffi_type_uint64;
944     case dh_typecode_s64:
945         return &ffi_type_sint64;
946     case dh_typecode_ptr:
947         return &ffi_type_pointer;
948     case dh_typecode_i128:
949         return &ffi_type_i128;
950     }
951     g_assert_not_reached();
952 }
953 
954 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
955 {
956     unsigned typemask = info->typemask;
957     struct {
958         ffi_cif cif;
959         ffi_type *args[];
960     } *ca;
961     ffi_status status;
962     int nargs;
963 
964     /* Ignoring the return type, find the last non-zero field. */
965     nargs = 32 - clz32(typemask >> 3);
966     nargs = DIV_ROUND_UP(nargs, 3);
967     assert(nargs <= MAX_CALL_IARGS);
968 
969     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
970     ca->cif.rtype = typecode_to_ffi(typemask & 7);
971     ca->cif.nargs = nargs;
972 
973     if (nargs != 0) {
974         ca->cif.arg_types = ca->args;
975         for (int j = 0; j < nargs; ++j) {
976             int typecode = extract32(typemask, (j + 1) * 3, 3);
977             ca->args[j] = typecode_to_ffi(typecode);
978         }
979     }
980 
981     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
982                           ca->cif.rtype, ca->cif.arg_types);
983     assert(status == FFI_OK);
984 
985     return &ca->cif;
986 }
987 
/*
 * HELPER_INFO_INIT(I) is the address of the per-helper field that
 * records initialization; HELPER_INFO_INIT_VAL(I) is the value to
 * store there.  With the interpreter that is the ffi_cif pointer
 * produced by init_ffi_layout().
 * NOTE(review): the users of these macros are outside this section;
 * presumably the field is tested for non-zero before first use.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
/* Without the interpreter the field is "init" and the value is just 1. */
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
993 #endif /* CONFIG_TCG_INTERPRETER */
994 
995 static inline bool arg_slot_reg_p(unsigned arg_slot)
996 {
997     /*
998      * Split the sizeof away from the comparison to avoid Werror from
999      * "unsigned < 0 is always false", when iarg_regs is empty.
1000      */
1001     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1002     return arg_slot < nreg;
1003 }
1004 
1005 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1006 {
1007     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1008     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1009 
1010     tcg_debug_assert(stk_slot < max);
1011     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1012 }
1013 
/*
 * Running cursors used while init_call_layout() places the arguments
 * of one helper call.  Each layout_arg_* routine advances the cursors
 * for the pieces it emits.
 */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1020 
1021 static void layout_arg_even(TCGCumulativeArgs *cum)
1022 {
1023     cum->arg_slot += cum->arg_slot & 1;
1024 }
1025 
1026 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1027                          TCGCallArgumentKind kind)
1028 {
1029     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1030 
1031     *loc = (TCGCallArgumentLoc){
1032         .kind = kind,
1033         .arg_idx = cum->arg_idx,
1034         .arg_slot = cum->arg_slot,
1035     };
1036     cum->info_in_idx++;
1037     cum->arg_slot++;
1038 }
1039 
1040 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1041                                 TCGHelperInfo *info, int n)
1042 {
1043     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1044 
1045     for (int i = 0; i < n; ++i) {
1046         /* Layout all using the same arg_idx, adjusting the subindex. */
1047         loc[i] = (TCGCallArgumentLoc){
1048             .kind = TCG_CALL_ARG_NORMAL,
1049             .arg_idx = cum->arg_idx,
1050             .tmp_subindex = i,
1051             .arg_slot = cum->arg_slot + i,
1052         };
1053     }
1054     cum->info_in_idx += n;
1055     cum->arg_slot += n;
1056 }
1057 
1058 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1059 {
1060     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1061     int n = 128 / TCG_TARGET_REG_BITS;
1062 
1063     /* The first subindex carries the pointer. */
1064     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1065 
1066     /*
1067      * The callee is allowed to clobber memory associated with
1068      * structure pass by-reference.  Therefore we must make copies.
1069      * Allocate space from "ref_slot", which will be adjusted to
1070      * follow the parameters on the stack.
1071      */
1072     loc[0].ref_slot = cum->ref_slot;
1073 
1074     /*
1075      * Subsequent words also go into the reference slot, but
1076      * do not accumulate into the regular arguments.
1077      */
1078     for (int i = 1; i < n; ++i) {
1079         loc[i] = (TCGCallArgumentLoc){
1080             .kind = TCG_CALL_ARG_BY_REF_N,
1081             .arg_idx = cum->arg_idx,
1082             .tmp_subindex = i,
1083             .ref_slot = cum->ref_slot + i,
1084         };
1085     }
1086     cum->info_in_idx += n;
1087     cum->ref_slot += n;
1088 }
1089 
/*
 * Compute the calling-convention layout for the helper described by
 * @info from its typemask.
 *
 * The low 3 bits of the typemask give the return type; each following
 * 3-bit field gives one argument, and the scan stops once the remaining
 * mask is zero.  Fills in info->nr_out, info->out_kind (for non-void
 * returns), info->in[] and info->nr_in.  Slot numbers count register
 * slots first, then stack slots.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* Two output pieces on a 32-bit host, one on a 64-bit host. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each iteration consumes one 3-bit field and one args[] index.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Map the helper typecode onto the TCG type used for layout. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /*
         * NOTE(review): there is no TCG_TYPE_PTR case below; presumably
         * TCG_TYPE_PTR aliases TCG_TYPE_I32 or TCG_TYPE_I64 depending on
         * the host -- confirm against the TCGType definition.
         */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /*
                 * The low bit of the typecode selects the kind following
                 * EXTEND_U.  NOTE(review): assumes signed typecodes are
                 * odd and the signed-extend kind is EXTEND_U + 1.
                 */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        /* Only skip past stack slots that the arguments actually use. */
        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        /* Convert from a stack-slot index to the combined slot numbering. */
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1270 
/*
 * Copy of tcg_target_reg_alloc_order, filled in by tcg_context_init(),
 * in which the leading run of registers that are not in
 * tcg_target_call_clobber_regs is stored in reverse order.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for functions used by tcg_context_init(). */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1275 
1276 static void tcg_context_init(unsigned max_cpus)
1277 {
1278     TCGContext *s = &tcg_init_ctx;
1279     int op, total_args, n, i;
1280     TCGOpDef *def;
1281     TCGArgConstraint *args_ct;
1282     TCGTemp *ts;
1283 
1284     memset(s, 0, sizeof(*s));
1285     s->nb_globals = 0;
1286 
1287     /* Count total number of arguments and allocate the corresponding
1288        space */
1289     total_args = 0;
1290     for(op = 0; op < NB_OPS; op++) {
1291         def = &tcg_op_defs[op];
1292         n = def->nb_iargs + def->nb_oargs;
1293         total_args += n;
1294     }
1295 
1296     args_ct = g_new0(TCGArgConstraint, total_args);
1297 
1298     for(op = 0; op < NB_OPS; op++) {
1299         def = &tcg_op_defs[op];
1300         def->args_ct = args_ct;
1301         n = def->nb_iargs + def->nb_oargs;
1302         args_ct += n;
1303     }
1304 
1305     init_call_layout(&info_helper_ld32_mmu);
1306     init_call_layout(&info_helper_ld64_mmu);
1307     init_call_layout(&info_helper_ld128_mmu);
1308     init_call_layout(&info_helper_st32_mmu);
1309     init_call_layout(&info_helper_st64_mmu);
1310     init_call_layout(&info_helper_st128_mmu);
1311 
1312     tcg_target_init(s);
1313     process_op_defs(s);
1314 
1315     /* Reverse the order of the saved registers, assuming they're all at
1316        the start of tcg_target_reg_alloc_order.  */
1317     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1318         int r = tcg_target_reg_alloc_order[n];
1319         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1320             break;
1321         }
1322     }
1323     for (i = 0; i < n; ++i) {
1324         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1325     }
1326     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1327         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1328     }
1329 
1330     alloc_tcg_plugin_context(s);
1331 
1332     tcg_ctx = s;
1333     /*
1334      * In user-mode we simply share the init context among threads, since we
1335      * use a single region. See the documentation tcg_region_init() for the
1336      * reasoning behind this.
1337      * In softmmu we will have at most max_cpus TCG threads.
1338      */
1339 #ifdef CONFIG_USER_ONLY
1340     tcg_ctxs = &tcg_ctx;
1341     tcg_cur_ctxs = 1;
1342     tcg_max_ctxs = 1;
1343 #else
1344     tcg_max_ctxs = max_cpus;
1345     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1346 #endif
1347 
1348     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1349     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1350     cpu_env = temp_tcgv_ptr(ts);
1351 }
1352 
/*
 * Initialize TCG: set up the initial context via tcg_context_init(),
 * then pass @tb_size, @splitwx and @max_cpus to tcg_region_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1358 
1359 /*
1360  * Allocate TBs right before their corresponding translated code, making
1361  * sure that TBs and code are on different cache lines.
1362  */
1363 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1364 {
1365     uintptr_t align = qemu_icache_linesize;
1366     TranslationBlock *tb;
1367     void *next;
1368 
1369  retry:
1370     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1371     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1372 
1373     if (unlikely(next > s->code_gen_highwater)) {
1374         if (tcg_region_alloc(s)) {
1375             return NULL;
1376         }
1377         goto retry;
1378     }
1379     qatomic_set(&s->code_gen_ptr, next);
1380     s->data_gen_ptr = NULL;
1381     return tb;
1382 }
1383 
/*
 * Generate the backend prologue at s->code_gen_ptr.
 *
 * Points code_ptr/code_buf at the current code_gen_ptr, lets the
 * backend emit the prologue, finalizes any constant pool, reports the
 * result to perf, flushes the instruction cache (non-interpreter
 * builds), optionally logs a disassembly, and finally calls
 * tcg_region_prologue_set().
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue entry point is the executable view of code_ptr. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /*
                 * A non-NULL data_gen_ptr splits the prologue: code runs
                 * up to data_gen_ptr and the rest is dumped as data words.
                 */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                /* One tcg_target_ulong per output line. */
                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1463 
1464 void tcg_func_start(TCGContext *s)
1465 {
1466     tcg_pool_reset(s);
1467     s->nb_temps = s->nb_globals;
1468 
1469     /* No temps have been previously allocated for size or locality.  */
1470     memset(s->free_temps, 0, sizeof(s->free_temps));
1471 
1472     /* No constant temps have been previously allocated. */
1473     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1474         if (s->const_table[i]) {
1475             g_hash_table_remove_all(s->const_table[i]);
1476         }
1477     }
1478 
1479     s->nb_ops = 0;
1480     s->nb_labels = 0;
1481     s->current_frame_offset = s->frame_start;
1482 
1483 #ifdef CONFIG_DEBUG_TCG
1484     s->goto_tb_issue_mask = 0;
1485 #endif
1486 
1487     QTAILQ_INIT(&s->ops);
1488     QTAILQ_INIT(&s->free_ops);
1489     QSIMPLEQ_INIT(&s->labels);
1490 
1491     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1492                      s->addr_type == TCG_TYPE_I64);
1493 
1494 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1495     tcg_debug_assert(s->tlb_fast_offset < 0);
1496     tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1497 #endif
1498 
1499     tcg_debug_assert(s->insn_start_words > 0);
1500 }
1501 
1502 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1503 {
1504     int n = s->nb_temps++;
1505 
1506     if (n >= TCG_MAX_TEMPS) {
1507         tcg_raise_tb_overflow(s);
1508     }
1509     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1510 }
1511 
1512 static TCGTemp *tcg_global_alloc(TCGContext *s)
1513 {
1514     TCGTemp *ts;
1515 
1516     tcg_debug_assert(s->nb_globals == s->nb_temps);
1517     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1518     s->nb_globals++;
1519     ts = tcg_temp_alloc(s);
1520     ts->kind = TEMP_GLOBAL;
1521 
1522     return ts;
1523 }
1524 
1525 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1526                                             TCGReg reg, const char *name)
1527 {
1528     TCGTemp *ts;
1529 
1530     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1531 
1532     ts = tcg_global_alloc(s);
1533     ts->base_type = type;
1534     ts->type = type;
1535     ts->kind = TEMP_FIXED;
1536     ts->reg = reg;
1537     ts->name = name;
1538     tcg_regset_set_reg(s->reserved_regs, reg);
1539 
1540     return ts;
1541 }
1542 
1543 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1544 {
1545     s->frame_start = start;
1546     s->frame_end = start + size;
1547     s->frame_temp
1548         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1549 }
1550 
1551 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1552                                      intptr_t offset, const char *name)
1553 {
1554     TCGContext *s = tcg_ctx;
1555     TCGTemp *base_ts = tcgv_ptr_temp(base);
1556     TCGTemp *ts = tcg_global_alloc(s);
1557     int indirect_reg = 0;
1558 
1559     switch (base_ts->kind) {
1560     case TEMP_FIXED:
1561         break;
1562     case TEMP_GLOBAL:
1563         /* We do not support double-indirect registers.  */
1564         tcg_debug_assert(!base_ts->indirect_reg);
1565         base_ts->indirect_base = 1;
1566         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1567                             ? 2 : 1);
1568         indirect_reg = 1;
1569         break;
1570     default:
1571         g_assert_not_reached();
1572     }
1573 
1574     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1575         TCGTemp *ts2 = tcg_global_alloc(s);
1576         char buf[64];
1577 
1578         ts->base_type = TCG_TYPE_I64;
1579         ts->type = TCG_TYPE_I32;
1580         ts->indirect_reg = indirect_reg;
1581         ts->mem_allocated = 1;
1582         ts->mem_base = base_ts;
1583         ts->mem_offset = offset;
1584         pstrcpy(buf, sizeof(buf), name);
1585         pstrcat(buf, sizeof(buf), "_0");
1586         ts->name = strdup(buf);
1587 
1588         tcg_debug_assert(ts2 == ts + 1);
1589         ts2->base_type = TCG_TYPE_I64;
1590         ts2->type = TCG_TYPE_I32;
1591         ts2->indirect_reg = indirect_reg;
1592         ts2->mem_allocated = 1;
1593         ts2->mem_base = base_ts;
1594         ts2->mem_offset = offset + 4;
1595         ts2->temp_subindex = 1;
1596         pstrcpy(buf, sizeof(buf), name);
1597         pstrcat(buf, sizeof(buf), "_1");
1598         ts2->name = strdup(buf);
1599     } else {
1600         ts->base_type = type;
1601         ts->type = type;
1602         ts->indirect_reg = indirect_reg;
1603         ts->mem_allocated = 1;
1604         ts->mem_base = base_ts;
1605         ts->mem_offset = offset;
1606         ts->name = name;
1607     }
1608     return ts;
1609 }
1610 
1611 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1612 {
1613     TCGContext *s = tcg_ctx;
1614     TCGTemp *ts;
1615     int n;
1616 
1617     if (kind == TEMP_EBB) {
1618         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1619 
1620         if (idx < TCG_MAX_TEMPS) {
1621             /* There is already an available temp with the right type.  */
1622             clear_bit(idx, s->free_temps[type].l);
1623 
1624             ts = &s->temps[idx];
1625             ts->temp_allocated = 1;
1626             tcg_debug_assert(ts->base_type == type);
1627             tcg_debug_assert(ts->kind == kind);
1628             return ts;
1629         }
1630     } else {
1631         tcg_debug_assert(kind == TEMP_TB);
1632     }
1633 
1634     switch (type) {
1635     case TCG_TYPE_I32:
1636     case TCG_TYPE_V64:
1637     case TCG_TYPE_V128:
1638     case TCG_TYPE_V256:
1639         n = 1;
1640         break;
1641     case TCG_TYPE_I64:
1642         n = 64 / TCG_TARGET_REG_BITS;
1643         break;
1644     case TCG_TYPE_I128:
1645         n = 128 / TCG_TARGET_REG_BITS;
1646         break;
1647     default:
1648         g_assert_not_reached();
1649     }
1650 
1651     ts = tcg_temp_alloc(s);
1652     ts->base_type = type;
1653     ts->temp_allocated = 1;
1654     ts->kind = kind;
1655 
1656     if (n == 1) {
1657         ts->type = type;
1658     } else {
1659         ts->type = TCG_TYPE_REG;
1660 
1661         for (int i = 1; i < n; ++i) {
1662             TCGTemp *ts2 = tcg_temp_alloc(s);
1663 
1664             tcg_debug_assert(ts2 == ts + i);
1665             ts2->base_type = type;
1666             ts2->type = TCG_TYPE_REG;
1667             ts2->temp_allocated = 1;
1668             ts2->temp_subindex = i;
1669             ts2->kind = kind;
1670         }
1671     }
1672     return ts;
1673 }
1674 
1675 TCGv_vec tcg_temp_new_vec(TCGType type)
1676 {
1677     TCGTemp *t;
1678 
1679 #ifdef CONFIG_DEBUG_TCG
1680     switch (type) {
1681     case TCG_TYPE_V64:
1682         assert(TCG_TARGET_HAS_v64);
1683         break;
1684     case TCG_TYPE_V128:
1685         assert(TCG_TARGET_HAS_v128);
1686         break;
1687     case TCG_TYPE_V256:
1688         assert(TCG_TARGET_HAS_v256);
1689         break;
1690     default:
1691         g_assert_not_reached();
1692     }
1693 #endif
1694 
1695     t = tcg_temp_new_internal(type, TEMP_EBB);
1696     return temp_tcgv_vec(t);
1697 }
1698 
1699 /* Create a new temp of the same type as an existing temp.  */
1700 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1701 {
1702     TCGTemp *t = tcgv_vec_temp(match);
1703 
1704     tcg_debug_assert(t->temp_allocated != 0);
1705 
1706     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1707     return temp_tcgv_vec(t);
1708 }
1709 
1710 void tcg_temp_free_internal(TCGTemp *ts)
1711 {
1712     TCGContext *s = tcg_ctx;
1713 
1714     switch (ts->kind) {
1715     case TEMP_CONST:
1716     case TEMP_TB:
1717         /* Silently ignore free. */
1718         break;
1719     case TEMP_EBB:
1720         tcg_debug_assert(ts->temp_allocated != 0);
1721         ts->temp_allocated = 0;
1722         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1723         break;
1724     default:
1725         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1726         g_assert_not_reached();
1727     }
1728 }
1729 
1730 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1731 {
1732     TCGContext *s = tcg_ctx;
1733     GHashTable *h = s->const_table[type];
1734     TCGTemp *ts;
1735 
1736     if (h == NULL) {
1737         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1738         s->const_table[type] = h;
1739     }
1740 
1741     ts = g_hash_table_lookup(h, &val);
1742     if (ts == NULL) {
1743         int64_t *val_ptr;
1744 
1745         ts = tcg_temp_alloc(s);
1746 
1747         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1748             TCGTemp *ts2 = tcg_temp_alloc(s);
1749 
1750             tcg_debug_assert(ts2 == ts + 1);
1751 
1752             ts->base_type = TCG_TYPE_I64;
1753             ts->type = TCG_TYPE_I32;
1754             ts->kind = TEMP_CONST;
1755             ts->temp_allocated = 1;
1756 
1757             ts2->base_type = TCG_TYPE_I64;
1758             ts2->type = TCG_TYPE_I32;
1759             ts2->kind = TEMP_CONST;
1760             ts2->temp_allocated = 1;
1761             ts2->temp_subindex = 1;
1762 
1763             /*
1764              * Retain the full value of the 64-bit constant in the low
1765              * part, so that the hash table works.  Actual uses will
1766              * truncate the value to the low part.
1767              */
1768             ts[HOST_BIG_ENDIAN].val = val;
1769             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1770             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1771         } else {
1772             ts->base_type = type;
1773             ts->type = type;
1774             ts->kind = TEMP_CONST;
1775             ts->temp_allocated = 1;
1776             ts->val = val;
1777             val_ptr = &ts->val;
1778         }
1779         g_hash_table_insert(h, val_ptr, ts);
1780     }
1781 
1782     return ts;
1783 }
1784 
1785 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1786 {
1787     val = dup_const(vece, val);
1788     return temp_tcgv_vec(tcg_constant_internal(type, val));
1789 }
1790 
1791 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1792 {
1793     TCGTemp *t = tcgv_vec_temp(match);
1794 
1795     tcg_debug_assert(t->temp_allocated != 0);
1796     return tcg_constant_vec(t->base_type, vece, val);
1797 }
1798 
1799 #ifdef CONFIG_DEBUG_TCG
/*
 * Debug-build version: return the index of @ts within tcg_ctx->temps,
 * asserting that it lies in [0, nb_temps).
 */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
1806 
1807 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1808 {
1809     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1810 
1811     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1812     assert(o % sizeof(TCGTemp) == 0);
1813 
1814     return (void *)tcg_ctx + (uintptr_t)v;
1815 }
1816 #endif /* CONFIG_DEBUG_TCG */
1817 
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 *
 * Opcodes are handled in groups: those that are always available, those
 * gated on a per-opcode TCG_TARGET_HAS_* value, those that depend only on
 * the host register width, and the vector opcodes, which additionally
 * require that at least one vector size is available.  Any opcode not
 * listed must lie after INDEX_op_last_generic and is reported as supported.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* Non-zero if any of the 64/128/256-bit vector sizes is available. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow, calls, barriers and guest memory ops: always present. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* 32-bit integer ops that are unconditionally supported. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* 32-bit integer ops gated on an individual TCG_TARGET_HAS_* value. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops: only reported on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit integer ops: only reported on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* 64-bit integer ops gated on an individual TCG_TARGET_HAS_* value. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops that need nothing beyond some vector size being present. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    /* dup2 is additionally restricted to 32-bit hosts. */
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Remaining vector ops also need their own TCG_TARGET_HAS_* value. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Anything else must be numbered after the generic opcodes
         * (presumably a target-specific opcode -- confirm against the
         * opcode list); such opcodes are always reported as supported.
         */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2123 
2124 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2125 
2126 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2127 {
2128     TCGv_i64 extend_free[MAX_CALL_IARGS];
2129     int n_extend = 0;
2130     TCGOp *op;
2131     int i, n, pi = 0, total_args;
2132 
2133     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2134         init_call_layout(info);
2135         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2136     }
2137 
2138     total_args = info->nr_out + info->nr_in + 2;
2139     op = tcg_op_alloc(INDEX_op_call, total_args);
2140 
2141 #ifdef CONFIG_PLUGIN
2142     /* Flag helpers that may affect guest state */
2143     if (tcg_ctx->plugin_insn &&
2144         !(info->flags & TCG_CALL_PLUGIN) &&
2145         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2146         tcg_ctx->plugin_insn->calls_helpers = true;
2147     }
2148 #endif
2149 
2150     TCGOP_CALLO(op) = n = info->nr_out;
2151     switch (n) {
2152     case 0:
2153         tcg_debug_assert(ret == NULL);
2154         break;
2155     case 1:
2156         tcg_debug_assert(ret != NULL);
2157         op->args[pi++] = temp_arg(ret);
2158         break;
2159     case 2:
2160     case 4:
2161         tcg_debug_assert(ret != NULL);
2162         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2163         tcg_debug_assert(ret->temp_subindex == 0);
2164         for (i = 0; i < n; ++i) {
2165             op->args[pi++] = temp_arg(ret + i);
2166         }
2167         break;
2168     default:
2169         g_assert_not_reached();
2170     }
2171 
2172     TCGOP_CALLI(op) = n = info->nr_in;
2173     for (i = 0; i < n; i++) {
2174         const TCGCallArgumentLoc *loc = &info->in[i];
2175         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2176 
2177         switch (loc->kind) {
2178         case TCG_CALL_ARG_NORMAL:
2179         case TCG_CALL_ARG_BY_REF:
2180         case TCG_CALL_ARG_BY_REF_N:
2181             op->args[pi++] = temp_arg(ts);
2182             break;
2183 
2184         case TCG_CALL_ARG_EXTEND_U:
2185         case TCG_CALL_ARG_EXTEND_S:
2186             {
2187                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2188                 TCGv_i32 orig = temp_tcgv_i32(ts);
2189 
2190                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2191                     tcg_gen_ext_i32_i64(temp, orig);
2192                 } else {
2193                     tcg_gen_extu_i32_i64(temp, orig);
2194                 }
2195                 op->args[pi++] = tcgv_i64_arg(temp);
2196                 extend_free[n_extend++] = temp;
2197             }
2198             break;
2199 
2200         default:
2201             g_assert_not_reached();
2202         }
2203     }
2204     op->args[pi++] = (uintptr_t)info->func;
2205     op->args[pi++] = (uintptr_t)info;
2206     tcg_debug_assert(pi == total_args);
2207 
2208     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2209 
2210     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2211     for (i = 0; i < n_extend; ++i) {
2212         tcg_temp_free_i64(extend_free[i]);
2213     }
2214 }
2215 
2216 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2217 {
2218     tcg_gen_callN(info, ret, NULL);
2219 }
2220 
2221 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2222 {
2223     tcg_gen_callN(info, ret, &t1);
2224 }
2225 
2226 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2227 {
2228     TCGTemp *args[2] = { t1, t2 };
2229     tcg_gen_callN(info, ret, args);
2230 }
2231 
2232 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2233                    TCGTemp *t2, TCGTemp *t3)
2234 {
2235     TCGTemp *args[3] = { t1, t2, t3 };
2236     tcg_gen_callN(info, ret, args);
2237 }
2238 
2239 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2240                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2241 {
2242     TCGTemp *args[4] = { t1, t2, t3, t4 };
2243     tcg_gen_callN(info, ret, args);
2244 }
2245 
2246 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2247                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2248 {
2249     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2250     tcg_gen_callN(info, ret, args);
2251 }
2252 
2253 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2254                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2255 {
2256     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2257     tcg_gen_callN(info, ret, args);
2258 }
2259 
2260 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2261                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2262                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2263 {
2264     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2265     tcg_gen_callN(info, ret, args);
2266 }
2267 
2268 static void tcg_reg_alloc_start(TCGContext *s)
2269 {
2270     int i, n;
2271 
2272     for (i = 0, n = s->nb_temps; i < n; i++) {
2273         TCGTemp *ts = &s->temps[i];
2274         TCGTempVal val = TEMP_VAL_MEM;
2275 
2276         switch (ts->kind) {
2277         case TEMP_CONST:
2278             val = TEMP_VAL_CONST;
2279             break;
2280         case TEMP_FIXED:
2281             val = TEMP_VAL_REG;
2282             break;
2283         case TEMP_GLOBAL:
2284             break;
2285         case TEMP_EBB:
2286             val = TEMP_VAL_DEAD;
2287             /* fall through */
2288         case TEMP_TB:
2289             ts->mem_allocated = 0;
2290             break;
2291         default:
2292             g_assert_not_reached();
2293         }
2294         ts->val_type = val;
2295     }
2296 
2297     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2298 }
2299 
2300 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2301                                  TCGTemp *ts)
2302 {
2303     int idx = temp_idx(ts);
2304 
2305     switch (ts->kind) {
2306     case TEMP_FIXED:
2307     case TEMP_GLOBAL:
2308         pstrcpy(buf, buf_size, ts->name);
2309         break;
2310     case TEMP_TB:
2311         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2312         break;
2313     case TEMP_EBB:
2314         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2315         break;
2316     case TEMP_CONST:
2317         switch (ts->type) {
2318         case TCG_TYPE_I32:
2319             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2320             break;
2321 #if TCG_TARGET_REG_BITS > 32
2322         case TCG_TYPE_I64:
2323             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2324             break;
2325 #endif
2326         case TCG_TYPE_V64:
2327         case TCG_TYPE_V128:
2328         case TCG_TYPE_V256:
2329             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2330                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2331             break;
2332         default:
2333             g_assert_not_reached();
2334         }
2335         break;
2336     }
2337     return buf;
2338 }
2339 
2340 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2341                              int buf_size, TCGArg arg)
2342 {
2343     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2344 }
2345 
2346 static const char * const cond_name[] =
2347 {
2348     [TCG_COND_NEVER] = "never",
2349     [TCG_COND_ALWAYS] = "always",
2350     [TCG_COND_EQ] = "eq",
2351     [TCG_COND_NE] = "ne",
2352     [TCG_COND_LT] = "lt",
2353     [TCG_COND_GE] = "ge",
2354     [TCG_COND_LE] = "le",
2355     [TCG_COND_GT] = "gt",
2356     [TCG_COND_LTU] = "ltu",
2357     [TCG_COND_GEU] = "geu",
2358     [TCG_COND_LEU] = "leu",
2359     [TCG_COND_GTU] = "gtu"
2360 };
2361 
2362 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2363 {
2364     [MO_UB]   = "ub",
2365     [MO_SB]   = "sb",
2366     [MO_LEUW] = "leuw",
2367     [MO_LESW] = "lesw",
2368     [MO_LEUL] = "leul",
2369     [MO_LESL] = "lesl",
2370     [MO_LEUQ] = "leq",
2371     [MO_BEUW] = "beuw",
2372     [MO_BESW] = "besw",
2373     [MO_BEUL] = "beul",
2374     [MO_BESL] = "besl",
2375     [MO_BEUQ] = "beq",
2376     [MO_128 + MO_BE] = "beo",
2377     [MO_128 + MO_LE] = "leo",
2378 };
2379 
2380 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2381     [MO_UNALN >> MO_ASHIFT]    = "un+",
2382     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2383     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2384     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2385     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2386     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2387     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2388     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2389 };
2390 
2391 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2392     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2393     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2394     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2395     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2396     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2397     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2398 };
2399 
2400 static const char bswap_flag_name[][6] = {
2401     [TCG_BSWAP_IZ] = "iz",
2402     [TCG_BSWAP_OZ] = "oz",
2403     [TCG_BSWAP_OS] = "os",
2404     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2405     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2406 };
2407 
2408 static inline bool tcg_regset_single(TCGRegSet d)
2409 {
2410     return (d & (d - 1)) == 0;
2411 }
2412 
2413 static inline TCGReg tcg_regset_first(TCGRegSet d)
2414 {
2415     if (TCG_TARGET_NB_REGS <= 32) {
2416         return ctz32(d);
2417     } else {
2418         return ctz64(d);
2419     }
2420 }
2421 
/*
 * fprintf wrapper that never reports an error: it yields the number of
 * characters output, or 0 if fprintf failed.
 */
#define ne_fprintf(...) \
    ({ int n_out_ = fprintf(__VA_ARGS__); n_out_ < 0 ? 0 : n_out_; })
2425 
2426 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2427 {
2428     char buf[128];
2429     TCGOp *op;
2430 
2431     QTAILQ_FOREACH(op, &s->ops, link) {
2432         int i, k, nb_oargs, nb_iargs, nb_cargs;
2433         const TCGOpDef *def;
2434         TCGOpcode c;
2435         int col = 0;
2436 
2437         c = op->opc;
2438         def = &tcg_op_defs[c];
2439 
2440         if (c == INDEX_op_insn_start) {
2441             nb_oargs = 0;
2442             col += ne_fprintf(f, "\n ----");
2443 
2444             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2445                 col += ne_fprintf(f, " %016" PRIx64,
2446                                   tcg_get_insn_start_param(op, i));
2447             }
2448         } else if (c == INDEX_op_call) {
2449             const TCGHelperInfo *info = tcg_call_info(op);
2450             void *func = tcg_call_func(op);
2451 
2452             /* variable number of arguments */
2453             nb_oargs = TCGOP_CALLO(op);
2454             nb_iargs = TCGOP_CALLI(op);
2455             nb_cargs = def->nb_cargs;
2456 
2457             col += ne_fprintf(f, " %s ", def->name);
2458 
2459             /*
2460              * Print the function name from TCGHelperInfo, if available.
2461              * Note that plugins have a template function for the info,
2462              * but the actual function pointer comes from the plugin.
2463              */
2464             if (func == info->func) {
2465                 col += ne_fprintf(f, "%s", info->name);
2466             } else {
2467                 col += ne_fprintf(f, "plugin(%p)", func);
2468             }
2469 
2470             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2471             for (i = 0; i < nb_oargs; i++) {
2472                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2473                                                             op->args[i]));
2474             }
2475             for (i = 0; i < nb_iargs; i++) {
2476                 TCGArg arg = op->args[nb_oargs + i];
2477                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2478                 col += ne_fprintf(f, ",%s", t);
2479             }
2480         } else {
2481             col += ne_fprintf(f, " %s ", def->name);
2482 
2483             nb_oargs = def->nb_oargs;
2484             nb_iargs = def->nb_iargs;
2485             nb_cargs = def->nb_cargs;
2486 
2487             if (def->flags & TCG_OPF_VECTOR) {
2488                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2489                                   8 << TCGOP_VECE(op));
2490             }
2491 
2492             k = 0;
2493             for (i = 0; i < nb_oargs; i++) {
2494                 const char *sep =  k ? "," : "";
2495                 col += ne_fprintf(f, "%s%s", sep,
2496                                   tcg_get_arg_str(s, buf, sizeof(buf),
2497                                                   op->args[k++]));
2498             }
2499             for (i = 0; i < nb_iargs; i++) {
2500                 const char *sep =  k ? "," : "";
2501                 col += ne_fprintf(f, "%s%s", sep,
2502                                   tcg_get_arg_str(s, buf, sizeof(buf),
2503                                                   op->args[k++]));
2504             }
2505             switch (c) {
2506             case INDEX_op_brcond_i32:
2507             case INDEX_op_setcond_i32:
2508             case INDEX_op_movcond_i32:
2509             case INDEX_op_brcond2_i32:
2510             case INDEX_op_setcond2_i32:
2511             case INDEX_op_brcond_i64:
2512             case INDEX_op_setcond_i64:
2513             case INDEX_op_movcond_i64:
2514             case INDEX_op_cmp_vec:
2515             case INDEX_op_cmpsel_vec:
2516                 if (op->args[k] < ARRAY_SIZE(cond_name)
2517                     && cond_name[op->args[k]]) {
2518                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2519                 } else {
2520                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2521                 }
2522                 i = 1;
2523                 break;
2524             case INDEX_op_qemu_ld_a32_i32:
2525             case INDEX_op_qemu_ld_a64_i32:
2526             case INDEX_op_qemu_st_a32_i32:
2527             case INDEX_op_qemu_st_a64_i32:
2528             case INDEX_op_qemu_st8_a32_i32:
2529             case INDEX_op_qemu_st8_a64_i32:
2530             case INDEX_op_qemu_ld_a32_i64:
2531             case INDEX_op_qemu_ld_a64_i64:
2532             case INDEX_op_qemu_st_a32_i64:
2533             case INDEX_op_qemu_st_a64_i64:
2534             case INDEX_op_qemu_ld_a32_i128:
2535             case INDEX_op_qemu_ld_a64_i128:
2536             case INDEX_op_qemu_st_a32_i128:
2537             case INDEX_op_qemu_st_a64_i128:
2538                 {
2539                     const char *s_al, *s_op, *s_at;
2540                     MemOpIdx oi = op->args[k++];
2541                     MemOp op = get_memop(oi);
2542                     unsigned ix = get_mmuidx(oi);
2543 
2544                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2545                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2546                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2547                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2548 
2549                     /* If all fields are accounted for, print symbolically. */
2550                     if (!op && s_al && s_op && s_at) {
2551                         col += ne_fprintf(f, ",%s%s%s,%u",
2552                                           s_at, s_al, s_op, ix);
2553                     } else {
2554                         op = get_memop(oi);
2555                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2556                     }
2557                     i = 1;
2558                 }
2559                 break;
2560             case INDEX_op_bswap16_i32:
2561             case INDEX_op_bswap16_i64:
2562             case INDEX_op_bswap32_i32:
2563             case INDEX_op_bswap32_i64:
2564             case INDEX_op_bswap64_i64:
2565                 {
2566                     TCGArg flags = op->args[k];
2567                     const char *name = NULL;
2568 
2569                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2570                         name = bswap_flag_name[flags];
2571                     }
2572                     if (name) {
2573                         col += ne_fprintf(f, ",%s", name);
2574                     } else {
2575                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2576                     }
2577                     i = k = 1;
2578                 }
2579                 break;
2580             default:
2581                 i = 0;
2582                 break;
2583             }
2584             switch (c) {
2585             case INDEX_op_set_label:
2586             case INDEX_op_br:
2587             case INDEX_op_brcond_i32:
2588             case INDEX_op_brcond_i64:
2589             case INDEX_op_brcond2_i32:
2590                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2591                                   arg_label(op->args[k])->id);
2592                 i++, k++;
2593                 break;
2594             case INDEX_op_mb:
2595                 {
2596                     TCGBar membar = op->args[k];
2597                     const char *b_op, *m_op;
2598 
2599                     switch (membar & TCG_BAR_SC) {
2600                     case 0:
2601                         b_op = "none";
2602                         break;
2603                     case TCG_BAR_LDAQ:
2604                         b_op = "acq";
2605                         break;
2606                     case TCG_BAR_STRL:
2607                         b_op = "rel";
2608                         break;
2609                     case TCG_BAR_SC:
2610                         b_op = "seq";
2611                         break;
2612                     default:
2613                         g_assert_not_reached();
2614                     }
2615 
2616                     switch (membar & TCG_MO_ALL) {
2617                     case 0:
2618                         m_op = "none";
2619                         break;
2620                     case TCG_MO_LD_LD:
2621                         m_op = "rr";
2622                         break;
2623                     case TCG_MO_LD_ST:
2624                         m_op = "rw";
2625                         break;
2626                     case TCG_MO_ST_LD:
2627                         m_op = "wr";
2628                         break;
2629                     case TCG_MO_ST_ST:
2630                         m_op = "ww";
2631                         break;
2632                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2633                         m_op = "rr+rw";
2634                         break;
2635                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2636                         m_op = "rr+wr";
2637                         break;
2638                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2639                         m_op = "rr+ww";
2640                         break;
2641                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2642                         m_op = "rw+wr";
2643                         break;
2644                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2645                         m_op = "rw+ww";
2646                         break;
2647                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2648                         m_op = "wr+ww";
2649                         break;
2650                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2651                         m_op = "rr+rw+wr";
2652                         break;
2653                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2654                         m_op = "rr+rw+ww";
2655                         break;
2656                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2657                         m_op = "rr+wr+ww";
2658                         break;
2659                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2660                         m_op = "rw+wr+ww";
2661                         break;
2662                     case TCG_MO_ALL:
2663                         m_op = "all";
2664                         break;
2665                     default:
2666                         g_assert_not_reached();
2667                     }
2668 
2669                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2670                     i++, k++;
2671                 }
2672                 break;
2673             default:
2674                 break;
2675             }
2676             for (; i < nb_cargs; i++, k++) {
2677                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2678                                   op->args[k]);
2679             }
2680         }
2681 
2682         if (have_prefs || op->life) {
2683             for (; col < 40; ++col) {
2684                 putc(' ', f);
2685             }
2686         }
2687 
2688         if (op->life) {
2689             unsigned life = op->life;
2690 
2691             if (life & (SYNC_ARG * 3)) {
2692                 ne_fprintf(f, "  sync:");
2693                 for (i = 0; i < 2; ++i) {
2694                     if (life & (SYNC_ARG << i)) {
2695                         ne_fprintf(f, " %d", i);
2696                     }
2697                 }
2698             }
2699             life /= DEAD_ARG;
2700             if (life) {
2701                 ne_fprintf(f, "  dead:");
2702                 for (i = 0; life; ++i, life >>= 1) {
2703                     if (life & 1) {
2704                         ne_fprintf(f, " %d", i);
2705                     }
2706                 }
2707             }
2708         }
2709 
2710         if (have_prefs) {
2711             for (i = 0; i < nb_oargs; ++i) {
2712                 TCGRegSet set = output_pref(op, i);
2713 
2714                 if (i == 0) {
2715                     ne_fprintf(f, "  pref=");
2716                 } else {
2717                     ne_fprintf(f, ",");
2718                 }
2719                 if (set == 0) {
2720                     ne_fprintf(f, "none");
2721                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2722                     ne_fprintf(f, "all");
2723 #ifdef CONFIG_DEBUG_TCG
2724                 } else if (tcg_regset_single(set)) {
2725                     TCGReg reg = tcg_regset_first(set);
2726                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2727 #endif
2728                 } else if (TCG_TARGET_NB_REGS <= 32) {
2729                     ne_fprintf(f, "0x%x", (uint32_t)set);
2730                 } else {
2731                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2732                 }
2733             }
2734         }
2735 
2736         putc('\n', f);
2737     }
2738 }
2739 
2740 /* we give more priority to constraints with less registers */
2741 static int get_constraint_priority(const TCGOpDef *def, int k)
2742 {
2743     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2744     int n = ctpop64(arg_ct->regs);
2745 
2746     /*
2747      * Sort constraints of a single register first, which includes output
2748      * aliases (which must exactly match the input already allocated).
2749      */
2750     if (n == 1 || arg_ct->oalias) {
2751         return INT_MAX;
2752     }
2753 
2754     /*
2755      * Sort register pairs next, first then second immediately after.
2756      * Arbitrarily sort multiple pairs by the index of the first reg;
2757      * there shouldn't be many pairs.
2758      */
2759     switch (arg_ct->pair) {
2760     case 1:
2761     case 3:
2762         return (k + 1) * 2;
2763     case 2:
2764         return (arg_ct->pair_index + 1) * 2 - 1;
2765     }
2766 
2767     /* Finally, sort by decreasing register count. */
2768     assert(n > 1);
2769     return -n;
2770 }
2771 
2772 /* sort from highest priority to lowest */
2773 static void sort_constraints(TCGOpDef *def, int start, int n)
2774 {
2775     int i, j;
2776     TCGArgConstraint *a = def->args_ct;
2777 
2778     for (i = 0; i < n; i++) {
2779         a[start + i].sort_index = start + i;
2780     }
2781     if (n <= 1) {
2782         return;
2783     }
2784     for (i = 0; i < n - 1; i++) {
2785         for (j = i + 1; j < n; j++) {
2786             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2787             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2788             if (p1 < p2) {
2789                 int tmp = a[start + i].sort_index;
2790                 a[start + i].sort_index = a[start + j].sort_index;
2791                 a[start + j].sort_index = tmp;
2792             }
2793         }
2794     }
2795 }
2796 
2797 static void process_op_defs(TCGContext *s)
2798 {
2799     TCGOpcode op;
2800 
2801     for (op = 0; op < NB_OPS; op++) {
2802         TCGOpDef *def = &tcg_op_defs[op];
2803         const TCGTargetOpDef *tdefs;
2804         bool saw_alias_pair = false;
2805         int i, o, i2, o2, nb_args;
2806 
2807         if (def->flags & TCG_OPF_NOT_PRESENT) {
2808             continue;
2809         }
2810 
2811         nb_args = def->nb_iargs + def->nb_oargs;
2812         if (nb_args == 0) {
2813             continue;
2814         }
2815 
2816         /*
2817          * Macro magic should make it impossible, but double-check that
2818          * the array index is in range.  Since the signness of an enum
2819          * is implementation defined, force the result to unsigned.
2820          */
2821         unsigned con_set = tcg_target_op_def(op);
2822         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2823         tdefs = &constraint_sets[con_set];
2824 
2825         for (i = 0; i < nb_args; i++) {
2826             const char *ct_str = tdefs->args_ct_str[i];
2827             bool input_p = i >= def->nb_oargs;
2828 
2829             /* Incomplete TCGTargetOpDef entry. */
2830             tcg_debug_assert(ct_str != NULL);
2831 
2832             switch (*ct_str) {
2833             case '0' ... '9':
2834                 o = *ct_str - '0';
2835                 tcg_debug_assert(input_p);
2836                 tcg_debug_assert(o < def->nb_oargs);
2837                 tcg_debug_assert(def->args_ct[o].regs != 0);
2838                 tcg_debug_assert(!def->args_ct[o].oalias);
2839                 def->args_ct[i] = def->args_ct[o];
2840                 /* The output sets oalias.  */
2841                 def->args_ct[o].oalias = 1;
2842                 def->args_ct[o].alias_index = i;
2843                 /* The input sets ialias. */
2844                 def->args_ct[i].ialias = 1;
2845                 def->args_ct[i].alias_index = o;
2846                 if (def->args_ct[i].pair) {
2847                     saw_alias_pair = true;
2848                 }
2849                 tcg_debug_assert(ct_str[1] == '\0');
2850                 continue;
2851 
2852             case '&':
2853                 tcg_debug_assert(!input_p);
2854                 def->args_ct[i].newreg = true;
2855                 ct_str++;
2856                 break;
2857 
2858             case 'p': /* plus */
2859                 /* Allocate to the register after the previous. */
2860                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2861                 o = i - 1;
2862                 tcg_debug_assert(!def->args_ct[o].pair);
2863                 tcg_debug_assert(!def->args_ct[o].ct);
2864                 def->args_ct[i] = (TCGArgConstraint){
2865                     .pair = 2,
2866                     .pair_index = o,
2867                     .regs = def->args_ct[o].regs << 1,
2868                 };
2869                 def->args_ct[o].pair = 1;
2870                 def->args_ct[o].pair_index = i;
2871                 tcg_debug_assert(ct_str[1] == '\0');
2872                 continue;
2873 
2874             case 'm': /* minus */
2875                 /* Allocate to the register before the previous. */
2876                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2877                 o = i - 1;
2878                 tcg_debug_assert(!def->args_ct[o].pair);
2879                 tcg_debug_assert(!def->args_ct[o].ct);
2880                 def->args_ct[i] = (TCGArgConstraint){
2881                     .pair = 1,
2882                     .pair_index = o,
2883                     .regs = def->args_ct[o].regs >> 1,
2884                 };
2885                 def->args_ct[o].pair = 2;
2886                 def->args_ct[o].pair_index = i;
2887                 tcg_debug_assert(ct_str[1] == '\0');
2888                 continue;
2889             }
2890 
2891             do {
2892                 switch (*ct_str) {
2893                 case 'i':
2894                     def->args_ct[i].ct |= TCG_CT_CONST;
2895                     break;
2896 
2897                 /* Include all of the target-specific constraints. */
2898 
2899 #undef CONST
2900 #define CONST(CASE, MASK) \
2901     case CASE: def->args_ct[i].ct |= MASK; break;
2902 #define REGS(CASE, MASK) \
2903     case CASE: def->args_ct[i].regs |= MASK; break;
2904 
2905 #include "tcg-target-con-str.h"
2906 
2907 #undef REGS
2908 #undef CONST
2909                 default:
2910                 case '0' ... '9':
2911                 case '&':
2912                 case 'p':
2913                 case 'm':
2914                     /* Typo in TCGTargetOpDef constraint. */
2915                     g_assert_not_reached();
2916                 }
2917             } while (*++ct_str != '\0');
2918         }
2919 
2920         /* TCGTargetOpDef entry with too much information? */
2921         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2922 
2923         /*
2924          * Fix up output pairs that are aliased with inputs.
2925          * When we created the alias, we copied pair from the output.
2926          * There are three cases:
2927          *    (1a) Pairs of inputs alias pairs of outputs.
2928          *    (1b) One input aliases the first of a pair of outputs.
2929          *    (2)  One input aliases the second of a pair of outputs.
2930          *
2931          * Case 1a is handled by making sure that the pair_index'es are
2932          * properly updated so that they appear the same as a pair of inputs.
2933          *
2934          * Case 1b is handled by setting the pair_index of the input to
2935          * itself, simply so it doesn't point to an unrelated argument.
2936          * Since we don't encounter the "second" during the input allocation
2937          * phase, nothing happens with the second half of the input pair.
2938          *
2939          * Case 2 is handled by setting the second input to pair=3, the
2940          * first output to pair=3, and the pair_index'es to match.
2941          */
2942         if (saw_alias_pair) {
2943             for (i = def->nb_oargs; i < nb_args; i++) {
2944                 /*
2945                  * Since [0-9pm] must be alone in the constraint string,
2946                  * the only way they can both be set is if the pair comes
2947                  * from the output alias.
2948                  */
2949                 if (!def->args_ct[i].ialias) {
2950                     continue;
2951                 }
2952                 switch (def->args_ct[i].pair) {
2953                 case 0:
2954                     break;
2955                 case 1:
2956                     o = def->args_ct[i].alias_index;
2957                     o2 = def->args_ct[o].pair_index;
2958                     tcg_debug_assert(def->args_ct[o].pair == 1);
2959                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2960                     if (def->args_ct[o2].oalias) {
2961                         /* Case 1a */
2962                         i2 = def->args_ct[o2].alias_index;
2963                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2964                         def->args_ct[i2].pair_index = i;
2965                         def->args_ct[i].pair_index = i2;
2966                     } else {
2967                         /* Case 1b */
2968                         def->args_ct[i].pair_index = i;
2969                     }
2970                     break;
2971                 case 2:
2972                     o = def->args_ct[i].alias_index;
2973                     o2 = def->args_ct[o].pair_index;
2974                     tcg_debug_assert(def->args_ct[o].pair == 2);
2975                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2976                     if (def->args_ct[o2].oalias) {
2977                         /* Case 1a */
2978                         i2 = def->args_ct[o2].alias_index;
2979                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2980                         def->args_ct[i2].pair_index = i;
2981                         def->args_ct[i].pair_index = i2;
2982                     } else {
2983                         /* Case 2 */
2984                         def->args_ct[i].pair = 3;
2985                         def->args_ct[o2].pair = 3;
2986                         def->args_ct[i].pair_index = o2;
2987                         def->args_ct[o2].pair_index = i;
2988                     }
2989                     break;
2990                 default:
2991                     g_assert_not_reached();
2992                 }
2993             }
2994         }
2995 
2996         /* sort the constraints (XXX: this is just an heuristic) */
2997         sort_constraints(def, 0, def->nb_oargs);
2998         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2999     }
3000 }
3001 
3002 static void remove_label_use(TCGOp *op, int idx)
3003 {
3004     TCGLabel *label = arg_label(op->args[idx]);
3005     TCGLabelUse *use;
3006 
3007     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3008         if (use->op == op) {
3009             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3010             return;
3011         }
3012     }
3013     g_assert_not_reached();
3014 }
3015 
3016 void tcg_op_remove(TCGContext *s, TCGOp *op)
3017 {
3018     switch (op->opc) {
3019     case INDEX_op_br:
3020         remove_label_use(op, 0);
3021         break;
3022     case INDEX_op_brcond_i32:
3023     case INDEX_op_brcond_i64:
3024         remove_label_use(op, 3);
3025         break;
3026     case INDEX_op_brcond2_i32:
3027         remove_label_use(op, 5);
3028         break;
3029     default:
3030         break;
3031     }
3032 
3033     QTAILQ_REMOVE(&s->ops, op, link);
3034     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3035     s->nb_ops--;
3036 }
3037 
3038 void tcg_remove_ops_after(TCGOp *op)
3039 {
3040     TCGContext *s = tcg_ctx;
3041 
3042     while (true) {
3043         TCGOp *last = tcg_last_op();
3044         if (last == op) {
3045             return;
3046         }
3047         tcg_op_remove(s, last);
3048     }
3049 }
3050 
3051 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3052 {
3053     TCGContext *s = tcg_ctx;
3054     TCGOp *op = NULL;
3055 
3056     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3057         QTAILQ_FOREACH(op, &s->free_ops, link) {
3058             if (nargs <= op->nargs) {
3059                 QTAILQ_REMOVE(&s->free_ops, op, link);
3060                 nargs = op->nargs;
3061                 goto found;
3062             }
3063         }
3064     }
3065 
3066     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3067     nargs = MAX(4, nargs);
3068     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3069 
3070  found:
3071     memset(op, 0, offsetof(TCGOp, link));
3072     op->opc = opc;
3073     op->nargs = nargs;
3074 
3075     /* Check for bitfield overflow. */
3076     tcg_debug_assert(op->nargs == nargs);
3077 
3078     s->nb_ops++;
3079     return op;
3080 }
3081 
3082 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3083 {
3084     TCGOp *op = tcg_op_alloc(opc, nargs);
3085     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3086     return op;
3087 }
3088 
3089 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3090                             TCGOpcode opc, unsigned nargs)
3091 {
3092     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3093     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3094     return new_op;
3095 }
3096 
3097 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3098                            TCGOpcode opc, unsigned nargs)
3099 {
3100     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3101     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3102     return new_op;
3103 }
3104 
3105 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3106 {
3107     TCGLabelUse *u;
3108 
3109     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3110         TCGOp *op = u->op;
3111         switch (op->opc) {
3112         case INDEX_op_br:
3113             op->args[0] = label_arg(to);
3114             break;
3115         case INDEX_op_brcond_i32:
3116         case INDEX_op_brcond_i64:
3117             op->args[3] = label_arg(to);
3118             break;
3119         case INDEX_op_brcond2_i32:
3120             op->args[5] = label_arg(to);
3121             break;
3122         default:
3123             g_assert_not_reached();
3124         }
3125     }
3126 
3127     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3128 }
3129 
3130 /* Reachable analysis : remove unreachable code.  */
3131 static void __attribute__((noinline))
3132 reachable_code_pass(TCGContext *s)
3133 {
3134     TCGOp *op, *op_next, *op_prev;
3135     bool dead = false;
3136 
3137     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3138         bool remove = dead;
3139         TCGLabel *label;
3140 
3141         switch (op->opc) {
3142         case INDEX_op_set_label:
3143             label = arg_label(op->args[0]);
3144 
3145             /*
3146              * Note that the first op in the TB is always a load,
3147              * so there is always something before a label.
3148              */
3149             op_prev = QTAILQ_PREV(op, link);
3150 
3151             /*
3152              * If we find two sequential labels, move all branches to
3153              * reference the second label and remove the first label.
3154              * Do this before branch to next optimization, so that the
3155              * middle label is out of the way.
3156              */
3157             if (op_prev->opc == INDEX_op_set_label) {
3158                 move_label_uses(label, arg_label(op_prev->args[0]));
3159                 tcg_op_remove(s, op_prev);
3160                 op_prev = QTAILQ_PREV(op, link);
3161             }
3162 
3163             /*
3164              * Optimization can fold conditional branches to unconditional.
3165              * If we find a label which is preceded by an unconditional
3166              * branch to next, remove the branch.  We couldn't do this when
3167              * processing the branch because any dead code between the branch
3168              * and label had not yet been removed.
3169              */
3170             if (op_prev->opc == INDEX_op_br &&
3171                 label == arg_label(op_prev->args[0])) {
3172                 tcg_op_remove(s, op_prev);
3173                 /* Fall through means insns become live again.  */
3174                 dead = false;
3175             }
3176 
3177             if (QSIMPLEQ_EMPTY(&label->branches)) {
3178                 /*
3179                  * While there is an occasional backward branch, virtually
3180                  * all branches generated by the translators are forward.
3181                  * Which means that generally we will have already removed
3182                  * all references to the label that will be, and there is
3183                  * little to be gained by iterating.
3184                  */
3185                 remove = true;
3186             } else {
3187                 /* Once we see a label, insns become live again.  */
3188                 dead = false;
3189                 remove = false;
3190             }
3191             break;
3192 
3193         case INDEX_op_br:
3194         case INDEX_op_exit_tb:
3195         case INDEX_op_goto_ptr:
3196             /* Unconditional branches; everything following is dead.  */
3197             dead = true;
3198             break;
3199 
3200         case INDEX_op_call:
3201             /* Notice noreturn helper calls, raising exceptions.  */
3202             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3203                 dead = true;
3204             }
3205             break;
3206 
3207         case INDEX_op_insn_start:
3208             /* Never remove -- we need to keep these for unwind.  */
3209             remove = false;
3210             break;
3211 
3212         default:
3213             break;
3214         }
3215 
3216         if (remove) {
3217             tcg_op_remove(s, op);
3218         }
3219     }
3220 }
3221 
/* Liveness state bits stored in a temp's state field. */
#define TS_DEAD  1   /* the temp is dead */
#define TS_MEM   2   /* the temp should be in memory */

/*
 * Test the dead / sync bit for argument N in the local variable
 * arg_life, which must be in scope where these are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3227 
3228 /* For liveness_pass_1, the register preferences for a given temp.  */
3229 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3230 {
3231     return ts->state_ptr;
3232 }
3233 
3234 /* For liveness_pass_1, reset the preferences for a given temp to the
3235  * maximal regset for its type.
3236  */
3237 static inline void la_reset_pref(TCGTemp *ts)
3238 {
3239     *la_temp_pref(ts)
3240         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3241 }
3242 
3243 /* liveness analysis: end of function: all temps are dead, and globals
3244    should be in memory. */
3245 static void la_func_end(TCGContext *s, int ng, int nt)
3246 {
3247     int i;
3248 
3249     for (i = 0; i < ng; ++i) {
3250         s->temps[i].state = TS_DEAD | TS_MEM;
3251         la_reset_pref(&s->temps[i]);
3252     }
3253     for (i = ng; i < nt; ++i) {
3254         s->temps[i].state = TS_DEAD;
3255         la_reset_pref(&s->temps[i]);
3256     }
3257 }
3258 
3259 /* liveness analysis: end of basic block: all temps are dead, globals
3260    and local temps should be in memory. */
3261 static void la_bb_end(TCGContext *s, int ng, int nt)
3262 {
3263     int i;
3264 
3265     for (i = 0; i < nt; ++i) {
3266         TCGTemp *ts = &s->temps[i];
3267         int state;
3268 
3269         switch (ts->kind) {
3270         case TEMP_FIXED:
3271         case TEMP_GLOBAL:
3272         case TEMP_TB:
3273             state = TS_DEAD | TS_MEM;
3274             break;
3275         case TEMP_EBB:
3276         case TEMP_CONST:
3277             state = TS_DEAD;
3278             break;
3279         default:
3280             g_assert_not_reached();
3281         }
3282         ts->state = state;
3283         la_reset_pref(ts);
3284     }
3285 }
3286 
3287 /* liveness analysis: sync globals back to memory.  */
3288 static void la_global_sync(TCGContext *s, int ng)
3289 {
3290     int i;
3291 
3292     for (i = 0; i < ng; ++i) {
3293         int state = s->temps[i].state;
3294         s->temps[i].state = state | TS_MEM;
3295         if (state == TS_DEAD) {
3296             /* If the global was previously dead, reset prefs.  */
3297             la_reset_pref(&s->temps[i]);
3298         }
3299     }
3300 }
3301 
3302 /*
3303  * liveness analysis: conditional branch: all temps are dead unless
3304  * explicitly live-across-conditional-branch, globals and local temps
3305  * should be synced.
3306  */
3307 static void la_bb_sync(TCGContext *s, int ng, int nt)
3308 {
3309     la_global_sync(s, ng);
3310 
3311     for (int i = ng; i < nt; ++i) {
3312         TCGTemp *ts = &s->temps[i];
3313         int state;
3314 
3315         switch (ts->kind) {
3316         case TEMP_TB:
3317             state = ts->state;
3318             ts->state = state | TS_MEM;
3319             if (state != TS_DEAD) {
3320                 continue;
3321             }
3322             break;
3323         case TEMP_EBB:
3324         case TEMP_CONST:
3325             continue;
3326         default:
3327             g_assert_not_reached();
3328         }
3329         la_reset_pref(&s->temps[i]);
3330     }
3331 }
3332 
3333 /* liveness analysis: sync globals back to memory and kill.  */
3334 static void la_global_kill(TCGContext *s, int ng)
3335 {
3336     int i;
3337 
3338     for (i = 0; i < ng; i++) {
3339         s->temps[i].state = TS_DEAD | TS_MEM;
3340         la_reset_pref(&s->temps[i]);
3341     }
3342 }
3343 
3344 /* liveness analysis: note live globals crossing calls.  */
3345 static void la_cross_call(TCGContext *s, int nt)
3346 {
3347     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3348     int i;
3349 
3350     for (i = 0; i < nt; i++) {
3351         TCGTemp *ts = &s->temps[i];
3352         if (!(ts->state & TS_DEAD)) {
3353             TCGRegSet *pset = la_temp_pref(ts);
3354             TCGRegSet set = *pset;
3355 
3356             set &= mask;
3357             /* If the combination is not possible, restart.  */
3358             if (set == 0) {
3359                 set = tcg_target_available_regs[ts->type] & mask;
3360             }
3361             *pset = set;
3362         }
3363     }
3364 }
3365 
3366 /*
3367  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3368  * to TEMP_EBB, if possible.
3369  */
3370 static void __attribute__((noinline))
3371 liveness_pass_0(TCGContext *s)
3372 {
3373     void * const multiple_ebb = (void *)(uintptr_t)-1;
3374     int nb_temps = s->nb_temps;
3375     TCGOp *op, *ebb;
3376 
3377     for (int i = s->nb_globals; i < nb_temps; ++i) {
3378         s->temps[i].state_ptr = NULL;
3379     }
3380 
3381     /*
3382      * Represent each EBB by the op at which it begins.  In the case of
3383      * the first EBB, this is the first op, otherwise it is a label.
3384      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3385      * within a single EBB, else MULTIPLE_EBB.
3386      */
3387     ebb = QTAILQ_FIRST(&s->ops);
3388     QTAILQ_FOREACH(op, &s->ops, link) {
3389         const TCGOpDef *def;
3390         int nb_oargs, nb_iargs;
3391 
3392         switch (op->opc) {
3393         case INDEX_op_set_label:
3394             ebb = op;
3395             continue;
3396         case INDEX_op_discard:
3397             continue;
3398         case INDEX_op_call:
3399             nb_oargs = TCGOP_CALLO(op);
3400             nb_iargs = TCGOP_CALLI(op);
3401             break;
3402         default:
3403             def = &tcg_op_defs[op->opc];
3404             nb_oargs = def->nb_oargs;
3405             nb_iargs = def->nb_iargs;
3406             break;
3407         }
3408 
3409         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3410             TCGTemp *ts = arg_temp(op->args[i]);
3411 
3412             if (ts->kind != TEMP_TB) {
3413                 continue;
3414             }
3415             if (ts->state_ptr == NULL) {
3416                 ts->state_ptr = ebb;
3417             } else if (ts->state_ptr != ebb) {
3418                 ts->state_ptr = multiple_ebb;
3419             }
3420         }
3421     }
3422 
3423     /*
3424      * For TEMP_TB that turned out not to be used beyond one EBB,
3425      * reduce the liveness to TEMP_EBB.
3426      */
3427     for (int i = s->nb_globals; i < nb_temps; ++i) {
3428         TCGTemp *ts = &s->temps[i];
3429         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3430             ts->kind = TEMP_EBB;
3431         }
3432     }
3433 }
3434 
/* Liveness analysis: update the opc_arg_life array to tell whether a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
3438 static void __attribute__((noinline))
3439 liveness_pass_1(TCGContext *s)
3440 {
3441     int nb_globals = s->nb_globals;
3442     int nb_temps = s->nb_temps;
3443     TCGOp *op, *op_prev;
3444     TCGRegSet *prefs;
3445     int i;
3446 
3447     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3448     for (i = 0; i < nb_temps; ++i) {
3449         s->temps[i].state_ptr = prefs + i;
3450     }
3451 
3452     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3453     la_func_end(s, nb_globals, nb_temps);
3454 
3455     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3456         int nb_iargs, nb_oargs;
3457         TCGOpcode opc_new, opc_new2;
3458         bool have_opc_new2;
3459         TCGLifeData arg_life = 0;
3460         TCGTemp *ts;
3461         TCGOpcode opc = op->opc;
3462         const TCGOpDef *def = &tcg_op_defs[opc];
3463 
3464         switch (opc) {
3465         case INDEX_op_call:
3466             {
3467                 const TCGHelperInfo *info = tcg_call_info(op);
3468                 int call_flags = tcg_call_flags(op);
3469 
3470                 nb_oargs = TCGOP_CALLO(op);
3471                 nb_iargs = TCGOP_CALLI(op);
3472 
3473                 /* pure functions can be removed if their result is unused */
3474                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3475                     for (i = 0; i < nb_oargs; i++) {
3476                         ts = arg_temp(op->args[i]);
3477                         if (ts->state != TS_DEAD) {
3478                             goto do_not_remove_call;
3479                         }
3480                     }
3481                     goto do_remove;
3482                 }
3483             do_not_remove_call:
3484 
3485                 /* Output args are dead.  */
3486                 for (i = 0; i < nb_oargs; i++) {
3487                     ts = arg_temp(op->args[i]);
3488                     if (ts->state & TS_DEAD) {
3489                         arg_life |= DEAD_ARG << i;
3490                     }
3491                     if (ts->state & TS_MEM) {
3492                         arg_life |= SYNC_ARG << i;
3493                     }
3494                     ts->state = TS_DEAD;
3495                     la_reset_pref(ts);
3496                 }
3497 
3498                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3499                 memset(op->output_pref, 0, sizeof(op->output_pref));
3500 
3501                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3502                                     TCG_CALL_NO_READ_GLOBALS))) {
3503                     la_global_kill(s, nb_globals);
3504                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3505                     la_global_sync(s, nb_globals);
3506                 }
3507 
3508                 /* Record arguments that die in this helper.  */
3509                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3510                     ts = arg_temp(op->args[i]);
3511                     if (ts->state & TS_DEAD) {
3512                         arg_life |= DEAD_ARG << i;
3513                     }
3514                 }
3515 
3516                 /* For all live registers, remove call-clobbered prefs.  */
3517                 la_cross_call(s, nb_temps);
3518 
3519                 /*
3520                  * Input arguments are live for preceding opcodes.
3521                  *
3522                  * For those arguments that die, and will be allocated in
3523                  * registers, clear the register set for that arg, to be
3524                  * filled in below.  For args that will be on the stack,
3525                  * reset to any available reg.  Process arguments in reverse
3526                  * order so that if a temp is used more than once, the stack
3527                  * reset to max happens before the register reset to 0.
3528                  */
3529                 for (i = nb_iargs - 1; i >= 0; i--) {
3530                     const TCGCallArgumentLoc *loc = &info->in[i];
3531                     ts = arg_temp(op->args[nb_oargs + i]);
3532 
3533                     if (ts->state & TS_DEAD) {
3534                         switch (loc->kind) {
3535                         case TCG_CALL_ARG_NORMAL:
3536                         case TCG_CALL_ARG_EXTEND_U:
3537                         case TCG_CALL_ARG_EXTEND_S:
3538                             if (arg_slot_reg_p(loc->arg_slot)) {
3539                                 *la_temp_pref(ts) = 0;
3540                                 break;
3541                             }
3542                             /* fall through */
3543                         default:
3544                             *la_temp_pref(ts) =
3545                                 tcg_target_available_regs[ts->type];
3546                             break;
3547                         }
3548                         ts->state &= ~TS_DEAD;
3549                     }
3550                 }
3551 
3552                 /*
3553                  * For each input argument, add its input register to prefs.
3554                  * If a temp is used once, this produces a single set bit;
3555                  * if a temp is used multiple times, this produces a set.
3556                  */
3557                 for (i = 0; i < nb_iargs; i++) {
3558                     const TCGCallArgumentLoc *loc = &info->in[i];
3559                     ts = arg_temp(op->args[nb_oargs + i]);
3560 
3561                     switch (loc->kind) {
3562                     case TCG_CALL_ARG_NORMAL:
3563                     case TCG_CALL_ARG_EXTEND_U:
3564                     case TCG_CALL_ARG_EXTEND_S:
3565                         if (arg_slot_reg_p(loc->arg_slot)) {
3566                             tcg_regset_set_reg(*la_temp_pref(ts),
3567                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3568                         }
3569                         break;
3570                     default:
3571                         break;
3572                     }
3573                 }
3574             }
3575             break;
3576         case INDEX_op_insn_start:
3577             break;
3578         case INDEX_op_discard:
3579             /* mark the temporary as dead */
3580             ts = arg_temp(op->args[0]);
3581             ts->state = TS_DEAD;
3582             la_reset_pref(ts);
3583             break;
3584 
3585         case INDEX_op_add2_i32:
3586             opc_new = INDEX_op_add_i32;
3587             goto do_addsub2;
3588         case INDEX_op_sub2_i32:
3589             opc_new = INDEX_op_sub_i32;
3590             goto do_addsub2;
3591         case INDEX_op_add2_i64:
3592             opc_new = INDEX_op_add_i64;
3593             goto do_addsub2;
3594         case INDEX_op_sub2_i64:
3595             opc_new = INDEX_op_sub_i64;
3596         do_addsub2:
3597             nb_iargs = 4;
3598             nb_oargs = 2;
3599             /* Test if the high part of the operation is dead, but not
3600                the low part.  The result can be optimized to a simple
3601                add or sub.  This happens often for x86_64 guest when the
3602                cpu mode is set to 32 bit.  */
3603             if (arg_temp(op->args[1])->state == TS_DEAD) {
3604                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3605                     goto do_remove;
3606                 }
3607                 /* Replace the opcode and adjust the args in place,
3608                    leaving 3 unused args at the end.  */
3609                 op->opc = opc = opc_new;
3610                 op->args[1] = op->args[2];
3611                 op->args[2] = op->args[4];
3612                 /* Fall through and mark the single-word operation live.  */
3613                 nb_iargs = 2;
3614                 nb_oargs = 1;
3615             }
3616             goto do_not_remove;
3617 
3618         case INDEX_op_mulu2_i32:
3619             opc_new = INDEX_op_mul_i32;
3620             opc_new2 = INDEX_op_muluh_i32;
3621             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3622             goto do_mul2;
3623         case INDEX_op_muls2_i32:
3624             opc_new = INDEX_op_mul_i32;
3625             opc_new2 = INDEX_op_mulsh_i32;
3626             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3627             goto do_mul2;
3628         case INDEX_op_mulu2_i64:
3629             opc_new = INDEX_op_mul_i64;
3630             opc_new2 = INDEX_op_muluh_i64;
3631             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3632             goto do_mul2;
3633         case INDEX_op_muls2_i64:
3634             opc_new = INDEX_op_mul_i64;
3635             opc_new2 = INDEX_op_mulsh_i64;
3636             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3637             goto do_mul2;
3638         do_mul2:
3639             nb_iargs = 2;
3640             nb_oargs = 2;
3641             if (arg_temp(op->args[1])->state == TS_DEAD) {
3642                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3643                     /* Both parts of the operation are dead.  */
3644                     goto do_remove;
3645                 }
3646                 /* The high part of the operation is dead; generate the low. */
3647                 op->opc = opc = opc_new;
3648                 op->args[1] = op->args[2];
3649                 op->args[2] = op->args[3];
3650             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3651                 /* The low part of the operation is dead; generate the high. */
3652                 op->opc = opc = opc_new2;
3653                 op->args[0] = op->args[1];
3654                 op->args[1] = op->args[2];
3655                 op->args[2] = op->args[3];
3656             } else {
3657                 goto do_not_remove;
3658             }
3659             /* Mark the single-word operation live.  */
3660             nb_oargs = 1;
3661             goto do_not_remove;
3662 
3663         default:
3664             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3665             nb_iargs = def->nb_iargs;
3666             nb_oargs = def->nb_oargs;
3667 
3668             /* Test if the operation can be removed because all
3669                its outputs are dead. We assume that nb_oargs == 0
3670                implies side effects */
3671             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3672                 for (i = 0; i < nb_oargs; i++) {
3673                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3674                         goto do_not_remove;
3675                     }
3676                 }
3677                 goto do_remove;
3678             }
3679             goto do_not_remove;
3680 
3681         do_remove:
3682             tcg_op_remove(s, op);
3683             break;
3684 
3685         do_not_remove:
3686             for (i = 0; i < nb_oargs; i++) {
3687                 ts = arg_temp(op->args[i]);
3688 
3689                 /* Remember the preference of the uses that followed.  */
3690                 if (i < ARRAY_SIZE(op->output_pref)) {
3691                     op->output_pref[i] = *la_temp_pref(ts);
3692                 }
3693 
3694                 /* Output args are dead.  */
3695                 if (ts->state & TS_DEAD) {
3696                     arg_life |= DEAD_ARG << i;
3697                 }
3698                 if (ts->state & TS_MEM) {
3699                     arg_life |= SYNC_ARG << i;
3700                 }
3701                 ts->state = TS_DEAD;
3702                 la_reset_pref(ts);
3703             }
3704 
3705             /* If end of basic block, update.  */
3706             if (def->flags & TCG_OPF_BB_EXIT) {
3707                 la_func_end(s, nb_globals, nb_temps);
3708             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3709                 la_bb_sync(s, nb_globals, nb_temps);
3710             } else if (def->flags & TCG_OPF_BB_END) {
3711                 la_bb_end(s, nb_globals, nb_temps);
3712             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3713                 la_global_sync(s, nb_globals);
3714                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3715                     la_cross_call(s, nb_temps);
3716                 }
3717             }
3718 
3719             /* Record arguments that die in this opcode.  */
3720             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3721                 ts = arg_temp(op->args[i]);
3722                 if (ts->state & TS_DEAD) {
3723                     arg_life |= DEAD_ARG << i;
3724                 }
3725             }
3726 
3727             /* Input arguments are live for preceding opcodes.  */
3728             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3729                 ts = arg_temp(op->args[i]);
3730                 if (ts->state & TS_DEAD) {
3731                     /* For operands that were dead, initially allow
3732                        all regs for the type.  */
3733                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3734                     ts->state &= ~TS_DEAD;
3735                 }
3736             }
3737 
3738             /* Incorporate constraints for this operand.  */
3739             switch (opc) {
3740             case INDEX_op_mov_i32:
3741             case INDEX_op_mov_i64:
3742                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3743                    have proper constraints.  That said, special case
3744                    moves to propagate preferences backward.  */
3745                 if (IS_DEAD_ARG(1)) {
3746                     *la_temp_pref(arg_temp(op->args[0]))
3747                         = *la_temp_pref(arg_temp(op->args[1]));
3748                 }
3749                 break;
3750 
3751             default:
3752                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3753                     const TCGArgConstraint *ct = &def->args_ct[i];
3754                     TCGRegSet set, *pset;
3755 
3756                     ts = arg_temp(op->args[i]);
3757                     pset = la_temp_pref(ts);
3758                     set = *pset;
3759 
3760                     set &= ct->regs;
3761                     if (ct->ialias) {
3762                         set &= output_pref(op, ct->alias_index);
3763                     }
3764                     /* If the combination is not possible, restart.  */
3765                     if (set == 0) {
3766                         set = ct->regs;
3767                     }
3768                     *pset = set;
3769                 }
3770                 break;
3771             }
3772             break;
3773         }
3774         op->life = arg_life;
3775     }
3776 }
3777 
3778 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3779 static bool __attribute__((noinline))
3780 liveness_pass_2(TCGContext *s)
3781 {
3782     int nb_globals = s->nb_globals;
3783     int nb_temps, i;
3784     bool changes = false;
3785     TCGOp *op, *op_next;
3786 
3787     /* Create a temporary for each indirect global.  */
3788     for (i = 0; i < nb_globals; ++i) {
3789         TCGTemp *its = &s->temps[i];
3790         if (its->indirect_reg) {
3791             TCGTemp *dts = tcg_temp_alloc(s);
3792             dts->type = its->type;
3793             dts->base_type = its->base_type;
3794             dts->temp_subindex = its->temp_subindex;
3795             dts->kind = TEMP_EBB;
3796             its->state_ptr = dts;
3797         } else {
3798             its->state_ptr = NULL;
3799         }
3800         /* All globals begin dead.  */
3801         its->state = TS_DEAD;
3802     }
3803     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3804         TCGTemp *its = &s->temps[i];
3805         its->state_ptr = NULL;
3806         its->state = TS_DEAD;
3807     }
3808 
3809     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3810         TCGOpcode opc = op->opc;
3811         const TCGOpDef *def = &tcg_op_defs[opc];
3812         TCGLifeData arg_life = op->life;
3813         int nb_iargs, nb_oargs, call_flags;
3814         TCGTemp *arg_ts, *dir_ts;
3815 
3816         if (opc == INDEX_op_call) {
3817             nb_oargs = TCGOP_CALLO(op);
3818             nb_iargs = TCGOP_CALLI(op);
3819             call_flags = tcg_call_flags(op);
3820         } else {
3821             nb_iargs = def->nb_iargs;
3822             nb_oargs = def->nb_oargs;
3823 
3824             /* Set flags similar to how calls require.  */
3825             if (def->flags & TCG_OPF_COND_BRANCH) {
3826                 /* Like reading globals: sync_globals */
3827                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3828             } else if (def->flags & TCG_OPF_BB_END) {
3829                 /* Like writing globals: save_globals */
3830                 call_flags = 0;
3831             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3832                 /* Like reading globals: sync_globals */
3833                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3834             } else {
3835                 /* No effect on globals.  */
3836                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3837                               TCG_CALL_NO_WRITE_GLOBALS);
3838             }
3839         }
3840 
3841         /* Make sure that input arguments are available.  */
3842         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3843             arg_ts = arg_temp(op->args[i]);
3844             dir_ts = arg_ts->state_ptr;
3845             if (dir_ts && arg_ts->state == TS_DEAD) {
3846                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3847                                   ? INDEX_op_ld_i32
3848                                   : INDEX_op_ld_i64);
3849                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3850 
3851                 lop->args[0] = temp_arg(dir_ts);
3852                 lop->args[1] = temp_arg(arg_ts->mem_base);
3853                 lop->args[2] = arg_ts->mem_offset;
3854 
3855                 /* Loaded, but synced with memory.  */
3856                 arg_ts->state = TS_MEM;
3857             }
3858         }
3859 
3860         /* Perform input replacement, and mark inputs that became dead.
3861            No action is required except keeping temp_state up to date
3862            so that we reload when needed.  */
3863         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3864             arg_ts = arg_temp(op->args[i]);
3865             dir_ts = arg_ts->state_ptr;
3866             if (dir_ts) {
3867                 op->args[i] = temp_arg(dir_ts);
3868                 changes = true;
3869                 if (IS_DEAD_ARG(i)) {
3870                     arg_ts->state = TS_DEAD;
3871                 }
3872             }
3873         }
3874 
3875         /* Liveness analysis should ensure that the following are
3876            all correct, for call sites and basic block end points.  */
3877         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3878             /* Nothing to do */
3879         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3880             for (i = 0; i < nb_globals; ++i) {
3881                 /* Liveness should see that globals are synced back,
3882                    that is, either TS_DEAD or TS_MEM.  */
3883                 arg_ts = &s->temps[i];
3884                 tcg_debug_assert(arg_ts->state_ptr == 0
3885                                  || arg_ts->state != 0);
3886             }
3887         } else {
3888             for (i = 0; i < nb_globals; ++i) {
3889                 /* Liveness should see that globals are saved back,
3890                    that is, TS_DEAD, waiting to be reloaded.  */
3891                 arg_ts = &s->temps[i];
3892                 tcg_debug_assert(arg_ts->state_ptr == 0
3893                                  || arg_ts->state == TS_DEAD);
3894             }
3895         }
3896 
3897         /* Outputs become available.  */
3898         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3899             arg_ts = arg_temp(op->args[0]);
3900             dir_ts = arg_ts->state_ptr;
3901             if (dir_ts) {
3902                 op->args[0] = temp_arg(dir_ts);
3903                 changes = true;
3904 
3905                 /* The output is now live and modified.  */
3906                 arg_ts->state = 0;
3907 
3908                 if (NEED_SYNC_ARG(0)) {
3909                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3910                                       ? INDEX_op_st_i32
3911                                       : INDEX_op_st_i64);
3912                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3913                     TCGTemp *out_ts = dir_ts;
3914 
3915                     if (IS_DEAD_ARG(0)) {
3916                         out_ts = arg_temp(op->args[1]);
3917                         arg_ts->state = TS_DEAD;
3918                         tcg_op_remove(s, op);
3919                     } else {
3920                         arg_ts->state = TS_MEM;
3921                     }
3922 
3923                     sop->args[0] = temp_arg(out_ts);
3924                     sop->args[1] = temp_arg(arg_ts->mem_base);
3925                     sop->args[2] = arg_ts->mem_offset;
3926                 } else {
3927                     tcg_debug_assert(!IS_DEAD_ARG(0));
3928                 }
3929             }
3930         } else {
3931             for (i = 0; i < nb_oargs; i++) {
3932                 arg_ts = arg_temp(op->args[i]);
3933                 dir_ts = arg_ts->state_ptr;
3934                 if (!dir_ts) {
3935                     continue;
3936                 }
3937                 op->args[i] = temp_arg(dir_ts);
3938                 changes = true;
3939 
3940                 /* The output is now live and modified.  */
3941                 arg_ts->state = 0;
3942 
3943                 /* Sync outputs upon their last write.  */
3944                 if (NEED_SYNC_ARG(i)) {
3945                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3946                                       ? INDEX_op_st_i32
3947                                       : INDEX_op_st_i64);
3948                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3949 
3950                     sop->args[0] = temp_arg(dir_ts);
3951                     sop->args[1] = temp_arg(arg_ts->mem_base);
3952                     sop->args[2] = arg_ts->mem_offset;
3953 
3954                     arg_ts->state = TS_MEM;
3955                 }
3956                 /* Drop outputs that are dead.  */
3957                 if (IS_DEAD_ARG(i)) {
3958                     arg_ts->state = TS_DEAD;
3959                 }
3960             }
3961         }
3962     }
3963 
3964     return changes;
3965 }
3966 
3967 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3968 {
3969     intptr_t off;
3970     int size, align;
3971 
3972     /* When allocating an object, look at the full type. */
3973     size = tcg_type_size(ts->base_type);
3974     switch (ts->base_type) {
3975     case TCG_TYPE_I32:
3976         align = 4;
3977         break;
3978     case TCG_TYPE_I64:
3979     case TCG_TYPE_V64:
3980         align = 8;
3981         break;
3982     case TCG_TYPE_I128:
3983     case TCG_TYPE_V128:
3984     case TCG_TYPE_V256:
3985         /*
3986          * Note that we do not require aligned storage for V256,
3987          * and that we provide alignment for I128 to match V128,
3988          * even if that's above what the host ABI requires.
3989          */
3990         align = 16;
3991         break;
3992     default:
3993         g_assert_not_reached();
3994     }
3995 
3996     /*
3997      * Assume the stack is sufficiently aligned.
3998      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3999      * and do not require 16 byte vector alignment.  This seems slightly
4000      * easier than fully parameterizing the above switch statement.
4001      */
4002     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4003     off = ROUND_UP(s->current_frame_offset, align);
4004 
4005     /* If we've exhausted the stack frame, restart with a smaller TB. */
4006     if (off + size > s->frame_end) {
4007         tcg_raise_tb_overflow(s);
4008     }
4009     s->current_frame_offset = off + size;
4010 #if defined(__sparc__)
4011     off += TCG_TARGET_STACK_BIAS;
4012 #endif
4013 
4014     /* If the object was subdivided, assign memory to all the parts. */
4015     if (ts->base_type != ts->type) {
4016         int part_size = tcg_type_size(ts->type);
4017         int part_count = size / part_size;
4018 
4019         /*
4020          * Each part is allocated sequentially in tcg_temp_new_internal.
4021          * Jump back to the first part by subtracting the current index.
4022          */
4023         ts -= ts->temp_subindex;
4024         for (int i = 0; i < part_count; ++i) {
4025             ts[i].mem_offset = off + i * part_size;
4026             ts[i].mem_base = s->frame_temp;
4027             ts[i].mem_allocated = 1;
4028         }
4029     } else {
4030         ts->mem_offset = off;
4031         ts->mem_base = s->frame_temp;
4032         ts->mem_allocated = 1;
4033     }
4034 }
4035 
4036 /* Assign @reg to @ts, and update reg_to_temp[]. */
4037 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4038 {
4039     if (ts->val_type == TEMP_VAL_REG) {
4040         TCGReg old = ts->reg;
4041         tcg_debug_assert(s->reg_to_temp[old] == ts);
4042         if (old == reg) {
4043             return;
4044         }
4045         s->reg_to_temp[old] = NULL;
4046     }
4047     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4048     s->reg_to_temp[reg] = ts;
4049     ts->val_type = TEMP_VAL_REG;
4050     ts->reg = reg;
4051 }
4052 
4053 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4054 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4055 {
4056     tcg_debug_assert(type != TEMP_VAL_REG);
4057     if (ts->val_type == TEMP_VAL_REG) {
4058         TCGReg reg = ts->reg;
4059         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4060         s->reg_to_temp[reg] = NULL;
4061     }
4062     ts->val_type = type;
4063 }
4064 
4065 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4066 
4067 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4068    mark it free; otherwise mark it dead.  */
4069 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4070 {
4071     TCGTempVal new_type;
4072 
4073     switch (ts->kind) {
4074     case TEMP_FIXED:
4075         return;
4076     case TEMP_GLOBAL:
4077     case TEMP_TB:
4078         new_type = TEMP_VAL_MEM;
4079         break;
4080     case TEMP_EBB:
4081         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4082         break;
4083     case TEMP_CONST:
4084         new_type = TEMP_VAL_CONST;
4085         break;
4086     default:
4087         g_assert_not_reached();
4088     }
4089     set_temp_val_nonreg(s, ts, new_type);
4090 }
4091 
4092 /* Mark a temporary as dead.  */
4093 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4094 {
4095     temp_free_or_dead(s, ts, 1);
4096 }
4097 
4098 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4099    registers needs to be allocated to store a constant.  If 'free_or_dead'
4100    is non-zero, subsequently release the temporary; if it is positive, the
4101    temp is dead; if it is negative, the temp is free.  */
4102 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4103                       TCGRegSet preferred_regs, int free_or_dead)
4104 {
4105     if (!temp_readonly(ts) && !ts->mem_coherent) {
4106         if (!ts->mem_allocated) {
4107             temp_allocate_frame(s, ts);
4108         }
4109         switch (ts->val_type) {
4110         case TEMP_VAL_CONST:
4111             /* If we're going to free the temp immediately, then we won't
4112                require it later in a register, so attempt to store the
4113                constant to memory directly.  */
4114             if (free_or_dead
4115                 && tcg_out_sti(s, ts->type, ts->val,
4116                                ts->mem_base->reg, ts->mem_offset)) {
4117                 break;
4118             }
4119             temp_load(s, ts, tcg_target_available_regs[ts->type],
4120                       allocated_regs, preferred_regs);
4121             /* fallthrough */
4122 
4123         case TEMP_VAL_REG:
4124             tcg_out_st(s, ts->type, ts->reg,
4125                        ts->mem_base->reg, ts->mem_offset);
4126             break;
4127 
4128         case TEMP_VAL_MEM:
4129             break;
4130 
4131         case TEMP_VAL_DEAD:
4132         default:
4133             g_assert_not_reached();
4134         }
4135         ts->mem_coherent = 1;
4136     }
4137     if (free_or_dead) {
4138         temp_free_or_dead(s, ts, free_or_dead);
4139     }
4140 }
4141 
4142 /* free register 'reg' by spilling the corresponding temporary if necessary */
4143 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4144 {
4145     TCGTemp *ts = s->reg_to_temp[reg];
4146     if (ts != NULL) {
4147         temp_sync(s, ts, allocated_regs, 0, -1);
4148     }
4149 }
4150 
4151 /**
4152  * tcg_reg_alloc:
4153  * @required_regs: Set of registers in which we must allocate.
4154  * @allocated_regs: Set of registers which must be avoided.
4155  * @preferred_regs: Set of registers we should prefer.
4156  * @rev: True if we search the registers in "indirect" order.
4157  *
4158  * The allocated register must be in @required_regs & ~@allocated_regs,
4159  * but if we can put it in @preferred_regs we may save a move later.
4160  */
4161 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4162                             TCGRegSet allocated_regs,
4163                             TCGRegSet preferred_regs, bool rev)
4164 {
4165     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4166     TCGRegSet reg_ct[2];
4167     const int *order;
4168 
4169     reg_ct[1] = required_regs & ~allocated_regs;
4170     tcg_debug_assert(reg_ct[1] != 0);
4171     reg_ct[0] = reg_ct[1] & preferred_regs;
4172 
4173     /* Skip the preferred_regs option if it cannot be satisfied,
4174        or if the preference made no difference.  */
4175     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4176 
4177     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4178 
4179     /* Try free registers, preferences first.  */
4180     for (j = f; j < 2; j++) {
4181         TCGRegSet set = reg_ct[j];
4182 
4183         if (tcg_regset_single(set)) {
4184             /* One register in the set.  */
4185             TCGReg reg = tcg_regset_first(set);
4186             if (s->reg_to_temp[reg] == NULL) {
4187                 return reg;
4188             }
4189         } else {
4190             for (i = 0; i < n; i++) {
4191                 TCGReg reg = order[i];
4192                 if (s->reg_to_temp[reg] == NULL &&
4193                     tcg_regset_test_reg(set, reg)) {
4194                     return reg;
4195                 }
4196             }
4197         }
4198     }
4199 
4200     /* We must spill something.  */
4201     for (j = f; j < 2; j++) {
4202         TCGRegSet set = reg_ct[j];
4203 
4204         if (tcg_regset_single(set)) {
4205             /* One register in the set.  */
4206             TCGReg reg = tcg_regset_first(set);
4207             tcg_reg_free(s, reg, allocated_regs);
4208             return reg;
4209         } else {
4210             for (i = 0; i < n; i++) {
4211                 TCGReg reg = order[i];
4212                 if (tcg_regset_test_reg(set, reg)) {
4213                     tcg_reg_free(s, reg, allocated_regs);
4214                     return reg;
4215                 }
4216             }
4217         }
4218     }
4219 
4220     g_assert_not_reached();
4221 }
4222 
4223 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4224                                  TCGRegSet allocated_regs,
4225                                  TCGRegSet preferred_regs, bool rev)
4226 {
4227     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4228     TCGRegSet reg_ct[2];
4229     const int *order;
4230 
4231     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4232     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4233     tcg_debug_assert(reg_ct[1] != 0);
4234     reg_ct[0] = reg_ct[1] & preferred_regs;
4235 
4236     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4237 
4238     /*
4239      * Skip the preferred_regs option if it cannot be satisfied,
4240      * or if the preference made no difference.
4241      */
4242     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4243 
4244     /*
4245      * Minimize the number of flushes by looking for 2 free registers first,
4246      * then a single flush, then two flushes.
4247      */
4248     for (fmin = 2; fmin >= 0; fmin--) {
4249         for (j = k; j < 2; j++) {
4250             TCGRegSet set = reg_ct[j];
4251 
4252             for (i = 0; i < n; i++) {
4253                 TCGReg reg = order[i];
4254 
4255                 if (tcg_regset_test_reg(set, reg)) {
4256                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4257                     if (f >= fmin) {
4258                         tcg_reg_free(s, reg, allocated_regs);
4259                         tcg_reg_free(s, reg + 1, allocated_regs);
4260                         return reg;
4261                     }
4262                 }
4263             }
4264         }
4265     }
4266     g_assert_not_reached();
4267 }
4268 
4269 /* Make sure the temporary is in a register.  If needed, allocate the register
4270    from DESIRED while avoiding ALLOCATED.  */
4271 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4272                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4273 {
4274     TCGReg reg;
4275 
4276     switch (ts->val_type) {
4277     case TEMP_VAL_REG:
4278         return;
4279     case TEMP_VAL_CONST:
4280         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4281                             preferred_regs, ts->indirect_base);
4282         if (ts->type <= TCG_TYPE_I64) {
4283             tcg_out_movi(s, ts->type, reg, ts->val);
4284         } else {
4285             uint64_t val = ts->val;
4286             MemOp vece = MO_64;
4287 
4288             /*
4289              * Find the minimal vector element that matches the constant.
4290              * The targets will, in general, have to do this search anyway,
4291              * do this generically.
4292              */
4293             if (val == dup_const(MO_8, val)) {
4294                 vece = MO_8;
4295             } else if (val == dup_const(MO_16, val)) {
4296                 vece = MO_16;
4297             } else if (val == dup_const(MO_32, val)) {
4298                 vece = MO_32;
4299             }
4300 
4301             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4302         }
4303         ts->mem_coherent = 0;
4304         break;
4305     case TEMP_VAL_MEM:
4306         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4307                             preferred_regs, ts->indirect_base);
4308         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4309         ts->mem_coherent = 1;
4310         break;
4311     case TEMP_VAL_DEAD:
4312     default:
4313         g_assert_not_reached();
4314     }
4315     set_temp_val_reg(s, ts, reg);
4316 }
4317 
4318 /* Save a temporary to memory. 'allocated_regs' is used in case a
4319    temporary registers needs to be allocated to store a constant.  */
4320 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4321 {
4322     /* The liveness analysis already ensures that globals are back
4323        in memory. Keep an tcg_debug_assert for safety. */
4324     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4325 }
4326 
4327 /* save globals to their canonical location and assume they can be
4328    modified be the following code. 'allocated_regs' is used in case a
4329    temporary registers needs to be allocated to store a constant. */
4330 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4331 {
4332     int i, n;
4333 
4334     for (i = 0, n = s->nb_globals; i < n; i++) {
4335         temp_save(s, &s->temps[i], allocated_regs);
4336     }
4337 }
4338 
4339 /* sync globals to their canonical location and assume they can be
4340    read by the following code. 'allocated_regs' is used in case a
4341    temporary registers needs to be allocated to store a constant. */
4342 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4343 {
4344     int i, n;
4345 
4346     for (i = 0, n = s->nb_globals; i < n; i++) {
4347         TCGTemp *ts = &s->temps[i];
4348         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4349                          || ts->kind == TEMP_FIXED
4350                          || ts->mem_coherent);
4351     }
4352 }
4353 
4354 /* at the end of a basic block, we assume all temporaries are dead and
4355    all globals are stored at their canonical location. */
4356 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4357 {
4358     int i;
4359 
4360     for (i = s->nb_globals; i < s->nb_temps; i++) {
4361         TCGTemp *ts = &s->temps[i];
4362 
4363         switch (ts->kind) {
4364         case TEMP_TB:
4365             temp_save(s, ts, allocated_regs);
4366             break;
4367         case TEMP_EBB:
4368             /* The liveness analysis already ensures that temps are dead.
4369                Keep an tcg_debug_assert for safety. */
4370             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4371             break;
4372         case TEMP_CONST:
4373             /* Similarly, we should have freed any allocated register. */
4374             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4375             break;
4376         default:
4377             g_assert_not_reached();
4378         }
4379     }
4380 
4381     save_globals(s, allocated_regs);
4382 }
4383 
4384 /*
4385  * At a conditional branch, we assume all temporaries are dead unless
4386  * explicitly live-across-conditional-branch; all globals and local
4387  * temps are synced to their location.
4388  */
4389 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4390 {
4391     sync_globals(s, allocated_regs);
4392 
4393     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4394         TCGTemp *ts = &s->temps[i];
4395         /*
4396          * The liveness analysis already ensures that temps are dead.
4397          * Keep tcg_debug_asserts for safety.
4398          */
4399         switch (ts->kind) {
4400         case TEMP_TB:
4401             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4402             break;
4403         case TEMP_EBB:
4404         case TEMP_CONST:
4405             break;
4406         default:
4407             g_assert_not_reached();
4408         }
4409     }
4410 }
4411 
4412 /*
4413  * Specialized code generation for INDEX_op_mov_* with a constant.
4414  */
4415 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4416                                   tcg_target_ulong val, TCGLifeData arg_life,
4417                                   TCGRegSet preferred_regs)
4418 {
4419     /* ENV should not be modified.  */
4420     tcg_debug_assert(!temp_readonly(ots));
4421 
4422     /* The movi is not explicitly generated here.  */
4423     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4424     ots->val = val;
4425     ots->mem_coherent = 0;
4426     if (NEED_SYNC_ARG(0)) {
4427         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4428     } else if (IS_DEAD_ARG(0)) {
4429         temp_dead(s, ots);
4430     }
4431 }
4432 
4433 /*
4434  * Specialized code generation for INDEX_op_mov_*.
4435  */
4436 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4437 {
4438     const TCGLifeData arg_life = op->life;
4439     TCGRegSet allocated_regs, preferred_regs;
4440     TCGTemp *ts, *ots;
4441     TCGType otype, itype;
4442     TCGReg oreg, ireg;
4443 
4444     allocated_regs = s->reserved_regs;
4445     preferred_regs = output_pref(op, 0);
4446     ots = arg_temp(op->args[0]);
4447     ts = arg_temp(op->args[1]);
4448 
4449     /* ENV should not be modified.  */
4450     tcg_debug_assert(!temp_readonly(ots));
4451 
4452     /* Note that otype != itype for no-op truncation.  */
4453     otype = ots->type;
4454     itype = ts->type;
4455 
4456     if (ts->val_type == TEMP_VAL_CONST) {
4457         /* propagate constant or generate sti */
4458         tcg_target_ulong val = ts->val;
4459         if (IS_DEAD_ARG(1)) {
4460             temp_dead(s, ts);
4461         }
4462         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4463         return;
4464     }
4465 
4466     /* If the source value is in memory we're going to be forced
4467        to have it in a register in order to perform the copy.  Copy
4468        the SOURCE value into its own register first, that way we
4469        don't have to reload SOURCE the next time it is used. */
4470     if (ts->val_type == TEMP_VAL_MEM) {
4471         temp_load(s, ts, tcg_target_available_regs[itype],
4472                   allocated_regs, preferred_regs);
4473     }
4474     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4475     ireg = ts->reg;
4476 
4477     if (IS_DEAD_ARG(0)) {
4478         /* mov to a non-saved dead register makes no sense (even with
4479            liveness analysis disabled). */
4480         tcg_debug_assert(NEED_SYNC_ARG(0));
4481         if (!ots->mem_allocated) {
4482             temp_allocate_frame(s, ots);
4483         }
4484         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4485         if (IS_DEAD_ARG(1)) {
4486             temp_dead(s, ts);
4487         }
4488         temp_dead(s, ots);
4489         return;
4490     }
4491 
4492     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4493         /*
4494          * The mov can be suppressed.  Kill input first, so that it
4495          * is unlinked from reg_to_temp, then set the output to the
4496          * reg that we saved from the input.
4497          */
4498         temp_dead(s, ts);
4499         oreg = ireg;
4500     } else {
4501         if (ots->val_type == TEMP_VAL_REG) {
4502             oreg = ots->reg;
4503         } else {
4504             /* Make sure to not spill the input register during allocation. */
4505             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4506                                  allocated_regs | ((TCGRegSet)1 << ireg),
4507                                  preferred_regs, ots->indirect_base);
4508         }
4509         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4510             /*
4511              * Cross register class move not supported.
4512              * Store the source register into the destination slot
4513              * and leave the destination temp as TEMP_VAL_MEM.
4514              */
4515             assert(!temp_readonly(ots));
4516             if (!ts->mem_allocated) {
4517                 temp_allocate_frame(s, ots);
4518             }
4519             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4520             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4521             ots->mem_coherent = 1;
4522             return;
4523         }
4524     }
4525     set_temp_val_reg(s, ots, oreg);
4526     ots->mem_coherent = 0;
4527 
4528     if (NEED_SYNC_ARG(0)) {
4529         temp_sync(s, ots, allocated_regs, 0, 0);
4530     }
4531 }
4532 
4533 /*
4534  * Specialized code generation for INDEX_op_dup_vec.
4535  */
4536 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4537 {
4538     const TCGLifeData arg_life = op->life;
4539     TCGRegSet dup_out_regs, dup_in_regs;
4540     TCGTemp *its, *ots;
4541     TCGType itype, vtype;
4542     unsigned vece;
4543     int lowpart_ofs;
4544     bool ok;
4545 
4546     ots = arg_temp(op->args[0]);
4547     its = arg_temp(op->args[1]);
4548 
4549     /* ENV should not be modified.  */
4550     tcg_debug_assert(!temp_readonly(ots));
4551 
4552     itype = its->type;
4553     vece = TCGOP_VECE(op);
4554     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4555 
4556     if (its->val_type == TEMP_VAL_CONST) {
4557         /* Propagate constant via movi -> dupi.  */
4558         tcg_target_ulong val = its->val;
4559         if (IS_DEAD_ARG(1)) {
4560             temp_dead(s, its);
4561         }
4562         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4563         return;
4564     }
4565 
4566     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4567     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4568 
4569     /* Allocate the output register now.  */
4570     if (ots->val_type != TEMP_VAL_REG) {
4571         TCGRegSet allocated_regs = s->reserved_regs;
4572         TCGReg oreg;
4573 
4574         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4575             /* Make sure to not spill the input register. */
4576             tcg_regset_set_reg(allocated_regs, its->reg);
4577         }
4578         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4579                              output_pref(op, 0), ots->indirect_base);
4580         set_temp_val_reg(s, ots, oreg);
4581     }
4582 
4583     switch (its->val_type) {
4584     case TEMP_VAL_REG:
4585         /*
4586          * The dup constriaints must be broad, covering all possible VECE.
4587          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4588          * to fail, indicating that extra moves are required for that case.
4589          */
4590         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4591             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4592                 goto done;
4593             }
4594             /* Try again from memory or a vector input register.  */
4595         }
4596         if (!its->mem_coherent) {
4597             /*
4598              * The input register is not synced, and so an extra store
4599              * would be required to use memory.  Attempt an integer-vector
4600              * register move first.  We do not have a TCGRegSet for this.
4601              */
4602             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4603                 break;
4604             }
4605             /* Sync the temp back to its slot and load from there.  */
4606             temp_sync(s, its, s->reserved_regs, 0, 0);
4607         }
4608         /* fall through */
4609 
4610     case TEMP_VAL_MEM:
4611         lowpart_ofs = 0;
4612         if (HOST_BIG_ENDIAN) {
4613             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4614         }
4615         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4616                              its->mem_offset + lowpart_ofs)) {
4617             goto done;
4618         }
4619         /* Load the input into the destination vector register. */
4620         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4621         break;
4622 
4623     default:
4624         g_assert_not_reached();
4625     }
4626 
4627     /* We now have a vector input register, so dup must succeed. */
4628     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4629     tcg_debug_assert(ok);
4630 
4631  done:
4632     ots->mem_coherent = 0;
4633     if (IS_DEAD_ARG(1)) {
4634         temp_dead(s, its);
4635     }
4636     if (NEED_SYNC_ARG(0)) {
4637         temp_sync(s, ots, s->reserved_regs, 0, 0);
4638     }
4639     if (IS_DEAD_ARG(0)) {
4640         temp_dead(s, ots);
4641     }
4642 }
4643 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The work is done in phases:
 *  1. copy the op's constant arguments into new_args;
 *  2. satisfy the input constraints, loading or copying each input temp
 *     into a suitable register, or passing it as a constant when the
 *     backend accepts the value;
 *  3. release the inputs that liveness marked dead;
 *  4. handle conditional-branch / basic-block-end ops, or otherwise free
 *     call-clobbered registers, check globals for ops with side effects,
 *     and allocate the output registers;
 *  5. emit the host instruction;
 *  6. sync or kill each output as liveness requires.
 *
 * new_args[] receives the register number (or constant value) for each
 * argument and const_args[] flags which inputs were passed as constants.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /*
     * Copy constants: they follow the outputs and inputs in op->args
     * and are passed through to the backend unchanged.
     */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* Registers already claimed for inputs and for outputs, respectively. */
    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Visit the inputs in the order given by the constraint sort index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        /*
         * Snapshot of the temp's register; below it is only consulted
         * after checking that the temp is actually in a register.
         */
        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The temp may already sit in a register outside the set. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The second half always lives in the register after the first. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /*
             * The input can stay where it is if it is dead afterwards and
             * the preceding register is free to become the first output.
             */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /*
             * Otherwise allocate a fresh pair; the constraint describes the
             * second register, hence the shift to express it in terms of
             * the first.
             */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        /* Record the register chosen for this input and reserve it. */
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /*
                     * Reuse the register of the aliased input, unless that
                     * input was passed as a constant.
                     */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Must not overlap any input: exclude both sets. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            /* Bind the output temp to its register; memory is now stale. */
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /*
     * Emit instruction.  The extension ops go through dedicated backend
     * hooks; everything else goes through tcg_out_op() or, for vector
     * ops, tcg_out_vec_op().
     */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5009 
5010 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5011 {
5012     const TCGLifeData arg_life = op->life;
5013     TCGTemp *ots, *itsl, *itsh;
5014     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5015 
5016     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5017     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5018     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5019 
5020     ots = arg_temp(op->args[0]);
5021     itsl = arg_temp(op->args[1]);
5022     itsh = arg_temp(op->args[2]);
5023 
5024     /* ENV should not be modified.  */
5025     tcg_debug_assert(!temp_readonly(ots));
5026 
5027     /* Allocate the output register now.  */
5028     if (ots->val_type != TEMP_VAL_REG) {
5029         TCGRegSet allocated_regs = s->reserved_regs;
5030         TCGRegSet dup_out_regs =
5031             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5032         TCGReg oreg;
5033 
5034         /* Make sure to not spill the input registers. */
5035         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5036             tcg_regset_set_reg(allocated_regs, itsl->reg);
5037         }
5038         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5039             tcg_regset_set_reg(allocated_regs, itsh->reg);
5040         }
5041 
5042         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5043                              output_pref(op, 0), ots->indirect_base);
5044         set_temp_val_reg(s, ots, oreg);
5045     }
5046 
5047     /* Promote dup2 of immediates to dupi_vec. */
5048     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5049         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5050         MemOp vece = MO_64;
5051 
5052         if (val == dup_const(MO_8, val)) {
5053             vece = MO_8;
5054         } else if (val == dup_const(MO_16, val)) {
5055             vece = MO_16;
5056         } else if (val == dup_const(MO_32, val)) {
5057             vece = MO_32;
5058         }
5059 
5060         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5061         goto done;
5062     }
5063 
5064     /* If the two inputs form one 64-bit value, try dupm_vec. */
5065     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5066         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5067         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5068         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5069 
5070         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5071         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5072 
5073         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5074                              its->mem_base->reg, its->mem_offset)) {
5075             goto done;
5076         }
5077     }
5078 
5079     /* Fall back to generic expansion. */
5080     return false;
5081 
5082  done:
5083     ots->mem_coherent = 0;
5084     if (IS_DEAD_ARG(1)) {
5085         temp_dead(s, itsl);
5086     }
5087     if (IS_DEAD_ARG(2)) {
5088         temp_dead(s, itsh);
5089     }
5090     if (NEED_SYNC_ARG(0)) {
5091         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5092     } else if (IS_DEAD_ARG(0)) {
5093         temp_dead(s, ots);
5094     }
5095     return true;
5096 }
5097 
5098 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5099                          TCGRegSet allocated_regs)
5100 {
5101     if (ts->val_type == TEMP_VAL_REG) {
5102         if (ts->reg != reg) {
5103             tcg_reg_free(s, reg, allocated_regs);
5104             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5105                 /*
5106                  * Cross register class move not supported.  Sync the
5107                  * temp back to its slot and load from there.
5108                  */
5109                 temp_sync(s, ts, allocated_regs, 0, 0);
5110                 tcg_out_ld(s, ts->type, reg,
5111                            ts->mem_base->reg, ts->mem_offset);
5112             }
5113         }
5114     } else {
5115         TCGRegSet arg_set = 0;
5116 
5117         tcg_reg_free(s, reg, allocated_regs);
5118         tcg_regset_set_reg(arg_set, reg);
5119         temp_load(s, ts, arg_set, allocated_regs, 0);
5120     }
5121 }
5122 
5123 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5124                          TCGRegSet allocated_regs)
5125 {
5126     /*
5127      * When the destination is on the stack, load up the temp and store.
5128      * If there are many call-saved registers, the temp might live to
5129      * see another use; otherwise it'll be discarded.
5130      */
5131     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5132     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5133                arg_slot_stk_ofs(arg_slot));
5134 }
5135 
5136 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5137                             TCGTemp *ts, TCGRegSet *allocated_regs)
5138 {
5139     if (arg_slot_reg_p(l->arg_slot)) {
5140         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5141         load_arg_reg(s, reg, ts, *allocated_regs);
5142         tcg_regset_set_reg(*allocated_regs, reg);
5143     } else {
5144         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5145     }
5146 }
5147 
5148 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5149                          intptr_t ref_off, TCGRegSet *allocated_regs)
5150 {
5151     TCGReg reg;
5152 
5153     if (arg_slot_reg_p(arg_slot)) {
5154         reg = tcg_target_call_iarg_regs[arg_slot];
5155         tcg_reg_free(s, reg, *allocated_regs);
5156         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5157         tcg_regset_set_reg(*allocated_regs, reg);
5158     } else {
5159         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5160                             *allocated_regs, 0, false);
5161         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5162         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5163                    arg_slot_stk_ofs(arg_slot));
5164     }
5165 }
5166 
5167 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5168 {
5169     const int nb_oargs = TCGOP_CALLO(op);
5170     const int nb_iargs = TCGOP_CALLI(op);
5171     const TCGLifeData arg_life = op->life;
5172     const TCGHelperInfo *info = tcg_call_info(op);
5173     TCGRegSet allocated_regs = s->reserved_regs;
5174     int i;
5175 
5176     /*
5177      * Move inputs into place in reverse order,
5178      * so that we place stacked arguments first.
5179      */
5180     for (i = nb_iargs - 1; i >= 0; --i) {
5181         const TCGCallArgumentLoc *loc = &info->in[i];
5182         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5183 
5184         switch (loc->kind) {
5185         case TCG_CALL_ARG_NORMAL:
5186         case TCG_CALL_ARG_EXTEND_U:
5187         case TCG_CALL_ARG_EXTEND_S:
5188             load_arg_normal(s, loc, ts, &allocated_regs);
5189             break;
5190         case TCG_CALL_ARG_BY_REF:
5191             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5192             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5193                          arg_slot_stk_ofs(loc->ref_slot),
5194                          &allocated_regs);
5195             break;
5196         case TCG_CALL_ARG_BY_REF_N:
5197             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5198             break;
5199         default:
5200             g_assert_not_reached();
5201         }
5202     }
5203 
5204     /* Mark dead temporaries and free the associated registers.  */
5205     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5206         if (IS_DEAD_ARG(i)) {
5207             temp_dead(s, arg_temp(op->args[i]));
5208         }
5209     }
5210 
5211     /* Clobber call registers.  */
5212     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5213         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5214             tcg_reg_free(s, i, allocated_regs);
5215         }
5216     }
5217 
5218     /*
5219      * Save globals if they might be written by the helper,
5220      * sync them if they might be read.
5221      */
5222     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5223         /* Nothing to do */
5224     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5225         sync_globals(s, allocated_regs);
5226     } else {
5227         save_globals(s, allocated_regs);
5228     }
5229 
5230     /*
5231      * If the ABI passes a pointer to the returned struct as the first
5232      * argument, load that now.  Pass a pointer to the output home slot.
5233      */
5234     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5235         TCGTemp *ts = arg_temp(op->args[0]);
5236 
5237         if (!ts->mem_allocated) {
5238             temp_allocate_frame(s, ts);
5239         }
5240         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5241     }
5242 
5243     tcg_out_call(s, tcg_call_func(op), info);
5244 
5245     /* Assign output registers and emit moves if needed.  */
5246     switch (info->out_kind) {
5247     case TCG_CALL_RET_NORMAL:
5248         for (i = 0; i < nb_oargs; i++) {
5249             TCGTemp *ts = arg_temp(op->args[i]);
5250             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5251 
5252             /* ENV should not be modified.  */
5253             tcg_debug_assert(!temp_readonly(ts));
5254 
5255             set_temp_val_reg(s, ts, reg);
5256             ts->mem_coherent = 0;
5257         }
5258         break;
5259 
5260     case TCG_CALL_RET_BY_VEC:
5261         {
5262             TCGTemp *ts = arg_temp(op->args[0]);
5263 
5264             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5265             tcg_debug_assert(ts->temp_subindex == 0);
5266             if (!ts->mem_allocated) {
5267                 temp_allocate_frame(s, ts);
5268             }
5269             tcg_out_st(s, TCG_TYPE_V128,
5270                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5271                        ts->mem_base->reg, ts->mem_offset);
5272         }
5273         /* fall through to mark all parts in memory */
5274 
5275     case TCG_CALL_RET_BY_REF:
5276         /* The callee has performed a write through the reference. */
5277         for (i = 0; i < nb_oargs; i++) {
5278             TCGTemp *ts = arg_temp(op->args[i]);
5279             ts->val_type = TEMP_VAL_MEM;
5280         }
5281         break;
5282 
5283     default:
5284         g_assert_not_reached();
5285     }
5286 
5287     /* Flush or discard output registers as needed. */
5288     for (i = 0; i < nb_oargs; i++) {
5289         TCGTemp *ts = arg_temp(op->args[i]);
5290         if (NEED_SYNC_ARG(i)) {
5291             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5292         } else if (IS_DEAD_ARG(i)) {
5293             temp_dead(s, ts);
5294         }
5295     }
5296 }
5297 
5298 /**
5299  * atom_and_align_for_opc:
5300  * @s: tcg context
5301  * @opc: memory operation code
5302  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5303  * @allow_two_ops: true if we are prepared to issue two operations
5304  *
5305  * Return the alignment and atomicity to use for the inline fast path
5306  * for the given memory operation.  The alignment may be larger than
5307  * that specified in @opc, and the correct alignment will be diagnosed
5308  * by the slow path helper.
5309  *
5310  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5311  * and issue two loads or stores for subalignment.
5312  */
5313 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5314                                            MemOp host_atom, bool allow_two_ops)
5315 {
5316     MemOp align = get_alignment_bits(opc);
5317     MemOp size = opc & MO_SIZE;
5318     MemOp half = size ? size - 1 : 0;
5319     MemOp atmax;
5320     MemOp atom;
5321 
5322     /* When serialized, no further atomicity required.  */
5323     if (s->gen_tb->cflags & CF_PARALLEL) {
5324         atom = opc & MO_ATOM_MASK;
5325     } else {
5326         atom = MO_ATOM_NONE;
5327     }
5328 
5329     switch (atom) {
5330     case MO_ATOM_NONE:
5331         /* The operation requires no specific atomicity. */
5332         atmax = MO_8;
5333         break;
5334 
5335     case MO_ATOM_IFALIGN:
5336         atmax = size;
5337         break;
5338 
5339     case MO_ATOM_IFALIGN_PAIR:
5340         atmax = half;
5341         break;
5342 
5343     case MO_ATOM_WITHIN16:
5344         atmax = size;
5345         if (size == MO_128) {
5346             /* Misalignment implies !within16, and therefore no atomicity. */
5347         } else if (host_atom != MO_ATOM_WITHIN16) {
5348             /* The host does not implement within16, so require alignment. */
5349             align = MAX(align, size);
5350         }
5351         break;
5352 
5353     case MO_ATOM_WITHIN16_PAIR:
5354         atmax = size;
5355         /*
5356          * Misalignment implies !within16, and therefore half atomicity.
5357          * Any host prepared for two operations can implement this with
5358          * half alignment.
5359          */
5360         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5361             align = MAX(align, half);
5362         }
5363         break;
5364 
5365     case MO_ATOM_SUBALIGN:
5366         atmax = size;
5367         if (host_atom != MO_ATOM_SUBALIGN) {
5368             /* If unaligned but not odd, there are subobjects up to half. */
5369             if (allow_two_ops) {
5370                 align = MAX(align, half);
5371             } else {
5372                 align = MAX(align, size);
5373             }
5374         }
5375         break;
5376 
5377     default:
5378         g_assert_not_reached();
5379     }
5380 
5381     return (TCGAtomAlign){ .atom = atmax, .align = align };
5382 }
5383 
5384 /*
5385  * Similarly for qemu_ld/st slow path helpers.
5386  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5387  * using only the provided backend tcg_out_* functions.
5388  */
5389 
5390 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5391 {
5392     int ofs = arg_slot_stk_ofs(slot);
5393 
5394     /*
5395      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5396      * require extension to uint64_t, adjust the address for uint32_t.
5397      */
5398     if (HOST_BIG_ENDIAN &&
5399         TCG_TARGET_REG_BITS == 64 &&
5400         type == TCG_TYPE_I32) {
5401         ofs += 4;
5402     }
5403     return ofs;
5404 }
5405 
5406 static void tcg_out_helper_load_slots(TCGContext *s,
5407                                       unsigned nmov, TCGMovExtend *mov,
5408                                       const TCGLdstHelperParam *parm)
5409 {
5410     unsigned i;
5411     TCGReg dst3;
5412 
5413     /*
5414      * Start from the end, storing to the stack first.
5415      * This frees those registers, so we need not consider overlap.
5416      */
5417     for (i = nmov; i-- > 0; ) {
5418         unsigned slot = mov[i].dst;
5419 
5420         if (arg_slot_reg_p(slot)) {
5421             goto found_reg;
5422         }
5423 
5424         TCGReg src = mov[i].src;
5425         TCGType dst_type = mov[i].dst_type;
5426         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5427 
5428         /* The argument is going onto the stack; extend into scratch. */
5429         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5430             tcg_debug_assert(parm->ntmp != 0);
5431             mov[i].dst = src = parm->tmp[0];
5432             tcg_out_movext1(s, &mov[i]);
5433         }
5434 
5435         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5436                    tcg_out_helper_stk_ofs(dst_type, slot));
5437     }
5438     return;
5439 
5440  found_reg:
5441     /*
5442      * The remaining arguments are in registers.
5443      * Convert slot numbers to argument registers.
5444      */
5445     nmov = i + 1;
5446     for (i = 0; i < nmov; ++i) {
5447         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5448     }
5449 
5450     switch (nmov) {
5451     case 4:
5452         /* The backend must have provided enough temps for the worst case. */
5453         tcg_debug_assert(parm->ntmp >= 2);
5454 
5455         dst3 = mov[3].dst;
5456         for (unsigned j = 0; j < 3; ++j) {
5457             if (dst3 == mov[j].src) {
5458                 /*
5459                  * Conflict. Copy the source to a temporary, perform the
5460                  * remaining moves, then the extension from our scratch
5461                  * on the way out.
5462                  */
5463                 TCGReg scratch = parm->tmp[1];
5464 
5465                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5466                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5467                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5468                 break;
5469             }
5470         }
5471 
5472         /* No conflicts: perform this move and continue. */
5473         tcg_out_movext1(s, &mov[3]);
5474         /* fall through */
5475 
5476     case 3:
5477         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5478                         parm->ntmp ? parm->tmp[0] : -1);
5479         break;
5480     case 2:
5481         tcg_out_movext2(s, mov, mov + 1,
5482                         parm->ntmp ? parm->tmp[0] : -1);
5483         break;
5484     case 1:
5485         tcg_out_movext1(s, mov);
5486         break;
5487     default:
5488         g_assert_not_reached();
5489     }
5490 }
5491 
5492 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5493                                     TCGType type, tcg_target_long imm,
5494                                     const TCGLdstHelperParam *parm)
5495 {
5496     if (arg_slot_reg_p(slot)) {
5497         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5498     } else {
5499         int ofs = tcg_out_helper_stk_ofs(type, slot);
5500         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5501             tcg_debug_assert(parm->ntmp != 0);
5502             tcg_out_movi(s, type, parm->tmp[0], imm);
5503             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5504         }
5505     }
5506 }
5507 
5508 static void tcg_out_helper_load_common_args(TCGContext *s,
5509                                             const TCGLabelQemuLdst *ldst,
5510                                             const TCGLdstHelperParam *parm,
5511                                             const TCGHelperInfo *info,
5512                                             unsigned next_arg)
5513 {
5514     TCGMovExtend ptr_mov = {
5515         .dst_type = TCG_TYPE_PTR,
5516         .src_type = TCG_TYPE_PTR,
5517         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5518     };
5519     const TCGCallArgumentLoc *loc = &info->in[0];
5520     TCGType type;
5521     unsigned slot;
5522     tcg_target_ulong imm;
5523 
5524     /*
5525      * Handle env, which is always first.
5526      */
5527     ptr_mov.dst = loc->arg_slot;
5528     ptr_mov.src = TCG_AREG0;
5529     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5530 
5531     /*
5532      * Handle oi.
5533      */
5534     imm = ldst->oi;
5535     loc = &info->in[next_arg];
5536     type = TCG_TYPE_I32;
5537     switch (loc->kind) {
5538     case TCG_CALL_ARG_NORMAL:
5539         break;
5540     case TCG_CALL_ARG_EXTEND_U:
5541     case TCG_CALL_ARG_EXTEND_S:
5542         /* No extension required for MemOpIdx. */
5543         tcg_debug_assert(imm <= INT32_MAX);
5544         type = TCG_TYPE_REG;
5545         break;
5546     default:
5547         g_assert_not_reached();
5548     }
5549     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5550     next_arg++;
5551 
5552     /*
5553      * Handle ra.
5554      */
5555     loc = &info->in[next_arg];
5556     slot = loc->arg_slot;
5557     if (parm->ra_gen) {
5558         int arg_reg = -1;
5559         TCGReg ra_reg;
5560 
5561         if (arg_slot_reg_p(slot)) {
5562             arg_reg = tcg_target_call_iarg_regs[slot];
5563         }
5564         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5565 
5566         ptr_mov.dst = slot;
5567         ptr_mov.src = ra_reg;
5568         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5569     } else {
5570         imm = (uintptr_t)ldst->raddr;
5571         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5572     }
5573 }
5574 
5575 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5576                                        const TCGCallArgumentLoc *loc,
5577                                        TCGType dst_type, TCGType src_type,
5578                                        TCGReg lo, TCGReg hi)
5579 {
5580     MemOp reg_mo;
5581 
5582     if (dst_type <= TCG_TYPE_REG) {
5583         MemOp src_ext;
5584 
5585         switch (loc->kind) {
5586         case TCG_CALL_ARG_NORMAL:
5587             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5588             break;
5589         case TCG_CALL_ARG_EXTEND_U:
5590             dst_type = TCG_TYPE_REG;
5591             src_ext = MO_UL;
5592             break;
5593         case TCG_CALL_ARG_EXTEND_S:
5594             dst_type = TCG_TYPE_REG;
5595             src_ext = MO_SL;
5596             break;
5597         default:
5598             g_assert_not_reached();
5599         }
5600 
5601         mov[0].dst = loc->arg_slot;
5602         mov[0].dst_type = dst_type;
5603         mov[0].src = lo;
5604         mov[0].src_type = src_type;
5605         mov[0].src_ext = src_ext;
5606         return 1;
5607     }
5608 
5609     if (TCG_TARGET_REG_BITS == 32) {
5610         assert(dst_type == TCG_TYPE_I64);
5611         reg_mo = MO_32;
5612     } else {
5613         assert(dst_type == TCG_TYPE_I128);
5614         reg_mo = MO_64;
5615     }
5616 
5617     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5618     mov[0].src = lo;
5619     mov[0].dst_type = TCG_TYPE_REG;
5620     mov[0].src_type = TCG_TYPE_REG;
5621     mov[0].src_ext = reg_mo;
5622 
5623     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5624     mov[1].src = hi;
5625     mov[1].dst_type = TCG_TYPE_REG;
5626     mov[1].src_type = TCG_TYPE_REG;
5627     mov[1].src_ext = reg_mo;
5628 
5629     return 2;
5630 }
5631 
5632 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5633                                    const TCGLdstHelperParam *parm)
5634 {
5635     const TCGHelperInfo *info;
5636     const TCGCallArgumentLoc *loc;
5637     TCGMovExtend mov[2];
5638     unsigned next_arg, nmov;
5639     MemOp mop = get_memop(ldst->oi);
5640 
5641     switch (mop & MO_SIZE) {
5642     case MO_8:
5643     case MO_16:
5644     case MO_32:
5645         info = &info_helper_ld32_mmu;
5646         break;
5647     case MO_64:
5648         info = &info_helper_ld64_mmu;
5649         break;
5650     case MO_128:
5651         info = &info_helper_ld128_mmu;
5652         break;
5653     default:
5654         g_assert_not_reached();
5655     }
5656 
5657     /* Defer env argument. */
5658     next_arg = 1;
5659 
5660     loc = &info->in[next_arg];
5661     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5662         /*
5663          * 32-bit host with 32-bit guest: zero-extend the guest address
5664          * to 64-bits for the helper by storing the low part, then
5665          * load a zero for the high part.
5666          */
5667         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5668                                TCG_TYPE_I32, TCG_TYPE_I32,
5669                                ldst->addrlo_reg, -1);
5670         tcg_out_helper_load_slots(s, 1, mov, parm);
5671 
5672         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5673                                 TCG_TYPE_I32, 0, parm);
5674         next_arg += 2;
5675     } else {
5676         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5677                                       ldst->addrlo_reg, ldst->addrhi_reg);
5678         tcg_out_helper_load_slots(s, nmov, mov, parm);
5679         next_arg += nmov;
5680     }
5681 
5682     switch (info->out_kind) {
5683     case TCG_CALL_RET_NORMAL:
5684     case TCG_CALL_RET_BY_VEC:
5685         break;
5686     case TCG_CALL_RET_BY_REF:
5687         /*
5688          * The return reference is in the first argument slot.
5689          * We need memory in which to return: re-use the top of stack.
5690          */
5691         {
5692             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5693 
5694             if (arg_slot_reg_p(0)) {
5695                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5696                                  TCG_REG_CALL_STACK, ofs_slot0);
5697             } else {
5698                 tcg_debug_assert(parm->ntmp != 0);
5699                 tcg_out_addi_ptr(s, parm->tmp[0],
5700                                  TCG_REG_CALL_STACK, ofs_slot0);
5701                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5702                            TCG_REG_CALL_STACK, ofs_slot0);
5703             }
5704         }
5705         break;
5706     default:
5707         g_assert_not_reached();
5708     }
5709 
5710     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5711 }
5712 
5713 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5714                                   bool load_sign,
5715                                   const TCGLdstHelperParam *parm)
5716 {
5717     MemOp mop = get_memop(ldst->oi);
5718     TCGMovExtend mov[2];
5719     int ofs_slot0;
5720 
5721     switch (ldst->type) {
5722     case TCG_TYPE_I64:
5723         if (TCG_TARGET_REG_BITS == 32) {
5724             break;
5725         }
5726         /* fall through */
5727 
5728     case TCG_TYPE_I32:
5729         mov[0].dst = ldst->datalo_reg;
5730         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5731         mov[0].dst_type = ldst->type;
5732         mov[0].src_type = TCG_TYPE_REG;
5733 
5734         /*
5735          * If load_sign, then we allowed the helper to perform the
5736          * appropriate sign extension to tcg_target_ulong, and all
5737          * we need now is a plain move.
5738          *
5739          * If they do not, then we expect the relevant extension
5740          * instruction to be no more expensive than a move, and
5741          * we thus save the icache etc by only using one of two
5742          * helper functions.
5743          */
5744         if (load_sign || !(mop & MO_SIGN)) {
5745             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5746                 mov[0].src_ext = MO_32;
5747             } else {
5748                 mov[0].src_ext = MO_64;
5749             }
5750         } else {
5751             mov[0].src_ext = mop & MO_SSIZE;
5752         }
5753         tcg_out_movext1(s, mov);
5754         return;
5755 
5756     case TCG_TYPE_I128:
5757         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5758         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5759         switch (TCG_TARGET_CALL_RET_I128) {
5760         case TCG_CALL_RET_NORMAL:
5761             break;
5762         case TCG_CALL_RET_BY_VEC:
5763             tcg_out_st(s, TCG_TYPE_V128,
5764                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5765                        TCG_REG_CALL_STACK, ofs_slot0);
5766             /* fall through */
5767         case TCG_CALL_RET_BY_REF:
5768             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5769                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5770             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5771                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5772             return;
5773         default:
5774             g_assert_not_reached();
5775         }
5776         break;
5777 
5778     default:
5779         g_assert_not_reached();
5780     }
5781 
5782     mov[0].dst = ldst->datalo_reg;
5783     mov[0].src =
5784         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5785     mov[0].dst_type = TCG_TYPE_REG;
5786     mov[0].src_type = TCG_TYPE_REG;
5787     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5788 
5789     mov[1].dst = ldst->datahi_reg;
5790     mov[1].src =
5791         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5792     mov[1].dst_type = TCG_TYPE_REG;
5793     mov[1].src_type = TCG_TYPE_REG;
5794     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5795 
5796     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5797 }
5798 
5799 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5800                                    const TCGLdstHelperParam *parm)
5801 {
5802     const TCGHelperInfo *info;
5803     const TCGCallArgumentLoc *loc;
5804     TCGMovExtend mov[4];
5805     TCGType data_type;
5806     unsigned next_arg, nmov, n;
5807     MemOp mop = get_memop(ldst->oi);
5808 
5809     switch (mop & MO_SIZE) {
5810     case MO_8:
5811     case MO_16:
5812     case MO_32:
5813         info = &info_helper_st32_mmu;
5814         data_type = TCG_TYPE_I32;
5815         break;
5816     case MO_64:
5817         info = &info_helper_st64_mmu;
5818         data_type = TCG_TYPE_I64;
5819         break;
5820     case MO_128:
5821         info = &info_helper_st128_mmu;
5822         data_type = TCG_TYPE_I128;
5823         break;
5824     default:
5825         g_assert_not_reached();
5826     }
5827 
5828     /* Defer env argument. */
5829     next_arg = 1;
5830     nmov = 0;
5831 
5832     /* Handle addr argument. */
5833     loc = &info->in[next_arg];
5834     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5835         /*
5836          * 32-bit host with 32-bit guest: zero-extend the guest address
5837          * to 64-bits for the helper by storing the low part.  Later,
5838          * after we have processed the register inputs, we will load a
5839          * zero for the high part.
5840          */
5841         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5842                                TCG_TYPE_I32, TCG_TYPE_I32,
5843                                ldst->addrlo_reg, -1);
5844         next_arg += 2;
5845         nmov += 1;
5846     } else {
5847         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5848                                    ldst->addrlo_reg, ldst->addrhi_reg);
5849         next_arg += n;
5850         nmov += n;
5851     }
5852 
5853     /* Handle data argument. */
5854     loc = &info->in[next_arg];
5855     switch (loc->kind) {
5856     case TCG_CALL_ARG_NORMAL:
5857     case TCG_CALL_ARG_EXTEND_U:
5858     case TCG_CALL_ARG_EXTEND_S:
5859         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5860                                    ldst->datalo_reg, ldst->datahi_reg);
5861         next_arg += n;
5862         nmov += n;
5863         tcg_out_helper_load_slots(s, nmov, mov, parm);
5864         break;
5865 
5866     case TCG_CALL_ARG_BY_REF:
5867         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5868         tcg_debug_assert(data_type == TCG_TYPE_I128);
5869         tcg_out_st(s, TCG_TYPE_I64,
5870                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5871                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5872         tcg_out_st(s, TCG_TYPE_I64,
5873                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5874                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5875 
5876         tcg_out_helper_load_slots(s, nmov, mov, parm);
5877 
5878         if (arg_slot_reg_p(loc->arg_slot)) {
5879             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5880                              TCG_REG_CALL_STACK,
5881                              arg_slot_stk_ofs(loc->ref_slot));
5882         } else {
5883             tcg_debug_assert(parm->ntmp != 0);
5884             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5885                              arg_slot_stk_ofs(loc->ref_slot));
5886             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5887                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5888         }
5889         next_arg += 2;
5890         break;
5891 
5892     default:
5893         g_assert_not_reached();
5894     }
5895 
5896     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5897         /* Zero extend the address by loading a zero for the high part. */
5898         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5899         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5900     }
5901 
5902     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5903 }
5904 
5905 void tcg_dump_op_count(GString *buf)
5906 {
5907     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5908 }
5909 
5910 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5911 {
5912     int i, start_words, num_insns;
5913     TCGOp *op;
5914 
5915     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5916                  && qemu_log_in_addr_range(pc_start))) {
5917         FILE *logfile = qemu_log_trylock();
5918         if (logfile) {
5919             fprintf(logfile, "OP:\n");
5920             tcg_dump_ops(s, logfile, false);
5921             fprintf(logfile, "\n");
5922             qemu_log_unlock(logfile);
5923         }
5924     }
5925 
5926 #ifdef CONFIG_DEBUG_TCG
5927     /* Ensure all labels referenced have been emitted.  */
5928     {
5929         TCGLabel *l;
5930         bool error = false;
5931 
5932         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5933             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5934                 qemu_log_mask(CPU_LOG_TB_OP,
5935                               "$L%d referenced but not present.\n", l->id);
5936                 error = true;
5937             }
5938         }
5939         assert(!error);
5940     }
5941 #endif
5942 
5943     tcg_optimize(s);
5944 
5945     reachable_code_pass(s);
5946     liveness_pass_0(s);
5947     liveness_pass_1(s);
5948 
5949     if (s->nb_indirects > 0) {
5950         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5951                      && qemu_log_in_addr_range(pc_start))) {
5952             FILE *logfile = qemu_log_trylock();
5953             if (logfile) {
5954                 fprintf(logfile, "OP before indirect lowering:\n");
5955                 tcg_dump_ops(s, logfile, false);
5956                 fprintf(logfile, "\n");
5957                 qemu_log_unlock(logfile);
5958             }
5959         }
5960 
5961         /* Replace indirect temps with direct temps.  */
5962         if (liveness_pass_2(s)) {
5963             /* If changes were made, re-run liveness.  */
5964             liveness_pass_1(s);
5965         }
5966     }
5967 
5968     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5969                  && qemu_log_in_addr_range(pc_start))) {
5970         FILE *logfile = qemu_log_trylock();
5971         if (logfile) {
5972             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5973             tcg_dump_ops(s, logfile, true);
5974             fprintf(logfile, "\n");
5975             qemu_log_unlock(logfile);
5976         }
5977     }
5978 
5979     /* Initialize goto_tb jump offsets. */
5980     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5981     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5982     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5983     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5984 
5985     tcg_reg_alloc_start(s);
5986 
5987     /*
5988      * Reset the buffer pointers when restarting after overflow.
5989      * TODO: Move this into translate-all.c with the rest of the
5990      * buffer management.  Having only this done here is confusing.
5991      */
5992     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
5993     s->code_ptr = s->code_buf;
5994 
5995 #ifdef TCG_TARGET_NEED_LDST_LABELS
5996     QSIMPLEQ_INIT(&s->ldst_labels);
5997 #endif
5998 #ifdef TCG_TARGET_NEED_POOL_LABELS
5999     s->pool_labels = NULL;
6000 #endif
6001 
6002     start_words = s->insn_start_words;
6003     s->gen_insn_data =
6004         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6005 
6006     num_insns = -1;
6007     QTAILQ_FOREACH(op, &s->ops, link) {
6008         TCGOpcode opc = op->opc;
6009 
6010         switch (opc) {
6011         case INDEX_op_mov_i32:
6012         case INDEX_op_mov_i64:
6013         case INDEX_op_mov_vec:
6014             tcg_reg_alloc_mov(s, op);
6015             break;
6016         case INDEX_op_dup_vec:
6017             tcg_reg_alloc_dup(s, op);
6018             break;
6019         case INDEX_op_insn_start:
6020             if (num_insns >= 0) {
6021                 size_t off = tcg_current_code_size(s);
6022                 s->gen_insn_end_off[num_insns] = off;
6023                 /* Assert that we do not overflow our stored offset.  */
6024                 assert(s->gen_insn_end_off[num_insns] == off);
6025             }
6026             num_insns++;
6027             for (i = 0; i < start_words; ++i) {
6028                 s->gen_insn_data[num_insns * start_words + i] =
6029                     tcg_get_insn_start_param(op, i);
6030             }
6031             break;
6032         case INDEX_op_discard:
6033             temp_dead(s, arg_temp(op->args[0]));
6034             break;
6035         case INDEX_op_set_label:
6036             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6037             tcg_out_label(s, arg_label(op->args[0]));
6038             break;
6039         case INDEX_op_call:
6040             tcg_reg_alloc_call(s, op);
6041             break;
6042         case INDEX_op_exit_tb:
6043             tcg_out_exit_tb(s, op->args[0]);
6044             break;
6045         case INDEX_op_goto_tb:
6046             tcg_out_goto_tb(s, op->args[0]);
6047             break;
6048         case INDEX_op_dup2_vec:
6049             if (tcg_reg_alloc_dup2(s, op)) {
6050                 break;
6051             }
6052             /* fall through */
6053         default:
6054             /* Sanity check that we've not introduced any unhandled opcodes. */
6055             tcg_debug_assert(tcg_op_supported(opc));
6056             /* Note: in order to speed up the code, it would be much
6057                faster to have specialized register allocator functions for
6058                some common argument patterns */
6059             tcg_reg_alloc_op(s, op);
6060             break;
6061         }
6062         /* Test for (pending) buffer overflow.  The assumption is that any
6063            one operation beginning below the high water mark cannot overrun
6064            the buffer completely.  Thus we can test for overflow after
6065            generating code without having to check during generation.  */
6066         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6067             return -1;
6068         }
6069         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6070         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6071             return -2;
6072         }
6073     }
6074     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6075     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6076 
6077     /* Generate TB finalization at the end of block */
6078 #ifdef TCG_TARGET_NEED_LDST_LABELS
6079     i = tcg_out_ldst_finalize(s);
6080     if (i < 0) {
6081         return i;
6082     }
6083 #endif
6084 #ifdef TCG_TARGET_NEED_POOL_LABELS
6085     i = tcg_out_pool_finalize(s);
6086     if (i < 0) {
6087         return i;
6088     }
6089 #endif
6090     if (!tcg_resolve_relocs(s)) {
6091         return -2;
6092     }
6093 
6094 #ifndef CONFIG_TCG_INTERPRETER
6095     /* flush instruction cache */
6096     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6097                         (uintptr_t)s->code_buf,
6098                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6099 #endif
6100 
6101     return tcg_current_code_size(s);
6102 }
6103 
6104 void tcg_dump_info(GString *buf)
6105 {
6106     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6107 }
6108 
6109 #ifdef ELF_HOST_MACHINE
6110 /* In order to use this feature, the backend needs to do three things:
6111 
6112    (1) Define ELF_HOST_MACHINE to indicate both what value to
6113        put into the ELF image and to indicate support for the feature.
6114 
6115    (2) Define tcg_register_jit.  This should create a buffer containing
6116        the contents of a .debug_frame section that describes the post-
6117        prologue unwind info for the tcg machine.
6118 
6119    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6120 */
6121 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Actions that can be stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;

/* One in-memory symbol file, kept as a node of a doubly linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the symbol file image */
    uint64_t symfile_size;      /* size of that image in bytes */
};

/* Root object through which the entries above are published. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Notification hook, called after __jit_debug_descriptor has been
 * updated (see the GDB JIT interface documentation).  It is declared
 * noinline and contains only an empty asm statement.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};

/* End GDB interface.  */
6154 
/*
 * Look up @str in the NUL-separated string table @strtab.
 *
 * Returns the byte offset of the matching entry, or 0 (the offset of the
 * leading empty name) when @str is not in the table.
 *
 * The scan starts at offset 1, skipping the leading NUL.  An empty entry
 * marks the end of the table, so the table is expected to carry at least
 * one extra NUL byte after the terminator of its last name; without this
 * stop condition a missing name would make the scan run off the end of
 * the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Hit the terminating empty entry: the name is not present. */
            return 0;
        }
        /* Step over this entry and its NUL terminator. */
        p += strlen(p) + 1;
    }
}
6166 
6167 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6168                                  const void *debug_frame,
6169                                  size_t debug_frame_size)
6170 {
6171     struct __attribute__((packed)) DebugInfo {
6172         uint32_t  len;
6173         uint16_t  version;
6174         uint32_t  abbrev;
6175         uint8_t   ptr_size;
6176         uint8_t   cu_die;
6177         uint16_t  cu_lang;
6178         uintptr_t cu_low_pc;
6179         uintptr_t cu_high_pc;
6180         uint8_t   fn_die;
6181         char      fn_name[16];
6182         uintptr_t fn_low_pc;
6183         uintptr_t fn_high_pc;
6184         uint8_t   cu_eoc;
6185     };
6186 
6187     struct ElfImage {
6188         ElfW(Ehdr) ehdr;
6189         ElfW(Phdr) phdr;
6190         ElfW(Shdr) shdr[7];
6191         ElfW(Sym)  sym[2];
6192         struct DebugInfo di;
6193         uint8_t    da[24];
6194         char       str[80];
6195     };
6196 
6197     struct ElfImage *img;
6198 
6199     static const struct ElfImage img_template = {
6200         .ehdr = {
6201             .e_ident[EI_MAG0] = ELFMAG0,
6202             .e_ident[EI_MAG1] = ELFMAG1,
6203             .e_ident[EI_MAG2] = ELFMAG2,
6204             .e_ident[EI_MAG3] = ELFMAG3,
6205             .e_ident[EI_CLASS] = ELF_CLASS,
6206             .e_ident[EI_DATA] = ELF_DATA,
6207             .e_ident[EI_VERSION] = EV_CURRENT,
6208             .e_type = ET_EXEC,
6209             .e_machine = ELF_HOST_MACHINE,
6210             .e_version = EV_CURRENT,
6211             .e_phoff = offsetof(struct ElfImage, phdr),
6212             .e_shoff = offsetof(struct ElfImage, shdr),
6213             .e_ehsize = sizeof(ElfW(Shdr)),
6214             .e_phentsize = sizeof(ElfW(Phdr)),
6215             .e_phnum = 1,
6216             .e_shentsize = sizeof(ElfW(Shdr)),
6217             .e_shnum = ARRAY_SIZE(img->shdr),
6218             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6219 #ifdef ELF_HOST_FLAGS
6220             .e_flags = ELF_HOST_FLAGS,
6221 #endif
6222 #ifdef ELF_OSABI
6223             .e_ident[EI_OSABI] = ELF_OSABI,
6224 #endif
6225         },
6226         .phdr = {
6227             .p_type = PT_LOAD,
6228             .p_flags = PF_X,
6229         },
6230         .shdr = {
6231             [0] = { .sh_type = SHT_NULL },
6232             /* Trick: The contents of code_gen_buffer are not present in
6233                this fake ELF file; that got allocated elsewhere.  Therefore
6234                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6235                will not look for contents.  We can record any address.  */
6236             [1] = { /* .text */
6237                 .sh_type = SHT_NOBITS,
6238                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6239             },
6240             [2] = { /* .debug_info */
6241                 .sh_type = SHT_PROGBITS,
6242                 .sh_offset = offsetof(struct ElfImage, di),
6243                 .sh_size = sizeof(struct DebugInfo),
6244             },
6245             [3] = { /* .debug_abbrev */
6246                 .sh_type = SHT_PROGBITS,
6247                 .sh_offset = offsetof(struct ElfImage, da),
6248                 .sh_size = sizeof(img->da),
6249             },
6250             [4] = { /* .debug_frame */
6251                 .sh_type = SHT_PROGBITS,
6252                 .sh_offset = sizeof(struct ElfImage),
6253             },
6254             [5] = { /* .symtab */
6255                 .sh_type = SHT_SYMTAB,
6256                 .sh_offset = offsetof(struct ElfImage, sym),
6257                 .sh_size = sizeof(img->sym),
6258                 .sh_info = 1,
6259                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6260                 .sh_entsize = sizeof(ElfW(Sym)),
6261             },
6262             [6] = { /* .strtab */
6263                 .sh_type = SHT_STRTAB,
6264                 .sh_offset = offsetof(struct ElfImage, str),
6265                 .sh_size = sizeof(img->str),
6266             }
6267         },
6268         .sym = {
6269             [1] = { /* code_gen_buffer */
6270                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6271                 .st_shndx = 1,
6272             }
6273         },
6274         .di = {
6275             .len = sizeof(struct DebugInfo) - 4,
6276             .version = 2,
6277             .ptr_size = sizeof(void *),
6278             .cu_die = 1,
6279             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6280             .fn_die = 2,
6281             .fn_name = "code_gen_buffer"
6282         },
6283         .da = {
6284             1,          /* abbrev number (the cu) */
6285             0x11, 1,    /* DW_TAG_compile_unit, has children */
6286             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6287             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6288             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6289             0, 0,       /* end of abbrev */
6290             2,          /* abbrev number (the fn) */
6291             0x2e, 0,    /* DW_TAG_subprogram, no children */
6292             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6293             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6294             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6295             0, 0,       /* end of abbrev */
6296             0           /* no more abbrev */
6297         },
6298         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6299                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6300     };
6301 
6302     /* We only need a single jit entry; statically allocate it.  */
6303     static struct jit_code_entry one_entry;
6304 
6305     uintptr_t buf = (uintptr_t)buf_ptr;
6306     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6307     DebugFrameHeader *dfh;
6308 
6309     img = g_malloc(img_size);
6310     *img = img_template;
6311 
6312     img->phdr.p_vaddr = buf;
6313     img->phdr.p_paddr = buf;
6314     img->phdr.p_memsz = buf_size;
6315 
6316     img->shdr[1].sh_name = find_string(img->str, ".text");
6317     img->shdr[1].sh_addr = buf;
6318     img->shdr[1].sh_size = buf_size;
6319 
6320     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6321     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6322 
6323     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6324     img->shdr[4].sh_size = debug_frame_size;
6325 
6326     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6327     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6328 
6329     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6330     img->sym[1].st_value = buf;
6331     img->sym[1].st_size = buf_size;
6332 
6333     img->di.cu_low_pc = buf;
6334     img->di.cu_high_pc = buf + buf_size;
6335     img->di.fn_low_pc = buf;
6336     img->di.fn_high_pc = buf + buf_size;
6337 
6338     dfh = (DebugFrameHeader *)(img + 1);
6339     memcpy(dfh, debug_frame, debug_frame_size);
6340     dfh->fde.func_start = buf;
6341     dfh->fde.func_len = buf_size;
6342 
6343 #ifdef DEBUG_JIT
6344     /* Enable this block to be able to debug the ELF image file creation.
6345        One can use readelf, objdump, or other inspection utilities.  */
6346     {
6347         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6348         FILE *f = fopen(jit, "w+b");
6349         if (f) {
6350             if (fwrite(img, img_size, 1, f) != img_size) {
6351                 /* Avoid stupid unused return value warning for fwrite.  */
6352             }
6353             fclose(f);
6354         }
6355     }
6356 #endif
6357 
6358     one_entry.symfile_addr = img;
6359     one_entry.symfile_size = img_size;
6360 
6361     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6362     __jit_debug_descriptor.relevant_entry = &one_entry;
6363     __jit_debug_descriptor.first_entry = &one_entry;
6364     __jit_debug_register_code();
6365 }
6366 #else
6367 /* No support for the feature.  Provide the entry point expected by exec.c,
6368    and implement the internal function we declared earlier.  */
6369 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no image to
 * build, so every argument is ignored and nothing happens.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
6375 
/*
 * Entry point for hosts without ELF_HOST_MACHINE: registration is not
 * supported, so the buffer description is accepted and ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    (void)buf;
    (void)buf_size;
}
6379 #endif /* ELF_HOST_MACHINE */
6380 
6381 #if !TCG_TARGET_MAYBE_vec
/*
 * Definition compiled only when TCG_TARGET_MAYBE_vec is zero.  All
 * arguments are ignored; the body merely asserts that this function is
 * never reached.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6386 #endif
6387