xref: /openbmc/qemu/tcg/tcg.c (revision 3635502dd00bcfee3a6ab790d950c2fc4ace607b)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/tcg-op-common.h"
40 
41 #if UINTPTR_MAX == UINT32_MAX
42 # define ELF_CLASS  ELFCLASS32
43 #else
44 # define ELF_CLASS  ELFCLASS64
45 #endif
46 #if HOST_BIG_ENDIAN
47 # define ELF_DATA   ELFDATA2MSB
48 #else
49 # define ELF_DATA   ELFDATA2LSB
50 #endif
51 
52 #include "elf.h"
53 #include "exec/log.h"
54 #include "tcg/tcg-ldst.h"
55 #include "tcg/tcg-temp-internal.h"
56 #include "tcg-internal.h"
57 #include "accel/tcg/perf.h"
58 #ifdef CONFIG_USER_ONLY
59 #include "exec/user/guest-base.h"
60 #endif
61 
62 /* Forward declarations for functions declared in tcg-target.c.inc and
63    used here. */
64 static void tcg_target_init(TCGContext *s);
65 static void tcg_target_qemu_prologue(TCGContext *s);
66 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
67                         intptr_t value, intptr_t addend);
68 
/*
 * CIE header.  The CIE and FDE header definitions are shared by every
 * host backend.
 */
typedef struct {
    /* The leading member is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
79 
/*
 * FDE header: packed, with the leading length word aligned to the size
 * of a host pointer.  The function start and length are pointer-sized.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
86 
87 typedef struct QEMU_PACKED {
88     DebugFrameCIE cie;
89     DebugFrameFDEHeader fde;
90 } DebugFrameHeader;
91 
92 typedef struct TCGLabelQemuLdst {
93     bool is_ld;             /* qemu_ld: true, qemu_st: false */
94     MemOpIdx oi;
95     TCGType type;           /* result type of a load */
96     TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
97     TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
98     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
99     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
100     const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
101     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
102     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
103 } TCGLabelQemuLdst;
104 
/*
 * Forward declaration.  Tagged "unused" so that no warning is raised
 * when nothing in the translation unit references it.
 */
static __attribute__((unused))
void tcg_register_jit_int(const void *buf, size_t size,
                          const void *debug_frame, size_t debug_frame_size);
109 
110 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
111 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
112                        intptr_t arg2);
113 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
114 static void tcg_out_movi(TCGContext *s, TCGType type,
115                          TCGReg ret, tcg_target_long arg);
116 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
117 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
118 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
126 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
127 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
128 static void tcg_out_goto_tb(TCGContext *s, int which);
129 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
130                        const TCGArg args[TCG_MAX_OP_ARGS],
131                        const int const_args[TCG_MAX_OP_ARGS]);
132 #if TCG_TARGET_MAYBE_vec
133 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
134                             TCGReg dst, TCGReg src);
135 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
136                              TCGReg dst, TCGReg base, intptr_t offset);
137 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
138                              TCGReg dst, int64_t arg);
139 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                            unsigned vecl, unsigned vece,
141                            const TCGArg args[TCG_MAX_OP_ARGS],
142                            const int const_args[TCG_MAX_OP_ARGS]);
143 #else
144 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
145                                    TCGReg dst, TCGReg src)
146 {
147     g_assert_not_reached();
148 }
149 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
150                                     TCGReg dst, TCGReg base, intptr_t offset)
151 {
152     g_assert_not_reached();
153 }
154 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
155                                     TCGReg dst, int64_t arg)
156 {
157     g_assert_not_reached();
158 }
159 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
160                                   unsigned vecl, unsigned vece,
161                                   const TCGArg args[TCG_MAX_OP_ARGS],
162                                   const int const_args[TCG_MAX_OP_ARGS])
163 {
164     g_assert_not_reached();
165 }
166 #endif
167 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
168                        intptr_t arg2);
169 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
170                         TCGReg base, intptr_t ofs);
171 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
172                          const TCGHelperInfo *info);
173 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
174 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
175 #ifdef TCG_TARGET_NEED_LDST_LABELS
176 static int tcg_out_ldst_finalize(TCGContext *s);
177 #endif
178 
179 typedef struct TCGLdstHelperParam {
180     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
181     unsigned ntmp;
182     int tmp[3];
183 } TCGLdstHelperParam;
184 
185 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
186                                    const TCGLdstHelperParam *p)
187     __attribute__((unused));
188 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
189                                   bool load_sign, const TCGLdstHelperParam *p)
190     __attribute__((unused));
191 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
192                                    const TCGLdstHelperParam *p)
193     __attribute__((unused));
194 
195 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
196     [MO_UB] = helper_ldub_mmu,
197     [MO_SB] = helper_ldsb_mmu,
198     [MO_UW] = helper_lduw_mmu,
199     [MO_SW] = helper_ldsw_mmu,
200     [MO_UL] = helper_ldul_mmu,
201     [MO_UQ] = helper_ldq_mmu,
202 #if TCG_TARGET_REG_BITS == 64
203     [MO_SL] = helper_ldsl_mmu,
204     [MO_128] = helper_ld16_mmu,
205 #endif
206 };
207 
208 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
209     [MO_8]  = helper_stb_mmu,
210     [MO_16] = helper_stw_mmu,
211     [MO_32] = helper_stl_mmu,
212     [MO_64] = helper_stq_mmu,
213 #if TCG_TARGET_REG_BITS == 64
214     [MO_128] = helper_st16_mmu,
215 #endif
216 };
217 
218 typedef struct {
219     MemOp atom;   /* lg2 bits of atomicity required */
220     MemOp align;  /* lg2 bits of alignment to use */
221 } TCGAtomAlign;
222 
223 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
224                                            MemOp host_atom, bool allow_two_ops)
225     __attribute__((unused));
226 
227 TCGContext tcg_init_ctx;
228 __thread TCGContext *tcg_ctx;
229 
230 TCGContext **tcg_ctxs;
231 unsigned int tcg_cur_ctxs;
232 unsigned int tcg_max_ctxs;
233 TCGv_env cpu_env = 0;
234 const void *tcg_code_gen_epilogue;
235 uintptr_t tcg_splitwx_diff;
236 
237 #ifndef CONFIG_TCG_INTERPRETER
238 tcg_prologue_fn *tcg_qemu_tb_exec;
239 #endif
240 
241 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
242 static TCGRegSet tcg_target_call_clobber_regs;
243 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store one byte at the current output position and step past it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dst = s->code_ptr;

    *dst = v;
    s->code_ptr = dst + 1;
}

/* Overwrite the insn unit at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
256 
257 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
258 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
259 {
260     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
261         *s->code_ptr++ = v;
262     } else {
263         tcg_insn_unit *p = s->code_ptr;
264         memcpy(p, &v, sizeof(v));
265         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
266     }
267 }
268 
269 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
270                                                        uint16_t v)
271 {
272     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
273         *p = v;
274     } else {
275         memcpy(p, &v, sizeof(v));
276     }
277 }
278 #endif
279 
280 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
281 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
282 {
283     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
284         *s->code_ptr++ = v;
285     } else {
286         tcg_insn_unit *p = s->code_ptr;
287         memcpy(p, &v, sizeof(v));
288         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
289     }
290 }
291 
292 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
293                                                        uint32_t v)
294 {
295     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
296         *p = v;
297     } else {
298         memcpy(p, &v, sizeof(v));
299     }
300 }
301 #endif
302 
303 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
304 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
305 {
306     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
307         *s->code_ptr++ = v;
308     } else {
309         tcg_insn_unit *p = s->code_ptr;
310         memcpy(p, &v, sizeof(v));
311         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
312     }
313 }
314 
315 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
316                                                        uint64_t v)
317 {
318     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
319         *p = v;
320     } else {
321         memcpy(p, &v, sizeof(v));
322     }
323 }
324 #endif
325 
326 /* label relocation processing */
327 
328 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
329                           TCGLabel *l, intptr_t addend)
330 {
331     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
332 
333     r->type = type;
334     r->ptr = code_ptr;
335     r->addend = addend;
336     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
337 }
338 
339 static void tcg_out_label(TCGContext *s, TCGLabel *l)
340 {
341     tcg_debug_assert(!l->has_value);
342     l->has_value = 1;
343     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
344 }
345 
346 TCGLabel *gen_new_label(void)
347 {
348     TCGContext *s = tcg_ctx;
349     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
350 
351     memset(l, 0, sizeof(TCGLabel));
352     l->id = s->nb_labels++;
353     QSIMPLEQ_INIT(&l->branches);
354     QSIMPLEQ_INIT(&l->relocs);
355 
356     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
357 
358     return l;
359 }
360 
361 static bool tcg_resolve_relocs(TCGContext *s)
362 {
363     TCGLabel *l;
364 
365     QSIMPLEQ_FOREACH(l, &s->labels, next) {
366         TCGRelocation *r;
367         uintptr_t value = l->u.value;
368 
369         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
370             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
371                 return false;
372             }
373         }
374     }
375     return true;
376 }
377 
378 static void set_jmp_reset_offset(TCGContext *s, int which)
379 {
380     /*
381      * We will check for overflow at the end of the opcode loop in
382      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
383      */
384     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
385 }
386 
387 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
388 {
389     /*
390      * We will check for overflow at the end of the opcode loop in
391      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
392      */
393     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
394 }
395 
396 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
397 {
398     /*
399      * Return the read-execute version of the pointer, for the benefit
400      * of any pc-relative addressing mode.
401      */
402     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
403 }
404 
#if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
/*
 * Offset of the @which'th CPUTLBDescFast entry, counted from
 * s->tlb_fast_offset.
 */
static int tlb_mask_table_ofs(TCGContext *s, int which)
{
    size_t delta = which * sizeof(CPUTLBDescFast);

    return s->tlb_fast_offset + delta;
}
#endif
411 
412 /* Signal overflow, starting over with fewer guest insns. */
413 static G_NORETURN
414 void tcg_raise_tb_overflow(TCGContext *s)
415 {
416     siglongjmp(s->jmp_trans, -2);
417 }
418 
419 /*
420  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
421  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
422  *
423  * However, tcg_out_helper_load_slots reuses this field to hold an
424  * argument slot number (which may designate a argument register or an
425  * argument stack slot), converting to TCGReg once all arguments that
426  * are destined for the stack are processed.
427  */
428 typedef struct TCGMovExtend {
429     unsigned dst;
430     TCGReg src;
431     TCGType dst_type;
432     TCGType src_type;
433     MemOp src_ext;
434 } TCGMovExtend;
435 
436 /**
437  * tcg_out_movext -- move and extend
438  * @s: tcg context
439  * @dst_type: integral type for destination
440  * @dst: destination register
441  * @src_type: integral type for source
442  * @src_ext: extension to apply to source
443  * @src: source register
444  *
445  * Move or extend @src into @dst, depending on @src_ext and the types.
446  */
447 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
448                            TCGType src_type, MemOp src_ext, TCGReg src)
449 {
450     switch (src_ext) {
451     case MO_UB:
452         tcg_out_ext8u(s, dst, src);
453         break;
454     case MO_SB:
455         tcg_out_ext8s(s, dst_type, dst, src);
456         break;
457     case MO_UW:
458         tcg_out_ext16u(s, dst, src);
459         break;
460     case MO_SW:
461         tcg_out_ext16s(s, dst_type, dst, src);
462         break;
463     case MO_UL:
464     case MO_SL:
465         if (dst_type == TCG_TYPE_I32) {
466             if (src_type == TCG_TYPE_I32) {
467                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
468             } else {
469                 tcg_out_extrl_i64_i32(s, dst, src);
470             }
471         } else if (src_type == TCG_TYPE_I32) {
472             if (src_ext & MO_SIGN) {
473                 tcg_out_exts_i32_i64(s, dst, src);
474             } else {
475                 tcg_out_extu_i32_i64(s, dst, src);
476             }
477         } else {
478             if (src_ext & MO_SIGN) {
479                 tcg_out_ext32s(s, dst, src);
480             } else {
481                 tcg_out_ext32u(s, dst, src);
482             }
483         }
484         break;
485     case MO_UQ:
486         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
487         if (dst_type == TCG_TYPE_I32) {
488             tcg_out_extrl_i64_i32(s, dst, src);
489         } else {
490             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
491         }
492         break;
493     default:
494         g_assert_not_reached();
495     }
496 }
497 
498 /* Minor variations on a theme, using a structure. */
499 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
500                                     TCGReg src)
501 {
502     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
503 }
504 
505 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
506 {
507     tcg_out_movext1_new_src(s, i, i->src);
508 }
509 
510 /**
511  * tcg_out_movext2 -- move and extend two pair
512  * @s: tcg context
513  * @i1: first move description
514  * @i2: second move description
515  * @scratch: temporary register, or -1 for none
516  *
517  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
518  * between the sources and destinations.
519  */
520 
521 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
522                             const TCGMovExtend *i2, int scratch)
523 {
524     TCGReg src1 = i1->src;
525     TCGReg src2 = i2->src;
526 
527     if (i1->dst != src2) {
528         tcg_out_movext1(s, i1);
529         tcg_out_movext1(s, i2);
530         return;
531     }
532     if (i2->dst == src1) {
533         TCGType src1_type = i1->src_type;
534         TCGType src2_type = i2->src_type;
535 
536         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
537             /* The data is now in the correct registers, now extend. */
538             src1 = i2->src;
539             src2 = i1->src;
540         } else {
541             tcg_debug_assert(scratch >= 0);
542             tcg_out_mov(s, src1_type, scratch, src1);
543             src1 = scratch;
544         }
545     }
546     tcg_out_movext1_new_src(s, i2, src2);
547     tcg_out_movext1_new_src(s, i1, src1);
548 }
549 
550 /**
551  * tcg_out_movext3 -- move and extend three pair
552  * @s: tcg context
553  * @i1: first move description
554  * @i2: second move description
555  * @i3: third move description
556  * @scratch: temporary register, or -1 for none
557  *
558  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
559  * between the sources and destinations.
560  */
561 
562 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
563                             const TCGMovExtend *i2, const TCGMovExtend *i3,
564                             int scratch)
565 {
566     TCGReg src1 = i1->src;
567     TCGReg src2 = i2->src;
568     TCGReg src3 = i3->src;
569 
570     if (i1->dst != src2 && i1->dst != src3) {
571         tcg_out_movext1(s, i1);
572         tcg_out_movext2(s, i2, i3, scratch);
573         return;
574     }
575     if (i2->dst != src1 && i2->dst != src3) {
576         tcg_out_movext1(s, i2);
577         tcg_out_movext2(s, i1, i3, scratch);
578         return;
579     }
580     if (i3->dst != src1 && i3->dst != src2) {
581         tcg_out_movext1(s, i3);
582         tcg_out_movext2(s, i1, i2, scratch);
583         return;
584     }
585 
586     /*
587      * There is a cycle.  Since there are only 3 nodes, the cycle is
588      * either "clockwise" or "anti-clockwise", and can be solved with
589      * a single scratch or two xchg.
590      */
591     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
592         /* "Clockwise" */
593         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
594             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
595             /* The data is now in the correct registers, now extend. */
596             tcg_out_movext1_new_src(s, i1, i1->dst);
597             tcg_out_movext1_new_src(s, i2, i2->dst);
598             tcg_out_movext1_new_src(s, i3, i3->dst);
599         } else {
600             tcg_debug_assert(scratch >= 0);
601             tcg_out_mov(s, i1->src_type, scratch, src1);
602             tcg_out_movext1(s, i3);
603             tcg_out_movext1(s, i2);
604             tcg_out_movext1_new_src(s, i1, scratch);
605         }
606     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
607         /* "Anti-clockwise" */
608         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
609             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
610             /* The data is now in the correct registers, now extend. */
611             tcg_out_movext1_new_src(s, i1, i1->dst);
612             tcg_out_movext1_new_src(s, i2, i2->dst);
613             tcg_out_movext1_new_src(s, i3, i3->dst);
614         } else {
615             tcg_debug_assert(scratch >= 0);
616             tcg_out_mov(s, i1->src_type, scratch, src1);
617             tcg_out_movext1(s, i2);
618             tcg_out_movext1(s, i3);
619             tcg_out_movext1_new_src(s, i1, scratch);
620         }
621     } else {
622         g_assert_not_reached();
623     }
624 }
625 
/*
 * Token-pasting helpers: glue a prefix onto one to six names, with the
 * names separated from each other by underscores.
 */
#define C_PFX1(pfx, a)                   pfx##a
#define C_PFX2(pfx, a, b)                pfx##a##_##b
#define C_PFX3(pfx, a, b, c)             pfx##a##_##b##_##c
#define C_PFX4(pfx, a, b, c, d)          pfx##a##_##b##_##c##_##d
#define C_PFX5(pfx, a, b, c, d, e)       pfx##a##_##b##_##c##_##d##_##e
#define C_PFX6(pfx, a, b, c, d, e, f)    pfx##a##_##b##_##c##_##d##_##e##_##f
632 
633 /* Define an enumeration for the various combinations. */
634 
635 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
636 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
637 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
638 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
639 
640 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
641 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
642 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
643 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
644 
645 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
646 
647 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
648 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
649 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
650 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
651 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
652 
653 typedef enum {
654 #include "tcg-target-con-set.h"
655 } TCGConstraintSetIndex;
656 
657 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
658 
659 #undef C_O0_I1
660 #undef C_O0_I2
661 #undef C_O0_I3
662 #undef C_O0_I4
663 #undef C_O1_I1
664 #undef C_O1_I2
665 #undef C_O1_I3
666 #undef C_O1_I4
667 #undef C_N1_I2
668 #undef C_O2_I1
669 #undef C_O2_I2
670 #undef C_O2_I3
671 #undef C_O2_I4
672 #undef C_N1_O1_I4
673 
674 /* Put all of the constraint sets into an array, indexed by the enum. */
675 
676 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
677 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
678 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
679 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
680 
681 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
682 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
683 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
684 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
685 
686 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
687 
688 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
689 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
690 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
691 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
692 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
693 
694 static const TCGTargetOpDef constraint_sets[] = {
695 #include "tcg-target-con-set.h"
696 };
697 
698 
699 #undef C_O0_I1
700 #undef C_O0_I2
701 #undef C_O0_I3
702 #undef C_O0_I4
703 #undef C_O1_I1
704 #undef C_O1_I2
705 #undef C_O1_I3
706 #undef C_O1_I4
707 #undef C_N1_I2
708 #undef C_O2_I1
709 #undef C_O2_I2
710 #undef C_O2_I3
711 #undef C_O2_I4
712 #undef C_N1_O1_I4
713 
/*
 * Final definitions: each C_* form now expands to the bare enumerator
 * (no trailing comma), to be returned from tcg_target_op_def().
 */

#define C_O0_I1(in1)                    C_PFX1(c_o0_i1_, in1)
#define C_O0_I2(in1, in2)               C_PFX2(c_o0_i2_, in1, in2)
#define C_O0_I3(in1, in2, in3)          C_PFX3(c_o0_i3_, in1, in2, in3)
#define C_O0_I4(in1, in2, in3, in4)     C_PFX4(c_o0_i4_, in1, in2, in3, in4)

#define C_O1_I1(out1, in1)              C_PFX2(c_o1_i1_, out1, in1)
#define C_O1_I2(out1, in1, in2)         C_PFX3(c_o1_i2_, out1, in1, in2)
#define C_O1_I3(out1, in1, in2, in3) \
    C_PFX4(c_o1_i3_, out1, in1, in2, in3)
#define C_O1_I4(out1, in1, in2, in3, in4) \
    C_PFX5(c_o1_i4_, out1, in1, in2, in3, in4)

#define C_N1_I2(out1, in1, in2)         C_PFX3(c_n1_i2_, out1, in1, in2)

#define C_O2_I1(out1, out2, in1)        C_PFX3(c_o2_i1_, out1, out2, in1)
#define C_O2_I2(out1, out2, in1, in2) \
    C_PFX4(c_o2_i2_, out1, out2, in1, in2)
#define C_O2_I3(out1, out2, in1, in2, in3) \
    C_PFX5(c_o2_i3_, out1, out2, in1, in2, in3)
#define C_O2_I4(out1, out2, in1, in2, in3, in4) \
    C_PFX6(c_o2_i4_, out1, out2, in1, in2, in3, in4)
#define C_N1_O1_I4(out1, out2, in1, in2, in3, in4) \
    C_PFX6(c_n1_o1_i4_, out1, out2, in1, in2, in3, in4)
733 
734 #include "tcg-target.c.inc"
735 
736 static void alloc_tcg_plugin_context(TCGContext *s)
737 {
738 #ifdef CONFIG_PLUGIN
739     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
740     s->plugin_tb->insns =
741         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
742 #endif
743 }
744 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
760 #ifdef CONFIG_USER_ONLY
761 void tcg_register_thread(void)
762 {
763     tcg_ctx = &tcg_init_ctx;
764 }
765 #else
766 void tcg_register_thread(void)
767 {
768     TCGContext *s = g_malloc(sizeof(*s));
769     unsigned int i, n;
770 
771     *s = tcg_init_ctx;
772 
773     /* Relink mem_base.  */
774     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
775         if (tcg_init_ctx.temps[i].mem_base) {
776             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
777             tcg_debug_assert(b >= 0 && b < n);
778             s->temps[i].mem_base = &s->temps[b];
779         }
780     }
781 
782     /* Claim an entry in tcg_ctxs */
783     n = qatomic_fetch_inc(&tcg_cur_ctxs);
784     g_assert(n < tcg_max_ctxs);
785     qatomic_set(&tcg_ctxs[n], s);
786 
787     if (n > 0) {
788         alloc_tcg_plugin_context(s);
789         tcg_region_initial_alloc(s);
790     }
791 
792     tcg_ctx = s;
793 }
794 #endif /* !CONFIG_USER_ONLY */
795 
796 /* pool based memory allocation */
797 void *tcg_malloc_internal(TCGContext *s, int size)
798 {
799     TCGPool *p;
800     int pool_size;
801 
802     if (size > TCG_POOL_CHUNK_SIZE) {
803         /* big malloc: insert a new pool (XXX: could optimize) */
804         p = g_malloc(sizeof(TCGPool) + size);
805         p->size = size;
806         p->next = s->pool_first_large;
807         s->pool_first_large = p;
808         return p->data;
809     } else {
810         p = s->pool_current;
811         if (!p) {
812             p = s->pool_first;
813             if (!p)
814                 goto new_pool;
815         } else {
816             if (!p->next) {
817             new_pool:
818                 pool_size = TCG_POOL_CHUNK_SIZE;
819                 p = g_malloc(sizeof(TCGPool) + pool_size);
820                 p->size = pool_size;
821                 p->next = NULL;
822                 if (s->pool_current) {
823                     s->pool_current->next = p;
824                 } else {
825                     s->pool_first = p;
826                 }
827             } else {
828                 p = p->next;
829             }
830         }
831     }
832     s->pool_current = p;
833     s->pool_cur = p->data + size;
834     s->pool_end = p->data + p->size;
835     return p->data;
836 }
837 
838 void tcg_pool_reset(TCGContext *s)
839 {
840     TCGPool *p, *t;
841     for (p = s->pool_first_large; p; p = t) {
842         t = p->next;
843         g_free(p);
844     }
845     s->pool_first_large = NULL;
846     s->pool_cur = s->pool_end = NULL;
847     s->pool_current = NULL;
848 }
849 
850 /*
851  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
852  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
853  * We only use these for layout in tcg_out_ld_helper_ret and
854  * tcg_out_st_helper_args, and share them between several of
855  * the helpers, with the end result that it's easier to build manually.
856  */
857 
/* Typecode for "ttl": i32 when TCG_TARGET_REG_BITS is 32, i64 otherwise. */
#if TCG_TARGET_REG_BITS != 32
# define dh_typecode_ttl  dh_typecode_i64
#else
# define dh_typecode_ttl  dh_typecode_i32
#endif
863 
864 static TCGHelperInfo info_helper_ld32_mmu = {
865     .flags = TCG_CALL_NO_WG,
866     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
867               | dh_typemask(env, 1)
868               | dh_typemask(i64, 2)  /* uint64_t addr */
869               | dh_typemask(i32, 3)  /* unsigned oi */
870               | dh_typemask(ptr, 4)  /* uintptr_t ra */
871 };
872 
873 static TCGHelperInfo info_helper_ld64_mmu = {
874     .flags = TCG_CALL_NO_WG,
875     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
876               | dh_typemask(env, 1)
877               | dh_typemask(i64, 2)  /* uint64_t addr */
878               | dh_typemask(i32, 3)  /* unsigned oi */
879               | dh_typemask(ptr, 4)  /* uintptr_t ra */
880 };
881 
882 static TCGHelperInfo info_helper_ld128_mmu = {
883     .flags = TCG_CALL_NO_WG,
884     .typemask = dh_typemask(i128, 0) /* return Int128 */
885               | dh_typemask(env, 1)
886               | dh_typemask(i64, 2)  /* uint64_t addr */
887               | dh_typemask(i32, 3)  /* unsigned oi */
888               | dh_typemask(ptr, 4)  /* uintptr_t ra */
889 };
890 
891 static TCGHelperInfo info_helper_st32_mmu = {
892     .flags = TCG_CALL_NO_WG,
893     .typemask = dh_typemask(void, 0)
894               | dh_typemask(env, 1)
895               | dh_typemask(i64, 2)  /* uint64_t addr */
896               | dh_typemask(i32, 3)  /* uint32_t data */
897               | dh_typemask(i32, 4)  /* unsigned oi */
898               | dh_typemask(ptr, 5)  /* uintptr_t ra */
899 };
900 
901 static TCGHelperInfo info_helper_st64_mmu = {
902     .flags = TCG_CALL_NO_WG,
903     .typemask = dh_typemask(void, 0)
904               | dh_typemask(env, 1)
905               | dh_typemask(i64, 2)  /* uint64_t addr */
906               | dh_typemask(i64, 3)  /* uint64_t data */
907               | dh_typemask(i32, 4)  /* unsigned oi */
908               | dh_typemask(ptr, 5)  /* uintptr_t ra */
909 };
910 
911 static TCGHelperInfo info_helper_st128_mmu = {
912     .flags = TCG_CALL_NO_WG,
913     .typemask = dh_typemask(void, 0)
914               | dh_typemask(env, 1)
915               | dh_typemask(i64, 2)  /* uint64_t addr */
916               | dh_typemask(i128, 3) /* Int128 data */
917               | dh_typemask(i32, 4)  /* unsigned oi */
918               | dh_typemask(ptr, 5)  /* uintptr_t ra */
919 };
920 
921 #ifdef CONFIG_TCG_INTERPRETER
922 static ffi_type *typecode_to_ffi(int argmask)
923 {
924     /*
925      * libffi does not support __int128_t, so we have forced Int128
926      * to use the structure definition instead of the builtin type.
927      */
928     static ffi_type *ffi_type_i128_elements[3] = {
929         &ffi_type_uint64,
930         &ffi_type_uint64,
931         NULL
932     };
933     static ffi_type ffi_type_i128 = {
934         .size = 16,
935         .alignment = __alignof__(Int128),
936         .type = FFI_TYPE_STRUCT,
937         .elements = ffi_type_i128_elements,
938     };
939 
940     switch (argmask) {
941     case dh_typecode_void:
942         return &ffi_type_void;
943     case dh_typecode_i32:
944         return &ffi_type_uint32;
945     case dh_typecode_s32:
946         return &ffi_type_sint32;
947     case dh_typecode_i64:
948         return &ffi_type_uint64;
949     case dh_typecode_s64:
950         return &ffi_type_sint64;
951     case dh_typecode_ptr:
952         return &ffi_type_pointer;
953     case dh_typecode_i128:
954         return &ffi_type_i128;
955     }
956     g_assert_not_reached();
957 }
958 
959 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
960 {
961     unsigned typemask = info->typemask;
962     struct {
963         ffi_cif cif;
964         ffi_type *args[];
965     } *ca;
966     ffi_status status;
967     int nargs;
968 
969     /* Ignoring the return type, find the last non-zero field. */
970     nargs = 32 - clz32(typemask >> 3);
971     nargs = DIV_ROUND_UP(nargs, 3);
972     assert(nargs <= MAX_CALL_IARGS);
973 
974     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
975     ca->cif.rtype = typecode_to_ffi(typemask & 7);
976     ca->cif.nargs = nargs;
977 
978     if (nargs != 0) {
979         ca->cif.arg_types = ca->args;
980         for (int j = 0; j < nargs; ++j) {
981             int typecode = extract32(typemask, (j + 1) * 3, 3);
982             ca->args[j] = typecode_to_ffi(typecode);
983         }
984     }
985 
986     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
987                           ca->cif.rtype, ca->cif.arg_types);
988     assert(status == FFI_OK);
989 
990     return &ca->cif;
991 }
992 
993 #define HELPER_INFO_INIT(I)      (&(I)->cif)
994 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
995 #else
996 #define HELPER_INFO_INIT(I)      (&(I)->init)
997 #define HELPER_INFO_INIT_VAL(I)  1
998 #endif /* CONFIG_TCG_INTERPRETER */
999 
1000 static inline bool arg_slot_reg_p(unsigned arg_slot)
1001 {
1002     /*
1003      * Split the sizeof away from the comparison to avoid Werror from
1004      * "unsigned < 0 is always false", when iarg_regs is empty.
1005      */
1006     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1007     return arg_slot < nreg;
1008 }
1009 
1010 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1011 {
1012     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1013     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1014 
1015     tcg_debug_assert(stk_slot < max);
1016     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1017 }
1018 
/* Running counters used while laying out the arguments of one helper. */
typedef struct TCGCumulativeArgs {
    /* Index into the tcg_gen_callN args[] array. */
    int arg_idx;
    /* Next free entry of TCGHelperInfo in[]. */
    int info_in_idx;
    /* Next argument slot, counting register slots then stack slots. */
    int arg_slot;
    /* Stack slots consumed so far by by-reference copies. */
    int ref_slot;
} TCGCumulativeArgs;
1025 
1026 static void layout_arg_even(TCGCumulativeArgs *cum)
1027 {
1028     cum->arg_slot += cum->arg_slot & 1;
1029 }
1030 
1031 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1032                          TCGCallArgumentKind kind)
1033 {
1034     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1035 
1036     *loc = (TCGCallArgumentLoc){
1037         .kind = kind,
1038         .arg_idx = cum->arg_idx,
1039         .arg_slot = cum->arg_slot,
1040     };
1041     cum->info_in_idx++;
1042     cum->arg_slot++;
1043 }
1044 
1045 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1046                                 TCGHelperInfo *info, int n)
1047 {
1048     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1049 
1050     for (int i = 0; i < n; ++i) {
1051         /* Layout all using the same arg_idx, adjusting the subindex. */
1052         loc[i] = (TCGCallArgumentLoc){
1053             .kind = TCG_CALL_ARG_NORMAL,
1054             .arg_idx = cum->arg_idx,
1055             .tmp_subindex = i,
1056             .arg_slot = cum->arg_slot + i,
1057         };
1058     }
1059     cum->info_in_idx += n;
1060     cum->arg_slot += n;
1061 }
1062 
1063 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1064 {
1065     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1066     int n = 128 / TCG_TARGET_REG_BITS;
1067 
1068     /* The first subindex carries the pointer. */
1069     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1070 
1071     /*
1072      * The callee is allowed to clobber memory associated with
1073      * structure pass by-reference.  Therefore we must make copies.
1074      * Allocate space from "ref_slot", which will be adjusted to
1075      * follow the parameters on the stack.
1076      */
1077     loc[0].ref_slot = cum->ref_slot;
1078 
1079     /*
1080      * Subsequent words also go into the reference slot, but
1081      * do not accumulate into the regular arguments.
1082      */
1083     for (int i = 1; i < n; ++i) {
1084         loc[i] = (TCGCallArgumentLoc){
1085             .kind = TCG_CALL_ARG_BY_REF_N,
1086             .arg_idx = cum->arg_idx,
1087             .tmp_subindex = i,
1088             .ref_slot = cum->ref_slot + i,
1089         };
1090     }
1091     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1092     cum->ref_slot += n;
1093 }
1094 
1095 static void init_call_layout(TCGHelperInfo *info)
1096 {
1097     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1098     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1099     unsigned typemask = info->typemask;
1100     unsigned typecode;
1101     TCGCumulativeArgs cum = { };
1102 
1103     /*
1104      * Parse and place any function return value.
1105      */
1106     typecode = typemask & 7;
1107     switch (typecode) {
1108     case dh_typecode_void:
1109         info->nr_out = 0;
1110         break;
1111     case dh_typecode_i32:
1112     case dh_typecode_s32:
1113     case dh_typecode_ptr:
1114         info->nr_out = 1;
1115         info->out_kind = TCG_CALL_RET_NORMAL;
1116         break;
1117     case dh_typecode_i64:
1118     case dh_typecode_s64:
1119         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1120         info->out_kind = TCG_CALL_RET_NORMAL;
1121         /* Query the last register now to trigger any assert early. */
1122         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1123         break;
1124     case dh_typecode_i128:
1125         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1126         info->out_kind = TCG_TARGET_CALL_RET_I128;
1127         switch (TCG_TARGET_CALL_RET_I128) {
1128         case TCG_CALL_RET_NORMAL:
1129             /* Query the last register now to trigger any assert early. */
1130             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1131             break;
1132         case TCG_CALL_RET_BY_VEC:
1133             /* Query the single register now to trigger any assert early. */
1134             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1135             break;
1136         case TCG_CALL_RET_BY_REF:
1137             /*
1138              * Allocate the first argument to the output.
1139              * We don't need to store this anywhere, just make it
1140              * unavailable for use in the input loop below.
1141              */
1142             cum.arg_slot = 1;
1143             break;
1144         default:
1145             qemu_build_not_reached();
1146         }
1147         break;
1148     default:
1149         g_assert_not_reached();
1150     }
1151 
1152     /*
1153      * Parse and place function arguments.
1154      */
1155     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1156         TCGCallArgumentKind kind;
1157         TCGType type;
1158 
1159         typecode = typemask & 7;
1160         switch (typecode) {
1161         case dh_typecode_i32:
1162         case dh_typecode_s32:
1163             type = TCG_TYPE_I32;
1164             break;
1165         case dh_typecode_i64:
1166         case dh_typecode_s64:
1167             type = TCG_TYPE_I64;
1168             break;
1169         case dh_typecode_ptr:
1170             type = TCG_TYPE_PTR;
1171             break;
1172         case dh_typecode_i128:
1173             type = TCG_TYPE_I128;
1174             break;
1175         default:
1176             g_assert_not_reached();
1177         }
1178 
1179         switch (type) {
1180         case TCG_TYPE_I32:
1181             switch (TCG_TARGET_CALL_ARG_I32) {
1182             case TCG_CALL_ARG_EVEN:
1183                 layout_arg_even(&cum);
1184                 /* fall through */
1185             case TCG_CALL_ARG_NORMAL:
1186                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1187                 break;
1188             case TCG_CALL_ARG_EXTEND:
1189                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1190                 layout_arg_1(&cum, info, kind);
1191                 break;
1192             default:
1193                 qemu_build_not_reached();
1194             }
1195             break;
1196 
1197         case TCG_TYPE_I64:
1198             switch (TCG_TARGET_CALL_ARG_I64) {
1199             case TCG_CALL_ARG_EVEN:
1200                 layout_arg_even(&cum);
1201                 /* fall through */
1202             case TCG_CALL_ARG_NORMAL:
1203                 if (TCG_TARGET_REG_BITS == 32) {
1204                     layout_arg_normal_n(&cum, info, 2);
1205                 } else {
1206                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1207                 }
1208                 break;
1209             default:
1210                 qemu_build_not_reached();
1211             }
1212             break;
1213 
1214         case TCG_TYPE_I128:
1215             switch (TCG_TARGET_CALL_ARG_I128) {
1216             case TCG_CALL_ARG_EVEN:
1217                 layout_arg_even(&cum);
1218                 /* fall through */
1219             case TCG_CALL_ARG_NORMAL:
1220                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1221                 break;
1222             case TCG_CALL_ARG_BY_REF:
1223                 layout_arg_by_ref(&cum, info);
1224                 break;
1225             default:
1226                 qemu_build_not_reached();
1227             }
1228             break;
1229 
1230         default:
1231             g_assert_not_reached();
1232         }
1233     }
1234     info->nr_in = cum.info_in_idx;
1235 
1236     /* Validate that we didn't overrun the input array. */
1237     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1238     /* Validate the backend has enough argument space. */
1239     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1240 
1241     /*
1242      * Relocate the "ref_slot" area to the end of the parameters.
1243      * Minimizing this stack offset helps code size for x86,
1244      * which has a signed 8-bit offset encoding.
1245      */
1246     if (cum.ref_slot != 0) {
1247         int ref_base = 0;
1248 
1249         if (cum.arg_slot > max_reg_slots) {
1250             int align = __alignof(Int128) / sizeof(tcg_target_long);
1251 
1252             ref_base = cum.arg_slot - max_reg_slots;
1253             if (align > 1) {
1254                 ref_base = ROUND_UP(ref_base, align);
1255             }
1256         }
1257         assert(ref_base + cum.ref_slot <= max_stk_slots);
1258         ref_base += max_reg_slots;
1259 
1260         if (ref_base != 0) {
1261             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1262                 TCGCallArgumentLoc *loc = &info->in[i];
1263                 switch (loc->kind) {
1264                 case TCG_CALL_ARG_BY_REF:
1265                 case TCG_CALL_ARG_BY_REF_N:
1266                     loc->ref_slot += ref_base;
1267                     break;
1268                 default:
1269                     break;
1270                 }
1271             }
1272         }
1273     }
1274 }
1275 
1276 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1277 static void process_op_defs(TCGContext *s);
1278 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1279                                             TCGReg reg, const char *name);
1280 
1281 static void tcg_context_init(unsigned max_cpus)
1282 {
1283     TCGContext *s = &tcg_init_ctx;
1284     int op, total_args, n, i;
1285     TCGOpDef *def;
1286     TCGArgConstraint *args_ct;
1287     TCGTemp *ts;
1288 
1289     memset(s, 0, sizeof(*s));
1290     s->nb_globals = 0;
1291 
1292     /* Count total number of arguments and allocate the corresponding
1293        space */
1294     total_args = 0;
1295     for(op = 0; op < NB_OPS; op++) {
1296         def = &tcg_op_defs[op];
1297         n = def->nb_iargs + def->nb_oargs;
1298         total_args += n;
1299     }
1300 
1301     args_ct = g_new0(TCGArgConstraint, total_args);
1302 
1303     for(op = 0; op < NB_OPS; op++) {
1304         def = &tcg_op_defs[op];
1305         def->args_ct = args_ct;
1306         n = def->nb_iargs + def->nb_oargs;
1307         args_ct += n;
1308     }
1309 
1310     init_call_layout(&info_helper_ld32_mmu);
1311     init_call_layout(&info_helper_ld64_mmu);
1312     init_call_layout(&info_helper_ld128_mmu);
1313     init_call_layout(&info_helper_st32_mmu);
1314     init_call_layout(&info_helper_st64_mmu);
1315     init_call_layout(&info_helper_st128_mmu);
1316 
1317     tcg_target_init(s);
1318     process_op_defs(s);
1319 
1320     /* Reverse the order of the saved registers, assuming they're all at
1321        the start of tcg_target_reg_alloc_order.  */
1322     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1323         int r = tcg_target_reg_alloc_order[n];
1324         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1325             break;
1326         }
1327     }
1328     for (i = 0; i < n; ++i) {
1329         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1330     }
1331     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1332         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1333     }
1334 
1335     alloc_tcg_plugin_context(s);
1336 
1337     tcg_ctx = s;
1338     /*
1339      * In user-mode we simply share the init context among threads, since we
1340      * use a single region. See the documentation tcg_region_init() for the
1341      * reasoning behind this.
1342      * In softmmu we will have at most max_cpus TCG threads.
1343      */
1344 #ifdef CONFIG_USER_ONLY
1345     tcg_ctxs = &tcg_ctx;
1346     tcg_cur_ctxs = 1;
1347     tcg_max_ctxs = 1;
1348 #else
1349     tcg_max_ctxs = max_cpus;
1350     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1351 #endif
1352 
1353     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1354     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1355     cpu_env = temp_tcgv_ptr(ts);
1356 }
1357 
/*
 * Entry point for TCG initialisation: set up the initial context first,
 * then the code regions, forwarding @tb_size, @splitwx and @max_cpus.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
1363 
1364 /*
1365  * Allocate TBs right before their corresponding translated code, making
1366  * sure that TBs and code are on different cache lines.
1367  */
1368 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1369 {
1370     uintptr_t align = qemu_icache_linesize;
1371     TranslationBlock *tb;
1372     void *next;
1373 
1374  retry:
1375     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1376     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1377 
1378     if (unlikely(next > s->code_gen_highwater)) {
1379         if (tcg_region_alloc(s)) {
1380             return NULL;
1381         }
1382         goto retry;
1383     }
1384     qatomic_set(&s->code_gen_ptr, next);
1385     s->data_gen_ptr = NULL;
1386     return tb;
1387 }
1388 
1389 void tcg_prologue_init(TCGContext *s)
1390 {
1391     size_t prologue_size;
1392 
1393     s->code_ptr = s->code_gen_ptr;
1394     s->code_buf = s->code_gen_ptr;
1395     s->data_gen_ptr = NULL;
1396 
1397 #ifndef CONFIG_TCG_INTERPRETER
1398     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1399 #endif
1400 
1401 #ifdef TCG_TARGET_NEED_POOL_LABELS
1402     s->pool_labels = NULL;
1403 #endif
1404 
1405     qemu_thread_jit_write();
1406     /* Generate the prologue.  */
1407     tcg_target_qemu_prologue(s);
1408 
1409 #ifdef TCG_TARGET_NEED_POOL_LABELS
1410     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1411     {
1412         int result = tcg_out_pool_finalize(s);
1413         tcg_debug_assert(result == 0);
1414     }
1415 #endif
1416 
1417     prologue_size = tcg_current_code_size(s);
1418     perf_report_prologue(s->code_gen_ptr, prologue_size);
1419 
1420 #ifndef CONFIG_TCG_INTERPRETER
1421     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1422                         (uintptr_t)s->code_buf, prologue_size);
1423 #endif
1424 
1425     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1426         FILE *logfile = qemu_log_trylock();
1427         if (logfile) {
1428             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1429             if (s->data_gen_ptr) {
1430                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1431                 size_t data_size = prologue_size - code_size;
1432                 size_t i;
1433 
1434                 disas(logfile, s->code_gen_ptr, code_size);
1435 
1436                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1437                     if (sizeof(tcg_target_ulong) == 8) {
1438                         fprintf(logfile,
1439                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1440                                 (uintptr_t)s->data_gen_ptr + i,
1441                                 *(uint64_t *)(s->data_gen_ptr + i));
1442                     } else {
1443                         fprintf(logfile,
1444                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1445                                 (uintptr_t)s->data_gen_ptr + i,
1446                                 *(uint32_t *)(s->data_gen_ptr + i));
1447                     }
1448                 }
1449             } else {
1450                 disas(logfile, s->code_gen_ptr, prologue_size);
1451             }
1452             fprintf(logfile, "\n");
1453             qemu_log_unlock(logfile);
1454         }
1455     }
1456 
1457 #ifndef CONFIG_TCG_INTERPRETER
1458     /*
1459      * Assert that goto_ptr is implemented completely, setting an epilogue.
1460      * For tci, we use NULL as the signal to return from the interpreter,
1461      * so skip this check.
1462      */
1463     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1464 #endif
1465 
1466     tcg_region_prologue_set(s);
1467 }
1468 
1469 void tcg_func_start(TCGContext *s)
1470 {
1471     tcg_pool_reset(s);
1472     s->nb_temps = s->nb_globals;
1473 
1474     /* No temps have been previously allocated for size or locality.  */
1475     memset(s->free_temps, 0, sizeof(s->free_temps));
1476 
1477     /* No constant temps have been previously allocated. */
1478     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1479         if (s->const_table[i]) {
1480             g_hash_table_remove_all(s->const_table[i]);
1481         }
1482     }
1483 
1484     s->nb_ops = 0;
1485     s->nb_labels = 0;
1486     s->current_frame_offset = s->frame_start;
1487 
1488 #ifdef CONFIG_DEBUG_TCG
1489     s->goto_tb_issue_mask = 0;
1490 #endif
1491 
1492     QTAILQ_INIT(&s->ops);
1493     QTAILQ_INIT(&s->free_ops);
1494     QSIMPLEQ_INIT(&s->labels);
1495 
1496     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1497                      s->addr_type == TCG_TYPE_I64);
1498 
1499 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1500     tcg_debug_assert(s->tlb_fast_offset < 0);
1501     tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1502 #endif
1503 
1504     tcg_debug_assert(s->insn_start_words > 0);
1505 }
1506 
1507 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1508 {
1509     int n = s->nb_temps++;
1510 
1511     if (n >= TCG_MAX_TEMPS) {
1512         tcg_raise_tb_overflow(s);
1513     }
1514     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1515 }
1516 
1517 static TCGTemp *tcg_global_alloc(TCGContext *s)
1518 {
1519     TCGTemp *ts;
1520 
1521     tcg_debug_assert(s->nb_globals == s->nb_temps);
1522     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1523     s->nb_globals++;
1524     ts = tcg_temp_alloc(s);
1525     ts->kind = TEMP_GLOBAL;
1526 
1527     return ts;
1528 }
1529 
1530 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1531                                             TCGReg reg, const char *name)
1532 {
1533     TCGTemp *ts;
1534 
1535     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1536 
1537     ts = tcg_global_alloc(s);
1538     ts->base_type = type;
1539     ts->type = type;
1540     ts->kind = TEMP_FIXED;
1541     ts->reg = reg;
1542     ts->name = name;
1543     tcg_regset_set_reg(s->reserved_regs, reg);
1544 
1545     return ts;
1546 }
1547 
1548 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1549 {
1550     s->frame_start = start;
1551     s->frame_end = start + size;
1552     s->frame_temp
1553         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1554 }
1555 
1556 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1557                                      intptr_t offset, const char *name)
1558 {
1559     TCGContext *s = tcg_ctx;
1560     TCGTemp *base_ts = tcgv_ptr_temp(base);
1561     TCGTemp *ts = tcg_global_alloc(s);
1562     int indirect_reg = 0;
1563 
1564     switch (base_ts->kind) {
1565     case TEMP_FIXED:
1566         break;
1567     case TEMP_GLOBAL:
1568         /* We do not support double-indirect registers.  */
1569         tcg_debug_assert(!base_ts->indirect_reg);
1570         base_ts->indirect_base = 1;
1571         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1572                             ? 2 : 1);
1573         indirect_reg = 1;
1574         break;
1575     default:
1576         g_assert_not_reached();
1577     }
1578 
1579     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1580         TCGTemp *ts2 = tcg_global_alloc(s);
1581         char buf[64];
1582 
1583         ts->base_type = TCG_TYPE_I64;
1584         ts->type = TCG_TYPE_I32;
1585         ts->indirect_reg = indirect_reg;
1586         ts->mem_allocated = 1;
1587         ts->mem_base = base_ts;
1588         ts->mem_offset = offset;
1589         pstrcpy(buf, sizeof(buf), name);
1590         pstrcat(buf, sizeof(buf), "_0");
1591         ts->name = strdup(buf);
1592 
1593         tcg_debug_assert(ts2 == ts + 1);
1594         ts2->base_type = TCG_TYPE_I64;
1595         ts2->type = TCG_TYPE_I32;
1596         ts2->indirect_reg = indirect_reg;
1597         ts2->mem_allocated = 1;
1598         ts2->mem_base = base_ts;
1599         ts2->mem_offset = offset + 4;
1600         ts2->temp_subindex = 1;
1601         pstrcpy(buf, sizeof(buf), name);
1602         pstrcat(buf, sizeof(buf), "_1");
1603         ts2->name = strdup(buf);
1604     } else {
1605         ts->base_type = type;
1606         ts->type = type;
1607         ts->indirect_reg = indirect_reg;
1608         ts->mem_allocated = 1;
1609         ts->mem_base = base_ts;
1610         ts->mem_offset = offset;
1611         ts->name = name;
1612     }
1613     return ts;
1614 }
1615 
1616 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1617 {
1618     TCGContext *s = tcg_ctx;
1619     TCGTemp *ts;
1620     int n;
1621 
1622     if (kind == TEMP_EBB) {
1623         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1624 
1625         if (idx < TCG_MAX_TEMPS) {
1626             /* There is already an available temp with the right type.  */
1627             clear_bit(idx, s->free_temps[type].l);
1628 
1629             ts = &s->temps[idx];
1630             ts->temp_allocated = 1;
1631             tcg_debug_assert(ts->base_type == type);
1632             tcg_debug_assert(ts->kind == kind);
1633             return ts;
1634         }
1635     } else {
1636         tcg_debug_assert(kind == TEMP_TB);
1637     }
1638 
1639     switch (type) {
1640     case TCG_TYPE_I32:
1641     case TCG_TYPE_V64:
1642     case TCG_TYPE_V128:
1643     case TCG_TYPE_V256:
1644         n = 1;
1645         break;
1646     case TCG_TYPE_I64:
1647         n = 64 / TCG_TARGET_REG_BITS;
1648         break;
1649     case TCG_TYPE_I128:
1650         n = 128 / TCG_TARGET_REG_BITS;
1651         break;
1652     default:
1653         g_assert_not_reached();
1654     }
1655 
1656     ts = tcg_temp_alloc(s);
1657     ts->base_type = type;
1658     ts->temp_allocated = 1;
1659     ts->kind = kind;
1660 
1661     if (n == 1) {
1662         ts->type = type;
1663     } else {
1664         ts->type = TCG_TYPE_REG;
1665 
1666         for (int i = 1; i < n; ++i) {
1667             TCGTemp *ts2 = tcg_temp_alloc(s);
1668 
1669             tcg_debug_assert(ts2 == ts + i);
1670             ts2->base_type = type;
1671             ts2->type = TCG_TYPE_REG;
1672             ts2->temp_allocated = 1;
1673             ts2->temp_subindex = i;
1674             ts2->kind = kind;
1675         }
1676     }
1677     return ts;
1678 }
1679 
1680 TCGv_vec tcg_temp_new_vec(TCGType type)
1681 {
1682     TCGTemp *t;
1683 
1684 #ifdef CONFIG_DEBUG_TCG
1685     switch (type) {
1686     case TCG_TYPE_V64:
1687         assert(TCG_TARGET_HAS_v64);
1688         break;
1689     case TCG_TYPE_V128:
1690         assert(TCG_TARGET_HAS_v128);
1691         break;
1692     case TCG_TYPE_V256:
1693         assert(TCG_TARGET_HAS_v256);
1694         break;
1695     default:
1696         g_assert_not_reached();
1697     }
1698 #endif
1699 
1700     t = tcg_temp_new_internal(type, TEMP_EBB);
1701     return temp_tcgv_vec(t);
1702 }
1703 
1704 /* Create a new temp of the same type as an existing temp.  */
1705 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1706 {
1707     TCGTemp *t = tcgv_vec_temp(match);
1708 
1709     tcg_debug_assert(t->temp_allocated != 0);
1710 
1711     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1712     return temp_tcgv_vec(t);
1713 }
1714 
1715 void tcg_temp_free_internal(TCGTemp *ts)
1716 {
1717     TCGContext *s = tcg_ctx;
1718 
1719     switch (ts->kind) {
1720     case TEMP_CONST:
1721     case TEMP_TB:
1722         /* Silently ignore free. */
1723         break;
1724     case TEMP_EBB:
1725         tcg_debug_assert(ts->temp_allocated != 0);
1726         ts->temp_allocated = 0;
1727         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1728         break;
1729     default:
1730         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1731         g_assert_not_reached();
1732     }
1733 }
1734 
1735 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1736 {
1737     TCGContext *s = tcg_ctx;
1738     GHashTable *h = s->const_table[type];
1739     TCGTemp *ts;
1740 
1741     if (h == NULL) {
1742         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1743         s->const_table[type] = h;
1744     }
1745 
1746     ts = g_hash_table_lookup(h, &val);
1747     if (ts == NULL) {
1748         int64_t *val_ptr;
1749 
1750         ts = tcg_temp_alloc(s);
1751 
1752         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1753             TCGTemp *ts2 = tcg_temp_alloc(s);
1754 
1755             tcg_debug_assert(ts2 == ts + 1);
1756 
1757             ts->base_type = TCG_TYPE_I64;
1758             ts->type = TCG_TYPE_I32;
1759             ts->kind = TEMP_CONST;
1760             ts->temp_allocated = 1;
1761 
1762             ts2->base_type = TCG_TYPE_I64;
1763             ts2->type = TCG_TYPE_I32;
1764             ts2->kind = TEMP_CONST;
1765             ts2->temp_allocated = 1;
1766             ts2->temp_subindex = 1;
1767 
1768             /*
1769              * Retain the full value of the 64-bit constant in the low
1770              * part, so that the hash table works.  Actual uses will
1771              * truncate the value to the low part.
1772              */
1773             ts[HOST_BIG_ENDIAN].val = val;
1774             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1775             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1776         } else {
1777             ts->base_type = type;
1778             ts->type = type;
1779             ts->kind = TEMP_CONST;
1780             ts->temp_allocated = 1;
1781             ts->val = val;
1782             val_ptr = &ts->val;
1783         }
1784         g_hash_table_insert(h, val_ptr, ts);
1785     }
1786 
1787     return ts;
1788 }
1789 
1790 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1791 {
1792     val = dup_const(vece, val);
1793     return temp_tcgv_vec(tcg_constant_internal(type, val));
1794 }
1795 
1796 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1797 {
1798     TCGTemp *t = tcgv_vec_temp(match);
1799 
1800     tcg_debug_assert(t->temp_allocated != 0);
1801     return tcg_constant_vec(t->base_type, vece, val);
1802 }
1803 
1804 #ifdef CONFIG_DEBUG_TCG
1805 size_t temp_idx(TCGTemp *ts)
1806 {
1807     ptrdiff_t n = ts - tcg_ctx->temps;
1808     assert(n >= 0 && n < tcg_ctx->nb_temps);
1809     return n;
1810 }
1811 
1812 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1813 {
1814     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1815 
1816     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1817     assert(o % sizeof(TCGTemp) == 0);
1818 
1819     return (void *)tcg_ctx + (uintptr_t)v;
1820 }
1821 #endif /* CONFIG_DEBUG_TCG */
1822 
/*
 * Return true if OP may appear in the opcode stream.
 *
 * Each opcode is mapped to the TCG_TARGET_* value that controls it:
 *  - a first group of opcodes is unconditionally reported as supported;
 *  - optional integer opcodes return their TCG_TARGET_HAS_* flag;
 *  - the base 64-bit opcodes depend on TCG_TARGET_REG_BITS == 64, while
 *    brcond2/setcond2 depend on TCG_TARGET_REG_BITS == 32;
 *  - vector opcodes additionally require that at least one of the
 *    v64/v128/v256 flags is set.
 *
 * Any opcode without an explicit case is asserted (debug builds) to lie
 * strictly between INDEX_op_last_generic and NB_OPS, and is reported as
 * supported.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* Nonzero if any vector width is available. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Unconditionally supported: control flow, calls, guest ld/st. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Base 32-bit integer opcodes: unconditionally supported. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer opcodes, one TCG_TARGET_HAS_* flag each. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare opcodes: only when TCG_TARGET_REG_BITS is 32. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Base 64-bit integer opcodes: only when TCG_TARGET_REG_BITS is 64. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer opcodes, one TCG_TARGET_HAS_* flag each. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Base vector opcodes: supported whenever any vector width is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector opcodes: need have_vec and their own flag. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything else must be past the generic opcodes (debug check). */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2131 
/* Forward declaration, needed by tcg_gen_callN() below. */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2133 
2134 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2135 {
2136     TCGv_i64 extend_free[MAX_CALL_IARGS];
2137     int n_extend = 0;
2138     TCGOp *op;
2139     int i, n, pi = 0, total_args;
2140 
2141     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2142         init_call_layout(info);
2143         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2144     }
2145 
2146     total_args = info->nr_out + info->nr_in + 2;
2147     op = tcg_op_alloc(INDEX_op_call, total_args);
2148 
2149 #ifdef CONFIG_PLUGIN
2150     /* Flag helpers that may affect guest state */
2151     if (tcg_ctx->plugin_insn &&
2152         !(info->flags & TCG_CALL_PLUGIN) &&
2153         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2154         tcg_ctx->plugin_insn->calls_helpers = true;
2155     }
2156 #endif
2157 
2158     TCGOP_CALLO(op) = n = info->nr_out;
2159     switch (n) {
2160     case 0:
2161         tcg_debug_assert(ret == NULL);
2162         break;
2163     case 1:
2164         tcg_debug_assert(ret != NULL);
2165         op->args[pi++] = temp_arg(ret);
2166         break;
2167     case 2:
2168     case 4:
2169         tcg_debug_assert(ret != NULL);
2170         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2171         tcg_debug_assert(ret->temp_subindex == 0);
2172         for (i = 0; i < n; ++i) {
2173             op->args[pi++] = temp_arg(ret + i);
2174         }
2175         break;
2176     default:
2177         g_assert_not_reached();
2178     }
2179 
2180     TCGOP_CALLI(op) = n = info->nr_in;
2181     for (i = 0; i < n; i++) {
2182         const TCGCallArgumentLoc *loc = &info->in[i];
2183         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2184 
2185         switch (loc->kind) {
2186         case TCG_CALL_ARG_NORMAL:
2187         case TCG_CALL_ARG_BY_REF:
2188         case TCG_CALL_ARG_BY_REF_N:
2189             op->args[pi++] = temp_arg(ts);
2190             break;
2191 
2192         case TCG_CALL_ARG_EXTEND_U:
2193         case TCG_CALL_ARG_EXTEND_S:
2194             {
2195                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2196                 TCGv_i32 orig = temp_tcgv_i32(ts);
2197 
2198                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2199                     tcg_gen_ext_i32_i64(temp, orig);
2200                 } else {
2201                     tcg_gen_extu_i32_i64(temp, orig);
2202                 }
2203                 op->args[pi++] = tcgv_i64_arg(temp);
2204                 extend_free[n_extend++] = temp;
2205             }
2206             break;
2207 
2208         default:
2209             g_assert_not_reached();
2210         }
2211     }
2212     op->args[pi++] = (uintptr_t)info->func;
2213     op->args[pi++] = (uintptr_t)info;
2214     tcg_debug_assert(pi == total_args);
2215 
2216     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2217 
2218     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2219     for (i = 0; i < n_extend; ++i) {
2220         tcg_temp_free_i64(extend_free[i]);
2221     }
2222 }
2223 
2224 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2225 {
2226     tcg_gen_callN(info, ret, NULL);
2227 }
2228 
2229 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2230 {
2231     tcg_gen_callN(info, ret, &t1);
2232 }
2233 
2234 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2235 {
2236     TCGTemp *args[2] = { t1, t2 };
2237     tcg_gen_callN(info, ret, args);
2238 }
2239 
2240 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2241                    TCGTemp *t2, TCGTemp *t3)
2242 {
2243     TCGTemp *args[3] = { t1, t2, t3 };
2244     tcg_gen_callN(info, ret, args);
2245 }
2246 
2247 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2248                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2249 {
2250     TCGTemp *args[4] = { t1, t2, t3, t4 };
2251     tcg_gen_callN(info, ret, args);
2252 }
2253 
2254 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2255                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2256 {
2257     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2258     tcg_gen_callN(info, ret, args);
2259 }
2260 
2261 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2262                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2263 {
2264     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2265     tcg_gen_callN(info, ret, args);
2266 }
2267 
2268 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2269                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2270                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2271 {
2272     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2273     tcg_gen_callN(info, ret, args);
2274 }
2275 
2276 static void tcg_reg_alloc_start(TCGContext *s)
2277 {
2278     int i, n;
2279 
2280     for (i = 0, n = s->nb_temps; i < n; i++) {
2281         TCGTemp *ts = &s->temps[i];
2282         TCGTempVal val = TEMP_VAL_MEM;
2283 
2284         switch (ts->kind) {
2285         case TEMP_CONST:
2286             val = TEMP_VAL_CONST;
2287             break;
2288         case TEMP_FIXED:
2289             val = TEMP_VAL_REG;
2290             break;
2291         case TEMP_GLOBAL:
2292             break;
2293         case TEMP_EBB:
2294             val = TEMP_VAL_DEAD;
2295             /* fall through */
2296         case TEMP_TB:
2297             ts->mem_allocated = 0;
2298             break;
2299         default:
2300             g_assert_not_reached();
2301         }
2302         ts->val_type = val;
2303     }
2304 
2305     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2306 }
2307 
2308 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2309                                  TCGTemp *ts)
2310 {
2311     int idx = temp_idx(ts);
2312 
2313     switch (ts->kind) {
2314     case TEMP_FIXED:
2315     case TEMP_GLOBAL:
2316         pstrcpy(buf, buf_size, ts->name);
2317         break;
2318     case TEMP_TB:
2319         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2320         break;
2321     case TEMP_EBB:
2322         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2323         break;
2324     case TEMP_CONST:
2325         switch (ts->type) {
2326         case TCG_TYPE_I32:
2327             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2328             break;
2329 #if TCG_TARGET_REG_BITS > 32
2330         case TCG_TYPE_I64:
2331             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2332             break;
2333 #endif
2334         case TCG_TYPE_V64:
2335         case TCG_TYPE_V128:
2336         case TCG_TYPE_V256:
2337             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2338                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2339             break;
2340         default:
2341             g_assert_not_reached();
2342         }
2343         break;
2344     }
2345     return buf;
2346 }
2347 
2348 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2349                              int buf_size, TCGArg arg)
2350 {
2351     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2352 }
2353 
/*
 * Printable mnemonic for each TCG_COND_* value, indexed by the condition.
 * Indices without an initializer are NULL; tcg_dump_ops() tests for that
 * and prints the raw argument value instead.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2369 
/*
 * Mnemonics for guest load/store operations, indexed by the MemOp bits
 * selected by (MO_BSWAP | MO_SSIZE).  Combinations not listed here stay
 * NULL, in which case tcg_dump_ops() prints the MemOp numerically.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2387 
/*
 * Alignment prefixes, indexed by (memop & MO_AMASK) >> MO_ASHIFT.
 * Each string ends in '+' because tcg_dump_ops() prints it immediately
 * in front of the ldst_name[] entry.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2398 
/*
 * Atomicity prefixes, indexed by (memop & MO_ATOM_MASK) >> MO_ATOM_SHIFT.
 * MO_ATOM_IFALIGN maps to the empty string, so nothing is printed for it;
 * the other entries end in '+' because tcg_dump_ops() prints them in
 * front of the alignment and ldst names.
 */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2407 
/*
 * Names for the bswap flag combinations, indexed by the flag value.
 * Rows are fixed 6-byte char arrays: the longest name ("iz,oz") is five
 * characters plus the terminating NUL.
 *
 * NOTE(review): because this is an array of char arrays rather than of
 * pointers, rows without an initializer are empty strings, not NULL; a
 * pointer null-check on an entry cannot tell them apart.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2415 
2416 static inline bool tcg_regset_single(TCGRegSet d)
2417 {
2418     return (d & (d - 1)) == 0;
2419 }
2420 
2421 static inline TCGReg tcg_regset_first(TCGRegSet d)
2422 {
2423     if (TCG_TARGET_NB_REGS <= 32) {
2424         return ctz32(d);
2425     } else {
2426         return ctz64(d);
2427     }
2428 }
2429 
/*
 * "No-error" fprintf: evaluates to the number of characters written,
 * or to 0 when fprintf reports failure with a negative return value.
 */
#define ne_fprintf(...) \
    ({ int n_ = fprintf(__VA_ARGS__); n_ < 0 ? 0 : n_; })
2433 
2434 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2435 {
2436     char buf[128];
2437     TCGOp *op;
2438 
2439     QTAILQ_FOREACH(op, &s->ops, link) {
2440         int i, k, nb_oargs, nb_iargs, nb_cargs;
2441         const TCGOpDef *def;
2442         TCGOpcode c;
2443         int col = 0;
2444 
2445         c = op->opc;
2446         def = &tcg_op_defs[c];
2447 
2448         if (c == INDEX_op_insn_start) {
2449             nb_oargs = 0;
2450             col += ne_fprintf(f, "\n ----");
2451 
2452             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2453                 col += ne_fprintf(f, " %016" PRIx64,
2454                                   tcg_get_insn_start_param(op, i));
2455             }
2456         } else if (c == INDEX_op_call) {
2457             const TCGHelperInfo *info = tcg_call_info(op);
2458             void *func = tcg_call_func(op);
2459 
2460             /* variable number of arguments */
2461             nb_oargs = TCGOP_CALLO(op);
2462             nb_iargs = TCGOP_CALLI(op);
2463             nb_cargs = def->nb_cargs;
2464 
2465             col += ne_fprintf(f, " %s ", def->name);
2466 
2467             /*
2468              * Print the function name from TCGHelperInfo, if available.
2469              * Note that plugins have a template function for the info,
2470              * but the actual function pointer comes from the plugin.
2471              */
2472             if (func == info->func) {
2473                 col += ne_fprintf(f, "%s", info->name);
2474             } else {
2475                 col += ne_fprintf(f, "plugin(%p)", func);
2476             }
2477 
2478             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2479             for (i = 0; i < nb_oargs; i++) {
2480                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2481                                                             op->args[i]));
2482             }
2483             for (i = 0; i < nb_iargs; i++) {
2484                 TCGArg arg = op->args[nb_oargs + i];
2485                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2486                 col += ne_fprintf(f, ",%s", t);
2487             }
2488         } else {
2489             col += ne_fprintf(f, " %s ", def->name);
2490 
2491             nb_oargs = def->nb_oargs;
2492             nb_iargs = def->nb_iargs;
2493             nb_cargs = def->nb_cargs;
2494 
2495             if (def->flags & TCG_OPF_VECTOR) {
2496                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2497                                   8 << TCGOP_VECE(op));
2498             }
2499 
2500             k = 0;
2501             for (i = 0; i < nb_oargs; i++) {
2502                 const char *sep =  k ? "," : "";
2503                 col += ne_fprintf(f, "%s%s", sep,
2504                                   tcg_get_arg_str(s, buf, sizeof(buf),
2505                                                   op->args[k++]));
2506             }
2507             for (i = 0; i < nb_iargs; i++) {
2508                 const char *sep =  k ? "," : "";
2509                 col += ne_fprintf(f, "%s%s", sep,
2510                                   tcg_get_arg_str(s, buf, sizeof(buf),
2511                                                   op->args[k++]));
2512             }
2513             switch (c) {
2514             case INDEX_op_brcond_i32:
2515             case INDEX_op_setcond_i32:
2516             case INDEX_op_negsetcond_i32:
2517             case INDEX_op_movcond_i32:
2518             case INDEX_op_brcond2_i32:
2519             case INDEX_op_setcond2_i32:
2520             case INDEX_op_brcond_i64:
2521             case INDEX_op_setcond_i64:
2522             case INDEX_op_negsetcond_i64:
2523             case INDEX_op_movcond_i64:
2524             case INDEX_op_cmp_vec:
2525             case INDEX_op_cmpsel_vec:
2526                 if (op->args[k] < ARRAY_SIZE(cond_name)
2527                     && cond_name[op->args[k]]) {
2528                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2529                 } else {
2530                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2531                 }
2532                 i = 1;
2533                 break;
2534             case INDEX_op_qemu_ld_a32_i32:
2535             case INDEX_op_qemu_ld_a64_i32:
2536             case INDEX_op_qemu_st_a32_i32:
2537             case INDEX_op_qemu_st_a64_i32:
2538             case INDEX_op_qemu_st8_a32_i32:
2539             case INDEX_op_qemu_st8_a64_i32:
2540             case INDEX_op_qemu_ld_a32_i64:
2541             case INDEX_op_qemu_ld_a64_i64:
2542             case INDEX_op_qemu_st_a32_i64:
2543             case INDEX_op_qemu_st_a64_i64:
2544             case INDEX_op_qemu_ld_a32_i128:
2545             case INDEX_op_qemu_ld_a64_i128:
2546             case INDEX_op_qemu_st_a32_i128:
2547             case INDEX_op_qemu_st_a64_i128:
2548                 {
2549                     const char *s_al, *s_op, *s_at;
2550                     MemOpIdx oi = op->args[k++];
2551                     MemOp op = get_memop(oi);
2552                     unsigned ix = get_mmuidx(oi);
2553 
2554                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2555                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2556                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2557                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2558 
2559                     /* If all fields are accounted for, print symbolically. */
2560                     if (!op && s_al && s_op && s_at) {
2561                         col += ne_fprintf(f, ",%s%s%s,%u",
2562                                           s_at, s_al, s_op, ix);
2563                     } else {
2564                         op = get_memop(oi);
2565                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2566                     }
2567                     i = 1;
2568                 }
2569                 break;
2570             case INDEX_op_bswap16_i32:
2571             case INDEX_op_bswap16_i64:
2572             case INDEX_op_bswap32_i32:
2573             case INDEX_op_bswap32_i64:
2574             case INDEX_op_bswap64_i64:
2575                 {
2576                     TCGArg flags = op->args[k];
2577                     const char *name = NULL;
2578 
2579                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2580                         name = bswap_flag_name[flags];
2581                     }
2582                     if (name) {
2583                         col += ne_fprintf(f, ",%s", name);
2584                     } else {
2585                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2586                     }
2587                     i = k = 1;
2588                 }
2589                 break;
2590             default:
2591                 i = 0;
2592                 break;
2593             }
2594             switch (c) {
2595             case INDEX_op_set_label:
2596             case INDEX_op_br:
2597             case INDEX_op_brcond_i32:
2598             case INDEX_op_brcond_i64:
2599             case INDEX_op_brcond2_i32:
2600                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2601                                   arg_label(op->args[k])->id);
2602                 i++, k++;
2603                 break;
2604             case INDEX_op_mb:
2605                 {
2606                     TCGBar membar = op->args[k];
2607                     const char *b_op, *m_op;
2608 
2609                     switch (membar & TCG_BAR_SC) {
2610                     case 0:
2611                         b_op = "none";
2612                         break;
2613                     case TCG_BAR_LDAQ:
2614                         b_op = "acq";
2615                         break;
2616                     case TCG_BAR_STRL:
2617                         b_op = "rel";
2618                         break;
2619                     case TCG_BAR_SC:
2620                         b_op = "seq";
2621                         break;
2622                     default:
2623                         g_assert_not_reached();
2624                     }
2625 
2626                     switch (membar & TCG_MO_ALL) {
2627                     case 0:
2628                         m_op = "none";
2629                         break;
2630                     case TCG_MO_LD_LD:
2631                         m_op = "rr";
2632                         break;
2633                     case TCG_MO_LD_ST:
2634                         m_op = "rw";
2635                         break;
2636                     case TCG_MO_ST_LD:
2637                         m_op = "wr";
2638                         break;
2639                     case TCG_MO_ST_ST:
2640                         m_op = "ww";
2641                         break;
2642                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2643                         m_op = "rr+rw";
2644                         break;
2645                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2646                         m_op = "rr+wr";
2647                         break;
2648                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2649                         m_op = "rr+ww";
2650                         break;
2651                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2652                         m_op = "rw+wr";
2653                         break;
2654                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2655                         m_op = "rw+ww";
2656                         break;
2657                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2658                         m_op = "wr+ww";
2659                         break;
2660                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2661                         m_op = "rr+rw+wr";
2662                         break;
2663                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2664                         m_op = "rr+rw+ww";
2665                         break;
2666                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2667                         m_op = "rr+wr+ww";
2668                         break;
2669                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2670                         m_op = "rw+wr+ww";
2671                         break;
2672                     case TCG_MO_ALL:
2673                         m_op = "all";
2674                         break;
2675                     default:
2676                         g_assert_not_reached();
2677                     }
2678 
2679                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2680                     i++, k++;
2681                 }
2682                 break;
2683             default:
2684                 break;
2685             }
2686             for (; i < nb_cargs; i++, k++) {
2687                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2688                                   op->args[k]);
2689             }
2690         }
2691 
2692         if (have_prefs || op->life) {
2693             for (; col < 40; ++col) {
2694                 putc(' ', f);
2695             }
2696         }
2697 
2698         if (op->life) {
2699             unsigned life = op->life;
2700 
2701             if (life & (SYNC_ARG * 3)) {
2702                 ne_fprintf(f, "  sync:");
2703                 for (i = 0; i < 2; ++i) {
2704                     if (life & (SYNC_ARG << i)) {
2705                         ne_fprintf(f, " %d", i);
2706                     }
2707                 }
2708             }
2709             life /= DEAD_ARG;
2710             if (life) {
2711                 ne_fprintf(f, "  dead:");
2712                 for (i = 0; life; ++i, life >>= 1) {
2713                     if (life & 1) {
2714                         ne_fprintf(f, " %d", i);
2715                     }
2716                 }
2717             }
2718         }
2719 
2720         if (have_prefs) {
2721             for (i = 0; i < nb_oargs; ++i) {
2722                 TCGRegSet set = output_pref(op, i);
2723 
2724                 if (i == 0) {
2725                     ne_fprintf(f, "  pref=");
2726                 } else {
2727                     ne_fprintf(f, ",");
2728                 }
2729                 if (set == 0) {
2730                     ne_fprintf(f, "none");
2731                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2732                     ne_fprintf(f, "all");
2733 #ifdef CONFIG_DEBUG_TCG
2734                 } else if (tcg_regset_single(set)) {
2735                     TCGReg reg = tcg_regset_first(set);
2736                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2737 #endif
2738                 } else if (TCG_TARGET_NB_REGS <= 32) {
2739                     ne_fprintf(f, "0x%x", (uint32_t)set);
2740                 } else {
2741                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2742                 }
2743             }
2744         }
2745 
2746         putc('\n', f);
2747     }
2748 }
2749 
2750 /* we give more priority to constraints with less registers */
2751 static int get_constraint_priority(const TCGOpDef *def, int k)
2752 {
2753     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2754     int n = ctpop64(arg_ct->regs);
2755 
2756     /*
2757      * Sort constraints of a single register first, which includes output
2758      * aliases (which must exactly match the input already allocated).
2759      */
2760     if (n == 1 || arg_ct->oalias) {
2761         return INT_MAX;
2762     }
2763 
2764     /*
2765      * Sort register pairs next, first then second immediately after.
2766      * Arbitrarily sort multiple pairs by the index of the first reg;
2767      * there shouldn't be many pairs.
2768      */
2769     switch (arg_ct->pair) {
2770     case 1:
2771     case 3:
2772         return (k + 1) * 2;
2773     case 2:
2774         return (arg_ct->pair_index + 1) * 2 - 1;
2775     }
2776 
2777     /* Finally, sort by decreasing register count. */
2778     assert(n > 1);
2779     return -n;
2780 }
2781 
2782 /* sort from highest priority to lowest */
2783 static void sort_constraints(TCGOpDef *def, int start, int n)
2784 {
2785     int i, j;
2786     TCGArgConstraint *a = def->args_ct;
2787 
2788     for (i = 0; i < n; i++) {
2789         a[start + i].sort_index = start + i;
2790     }
2791     if (n <= 1) {
2792         return;
2793     }
2794     for (i = 0; i < n - 1; i++) {
2795         for (j = i + 1; j < n; j++) {
2796             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2797             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2798             if (p1 < p2) {
2799                 int tmp = a[start + i].sort_index;
2800                 a[start + i].sort_index = a[start + j].sort_index;
2801                 a[start + j].sort_index = tmp;
2802             }
2803         }
2804     }
2805 }
2806 
2807 static void process_op_defs(TCGContext *s)
2808 {
2809     TCGOpcode op;
2810 
2811     for (op = 0; op < NB_OPS; op++) {
2812         TCGOpDef *def = &tcg_op_defs[op];
2813         const TCGTargetOpDef *tdefs;
2814         bool saw_alias_pair = false;
2815         int i, o, i2, o2, nb_args;
2816 
2817         if (def->flags & TCG_OPF_NOT_PRESENT) {
2818             continue;
2819         }
2820 
2821         nb_args = def->nb_iargs + def->nb_oargs;
2822         if (nb_args == 0) {
2823             continue;
2824         }
2825 
2826         /*
2827          * Macro magic should make it impossible, but double-check that
2828          * the array index is in range.  Since the signness of an enum
2829          * is implementation defined, force the result to unsigned.
2830          */
2831         unsigned con_set = tcg_target_op_def(op);
2832         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2833         tdefs = &constraint_sets[con_set];
2834 
2835         for (i = 0; i < nb_args; i++) {
2836             const char *ct_str = tdefs->args_ct_str[i];
2837             bool input_p = i >= def->nb_oargs;
2838 
2839             /* Incomplete TCGTargetOpDef entry. */
2840             tcg_debug_assert(ct_str != NULL);
2841 
2842             switch (*ct_str) {
2843             case '0' ... '9':
2844                 o = *ct_str - '0';
2845                 tcg_debug_assert(input_p);
2846                 tcg_debug_assert(o < def->nb_oargs);
2847                 tcg_debug_assert(def->args_ct[o].regs != 0);
2848                 tcg_debug_assert(!def->args_ct[o].oalias);
2849                 def->args_ct[i] = def->args_ct[o];
2850                 /* The output sets oalias.  */
2851                 def->args_ct[o].oalias = 1;
2852                 def->args_ct[o].alias_index = i;
2853                 /* The input sets ialias. */
2854                 def->args_ct[i].ialias = 1;
2855                 def->args_ct[i].alias_index = o;
2856                 if (def->args_ct[i].pair) {
2857                     saw_alias_pair = true;
2858                 }
2859                 tcg_debug_assert(ct_str[1] == '\0');
2860                 continue;
2861 
2862             case '&':
2863                 tcg_debug_assert(!input_p);
2864                 def->args_ct[i].newreg = true;
2865                 ct_str++;
2866                 break;
2867 
2868             case 'p': /* plus */
2869                 /* Allocate to the register after the previous. */
2870                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2871                 o = i - 1;
2872                 tcg_debug_assert(!def->args_ct[o].pair);
2873                 tcg_debug_assert(!def->args_ct[o].ct);
2874                 def->args_ct[i] = (TCGArgConstraint){
2875                     .pair = 2,
2876                     .pair_index = o,
2877                     .regs = def->args_ct[o].regs << 1,
2878                 };
2879                 def->args_ct[o].pair = 1;
2880                 def->args_ct[o].pair_index = i;
2881                 tcg_debug_assert(ct_str[1] == '\0');
2882                 continue;
2883 
2884             case 'm': /* minus */
2885                 /* Allocate to the register before the previous. */
2886                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2887                 o = i - 1;
2888                 tcg_debug_assert(!def->args_ct[o].pair);
2889                 tcg_debug_assert(!def->args_ct[o].ct);
2890                 def->args_ct[i] = (TCGArgConstraint){
2891                     .pair = 1,
2892                     .pair_index = o,
2893                     .regs = def->args_ct[o].regs >> 1,
2894                 };
2895                 def->args_ct[o].pair = 2;
2896                 def->args_ct[o].pair_index = i;
2897                 tcg_debug_assert(ct_str[1] == '\0');
2898                 continue;
2899             }
2900 
2901             do {
2902                 switch (*ct_str) {
2903                 case 'i':
2904                     def->args_ct[i].ct |= TCG_CT_CONST;
2905                     break;
2906 
2907                 /* Include all of the target-specific constraints. */
2908 
2909 #undef CONST
2910 #define CONST(CASE, MASK) \
2911     case CASE: def->args_ct[i].ct |= MASK; break;
2912 #define REGS(CASE, MASK) \
2913     case CASE: def->args_ct[i].regs |= MASK; break;
2914 
2915 #include "tcg-target-con-str.h"
2916 
2917 #undef REGS
2918 #undef CONST
2919                 default:
2920                 case '0' ... '9':
2921                 case '&':
2922                 case 'p':
2923                 case 'm':
2924                     /* Typo in TCGTargetOpDef constraint. */
2925                     g_assert_not_reached();
2926                 }
2927             } while (*++ct_str != '\0');
2928         }
2929 
2930         /* TCGTargetOpDef entry with too much information? */
2931         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2932 
2933         /*
2934          * Fix up output pairs that are aliased with inputs.
2935          * When we created the alias, we copied pair from the output.
2936          * There are three cases:
2937          *    (1a) Pairs of inputs alias pairs of outputs.
2938          *    (1b) One input aliases the first of a pair of outputs.
2939          *    (2)  One input aliases the second of a pair of outputs.
2940          *
2941          * Case 1a is handled by making sure that the pair_index'es are
2942          * properly updated so that they appear the same as a pair of inputs.
2943          *
2944          * Case 1b is handled by setting the pair_index of the input to
2945          * itself, simply so it doesn't point to an unrelated argument.
2946          * Since we don't encounter the "second" during the input allocation
2947          * phase, nothing happens with the second half of the input pair.
2948          *
2949          * Case 2 is handled by setting the second input to pair=3, the
2950          * first output to pair=3, and the pair_index'es to match.
2951          */
2952         if (saw_alias_pair) {
2953             for (i = def->nb_oargs; i < nb_args; i++) {
2954                 /*
2955                  * Since [0-9pm] must be alone in the constraint string,
2956                  * the only way they can both be set is if the pair comes
2957                  * from the output alias.
2958                  */
2959                 if (!def->args_ct[i].ialias) {
2960                     continue;
2961                 }
2962                 switch (def->args_ct[i].pair) {
2963                 case 0:
2964                     break;
2965                 case 1:
2966                     o = def->args_ct[i].alias_index;
2967                     o2 = def->args_ct[o].pair_index;
2968                     tcg_debug_assert(def->args_ct[o].pair == 1);
2969                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2970                     if (def->args_ct[o2].oalias) {
2971                         /* Case 1a */
2972                         i2 = def->args_ct[o2].alias_index;
2973                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2974                         def->args_ct[i2].pair_index = i;
2975                         def->args_ct[i].pair_index = i2;
2976                     } else {
2977                         /* Case 1b */
2978                         def->args_ct[i].pair_index = i;
2979                     }
2980                     break;
2981                 case 2:
2982                     o = def->args_ct[i].alias_index;
2983                     o2 = def->args_ct[o].pair_index;
2984                     tcg_debug_assert(def->args_ct[o].pair == 2);
2985                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2986                     if (def->args_ct[o2].oalias) {
2987                         /* Case 1a */
2988                         i2 = def->args_ct[o2].alias_index;
2989                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2990                         def->args_ct[i2].pair_index = i;
2991                         def->args_ct[i].pair_index = i2;
2992                     } else {
2993                         /* Case 2 */
2994                         def->args_ct[i].pair = 3;
2995                         def->args_ct[o2].pair = 3;
2996                         def->args_ct[i].pair_index = o2;
2997                         def->args_ct[o2].pair_index = i;
2998                     }
2999                     break;
3000                 default:
3001                     g_assert_not_reached();
3002                 }
3003             }
3004         }
3005 
3006         /* sort the constraints (XXX: this is just an heuristic) */
3007         sort_constraints(def, 0, def->nb_oargs);
3008         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3009     }
3010 }
3011 
3012 static void remove_label_use(TCGOp *op, int idx)
3013 {
3014     TCGLabel *label = arg_label(op->args[idx]);
3015     TCGLabelUse *use;
3016 
3017     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3018         if (use->op == op) {
3019             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3020             return;
3021         }
3022     }
3023     g_assert_not_reached();
3024 }
3025 
3026 void tcg_op_remove(TCGContext *s, TCGOp *op)
3027 {
3028     switch (op->opc) {
3029     case INDEX_op_br:
3030         remove_label_use(op, 0);
3031         break;
3032     case INDEX_op_brcond_i32:
3033     case INDEX_op_brcond_i64:
3034         remove_label_use(op, 3);
3035         break;
3036     case INDEX_op_brcond2_i32:
3037         remove_label_use(op, 5);
3038         break;
3039     default:
3040         break;
3041     }
3042 
3043     QTAILQ_REMOVE(&s->ops, op, link);
3044     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3045     s->nb_ops--;
3046 }
3047 
3048 void tcg_remove_ops_after(TCGOp *op)
3049 {
3050     TCGContext *s = tcg_ctx;
3051 
3052     while (true) {
3053         TCGOp *last = tcg_last_op();
3054         if (last == op) {
3055             return;
3056         }
3057         tcg_op_remove(s, last);
3058     }
3059 }
3060 
3061 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3062 {
3063     TCGContext *s = tcg_ctx;
3064     TCGOp *op = NULL;
3065 
3066     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3067         QTAILQ_FOREACH(op, &s->free_ops, link) {
3068             if (nargs <= op->nargs) {
3069                 QTAILQ_REMOVE(&s->free_ops, op, link);
3070                 nargs = op->nargs;
3071                 goto found;
3072             }
3073         }
3074     }
3075 
3076     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3077     nargs = MAX(4, nargs);
3078     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3079 
3080  found:
3081     memset(op, 0, offsetof(TCGOp, link));
3082     op->opc = opc;
3083     op->nargs = nargs;
3084 
3085     /* Check for bitfield overflow. */
3086     tcg_debug_assert(op->nargs == nargs);
3087 
3088     s->nb_ops++;
3089     return op;
3090 }
3091 
3092 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3093 {
3094     TCGOp *op = tcg_op_alloc(opc, nargs);
3095     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3096     return op;
3097 }
3098 
3099 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3100                             TCGOpcode opc, unsigned nargs)
3101 {
3102     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3103     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3104     return new_op;
3105 }
3106 
3107 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3108                            TCGOpcode opc, unsigned nargs)
3109 {
3110     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3111     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3112     return new_op;
3113 }
3114 
3115 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3116 {
3117     TCGLabelUse *u;
3118 
3119     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3120         TCGOp *op = u->op;
3121         switch (op->opc) {
3122         case INDEX_op_br:
3123             op->args[0] = label_arg(to);
3124             break;
3125         case INDEX_op_brcond_i32:
3126         case INDEX_op_brcond_i64:
3127             op->args[3] = label_arg(to);
3128             break;
3129         case INDEX_op_brcond2_i32:
3130             op->args[5] = label_arg(to);
3131             break;
3132         default:
3133             g_assert_not_reached();
3134         }
3135     }
3136 
3137     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3138 }
3139 
3140 /* Reachable analysis : remove unreachable code.  */
3141 static void __attribute__((noinline))
3142 reachable_code_pass(TCGContext *s)
3143 {
3144     TCGOp *op, *op_next, *op_prev;
3145     bool dead = false;
3146 
3147     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3148         bool remove = dead;
3149         TCGLabel *label;
3150 
3151         switch (op->opc) {
3152         case INDEX_op_set_label:
3153             label = arg_label(op->args[0]);
3154 
3155             /*
3156              * Note that the first op in the TB is always a load,
3157              * so there is always something before a label.
3158              */
3159             op_prev = QTAILQ_PREV(op, link);
3160 
3161             /*
3162              * If we find two sequential labels, move all branches to
3163              * reference the second label and remove the first label.
3164              * Do this before branch to next optimization, so that the
3165              * middle label is out of the way.
3166              */
3167             if (op_prev->opc == INDEX_op_set_label) {
3168                 move_label_uses(label, arg_label(op_prev->args[0]));
3169                 tcg_op_remove(s, op_prev);
3170                 op_prev = QTAILQ_PREV(op, link);
3171             }
3172 
3173             /*
3174              * Optimization can fold conditional branches to unconditional.
3175              * If we find a label which is preceded by an unconditional
3176              * branch to next, remove the branch.  We couldn't do this when
3177              * processing the branch because any dead code between the branch
3178              * and label had not yet been removed.
3179              */
3180             if (op_prev->opc == INDEX_op_br &&
3181                 label == arg_label(op_prev->args[0])) {
3182                 tcg_op_remove(s, op_prev);
3183                 /* Fall through means insns become live again.  */
3184                 dead = false;
3185             }
3186 
3187             if (QSIMPLEQ_EMPTY(&label->branches)) {
3188                 /*
3189                  * While there is an occasional backward branch, virtually
3190                  * all branches generated by the translators are forward.
3191                  * Which means that generally we will have already removed
3192                  * all references to the label that will be, and there is
3193                  * little to be gained by iterating.
3194                  */
3195                 remove = true;
3196             } else {
3197                 /* Once we see a label, insns become live again.  */
3198                 dead = false;
3199                 remove = false;
3200             }
3201             break;
3202 
3203         case INDEX_op_br:
3204         case INDEX_op_exit_tb:
3205         case INDEX_op_goto_ptr:
3206             /* Unconditional branches; everything following is dead.  */
3207             dead = true;
3208             break;
3209 
3210         case INDEX_op_call:
3211             /* Notice noreturn helper calls, raising exceptions.  */
3212             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3213                 dead = true;
3214             }
3215             break;
3216 
3217         case INDEX_op_insn_start:
3218             /* Never remove -- we need to keep these for unwind.  */
3219             remove = false;
3220             break;
3221 
3222         default:
3223             break;
3224         }
3225 
3226         if (remove) {
3227             tcg_op_remove(s, op);
3228         }
3229     }
3230 }
3231 
/* Bits of TCGTemp.state as used by the liveness passes. */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/*
 * Test the dead / sync bit of argument 'n' in the variable 'arg_life',
 * which must be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   ((DEAD_ARG << (n)) & arg_life)
#define NEED_SYNC_ARG(n) ((SYNC_ARG << (n)) & arg_life)
3237 
3238 /* For liveness_pass_1, the register preferences for a given temp.  */
3239 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3240 {
3241     return ts->state_ptr;
3242 }
3243 
3244 /* For liveness_pass_1, reset the preferences for a given temp to the
3245  * maximal regset for its type.
3246  */
3247 static inline void la_reset_pref(TCGTemp *ts)
3248 {
3249     *la_temp_pref(ts)
3250         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3251 }
3252 
3253 /* liveness analysis: end of function: all temps are dead, and globals
3254    should be in memory. */
3255 static void la_func_end(TCGContext *s, int ng, int nt)
3256 {
3257     int i;
3258 
3259     for (i = 0; i < ng; ++i) {
3260         s->temps[i].state = TS_DEAD | TS_MEM;
3261         la_reset_pref(&s->temps[i]);
3262     }
3263     for (i = ng; i < nt; ++i) {
3264         s->temps[i].state = TS_DEAD;
3265         la_reset_pref(&s->temps[i]);
3266     }
3267 }
3268 
3269 /* liveness analysis: end of basic block: all temps are dead, globals
3270    and local temps should be in memory. */
3271 static void la_bb_end(TCGContext *s, int ng, int nt)
3272 {
3273     int i;
3274 
3275     for (i = 0; i < nt; ++i) {
3276         TCGTemp *ts = &s->temps[i];
3277         int state;
3278 
3279         switch (ts->kind) {
3280         case TEMP_FIXED:
3281         case TEMP_GLOBAL:
3282         case TEMP_TB:
3283             state = TS_DEAD | TS_MEM;
3284             break;
3285         case TEMP_EBB:
3286         case TEMP_CONST:
3287             state = TS_DEAD;
3288             break;
3289         default:
3290             g_assert_not_reached();
3291         }
3292         ts->state = state;
3293         la_reset_pref(ts);
3294     }
3295 }
3296 
3297 /* liveness analysis: sync globals back to memory.  */
3298 static void la_global_sync(TCGContext *s, int ng)
3299 {
3300     int i;
3301 
3302     for (i = 0; i < ng; ++i) {
3303         int state = s->temps[i].state;
3304         s->temps[i].state = state | TS_MEM;
3305         if (state == TS_DEAD) {
3306             /* If the global was previously dead, reset prefs.  */
3307             la_reset_pref(&s->temps[i]);
3308         }
3309     }
3310 }
3311 
3312 /*
3313  * liveness analysis: conditional branch: all temps are dead unless
3314  * explicitly live-across-conditional-branch, globals and local temps
3315  * should be synced.
3316  */
3317 static void la_bb_sync(TCGContext *s, int ng, int nt)
3318 {
3319     la_global_sync(s, ng);
3320 
3321     for (int i = ng; i < nt; ++i) {
3322         TCGTemp *ts = &s->temps[i];
3323         int state;
3324 
3325         switch (ts->kind) {
3326         case TEMP_TB:
3327             state = ts->state;
3328             ts->state = state | TS_MEM;
3329             if (state != TS_DEAD) {
3330                 continue;
3331             }
3332             break;
3333         case TEMP_EBB:
3334         case TEMP_CONST:
3335             continue;
3336         default:
3337             g_assert_not_reached();
3338         }
3339         la_reset_pref(&s->temps[i]);
3340     }
3341 }
3342 
3343 /* liveness analysis: sync globals back to memory and kill.  */
3344 static void la_global_kill(TCGContext *s, int ng)
3345 {
3346     int i;
3347 
3348     for (i = 0; i < ng; i++) {
3349         s->temps[i].state = TS_DEAD | TS_MEM;
3350         la_reset_pref(&s->temps[i]);
3351     }
3352 }
3353 
3354 /* liveness analysis: note live globals crossing calls.  */
3355 static void la_cross_call(TCGContext *s, int nt)
3356 {
3357     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3358     int i;
3359 
3360     for (i = 0; i < nt; i++) {
3361         TCGTemp *ts = &s->temps[i];
3362         if (!(ts->state & TS_DEAD)) {
3363             TCGRegSet *pset = la_temp_pref(ts);
3364             TCGRegSet set = *pset;
3365 
3366             set &= mask;
3367             /* If the combination is not possible, restart.  */
3368             if (set == 0) {
3369                 set = tcg_target_available_regs[ts->type] & mask;
3370             }
3371             *pset = set;
3372         }
3373     }
3374 }
3375 
3376 /*
3377  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3378  * to TEMP_EBB, if possible.
3379  */
3380 static void __attribute__((noinline))
3381 liveness_pass_0(TCGContext *s)
3382 {
3383     void * const multiple_ebb = (void *)(uintptr_t)-1;
3384     int nb_temps = s->nb_temps;
3385     TCGOp *op, *ebb;
3386 
3387     for (int i = s->nb_globals; i < nb_temps; ++i) {
3388         s->temps[i].state_ptr = NULL;
3389     }
3390 
3391     /*
3392      * Represent each EBB by the op at which it begins.  In the case of
3393      * the first EBB, this is the first op, otherwise it is a label.
3394      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3395      * within a single EBB, else MULTIPLE_EBB.
3396      */
3397     ebb = QTAILQ_FIRST(&s->ops);
3398     QTAILQ_FOREACH(op, &s->ops, link) {
3399         const TCGOpDef *def;
3400         int nb_oargs, nb_iargs;
3401 
3402         switch (op->opc) {
3403         case INDEX_op_set_label:
3404             ebb = op;
3405             continue;
3406         case INDEX_op_discard:
3407             continue;
3408         case INDEX_op_call:
3409             nb_oargs = TCGOP_CALLO(op);
3410             nb_iargs = TCGOP_CALLI(op);
3411             break;
3412         default:
3413             def = &tcg_op_defs[op->opc];
3414             nb_oargs = def->nb_oargs;
3415             nb_iargs = def->nb_iargs;
3416             break;
3417         }
3418 
3419         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3420             TCGTemp *ts = arg_temp(op->args[i]);
3421 
3422             if (ts->kind != TEMP_TB) {
3423                 continue;
3424             }
3425             if (ts->state_ptr == NULL) {
3426                 ts->state_ptr = ebb;
3427             } else if (ts->state_ptr != ebb) {
3428                 ts->state_ptr = multiple_ebb;
3429             }
3430         }
3431     }
3432 
3433     /*
3434      * For TEMP_TB that turned out not to be used beyond one EBB,
3435      * reduce the liveness to TEMP_EBB.
3436      */
3437     for (int i = s->nb_globals; i < nb_temps; ++i) {
3438         TCGTemp *ts = &s->temps[i];
3439         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3440             ts->kind = TEMP_EBB;
3441         }
3442     }
3443 }
3444 
3445 /* Liveness analysis : update the opc_arg_life array to tell if a
3446    given input arguments is dead. Instructions updating dead
3447    temporaries are removed. */
3448 static void __attribute__((noinline))
3449 liveness_pass_1(TCGContext *s)
3450 {
3451     int nb_globals = s->nb_globals;
3452     int nb_temps = s->nb_temps;
3453     TCGOp *op, *op_prev;
3454     TCGRegSet *prefs;
3455     int i;
3456 
3457     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3458     for (i = 0; i < nb_temps; ++i) {
3459         s->temps[i].state_ptr = prefs + i;
3460     }
3461 
3462     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3463     la_func_end(s, nb_globals, nb_temps);
3464 
3465     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3466         int nb_iargs, nb_oargs;
3467         TCGOpcode opc_new, opc_new2;
3468         bool have_opc_new2;
3469         TCGLifeData arg_life = 0;
3470         TCGTemp *ts;
3471         TCGOpcode opc = op->opc;
3472         const TCGOpDef *def = &tcg_op_defs[opc];
3473 
3474         switch (opc) {
3475         case INDEX_op_call:
3476             {
3477                 const TCGHelperInfo *info = tcg_call_info(op);
3478                 int call_flags = tcg_call_flags(op);
3479 
3480                 nb_oargs = TCGOP_CALLO(op);
3481                 nb_iargs = TCGOP_CALLI(op);
3482 
3483                 /* pure functions can be removed if their result is unused */
3484                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3485                     for (i = 0; i < nb_oargs; i++) {
3486                         ts = arg_temp(op->args[i]);
3487                         if (ts->state != TS_DEAD) {
3488                             goto do_not_remove_call;
3489                         }
3490                     }
3491                     goto do_remove;
3492                 }
3493             do_not_remove_call:
3494 
3495                 /* Output args are dead.  */
3496                 for (i = 0; i < nb_oargs; i++) {
3497                     ts = arg_temp(op->args[i]);
3498                     if (ts->state & TS_DEAD) {
3499                         arg_life |= DEAD_ARG << i;
3500                     }
3501                     if (ts->state & TS_MEM) {
3502                         arg_life |= SYNC_ARG << i;
3503                     }
3504                     ts->state = TS_DEAD;
3505                     la_reset_pref(ts);
3506                 }
3507 
3508                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3509                 memset(op->output_pref, 0, sizeof(op->output_pref));
3510 
3511                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3512                                     TCG_CALL_NO_READ_GLOBALS))) {
3513                     la_global_kill(s, nb_globals);
3514                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3515                     la_global_sync(s, nb_globals);
3516                 }
3517 
3518                 /* Record arguments that die in this helper.  */
3519                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3520                     ts = arg_temp(op->args[i]);
3521                     if (ts->state & TS_DEAD) {
3522                         arg_life |= DEAD_ARG << i;
3523                     }
3524                 }
3525 
3526                 /* For all live registers, remove call-clobbered prefs.  */
3527                 la_cross_call(s, nb_temps);
3528 
3529                 /*
3530                  * Input arguments are live for preceding opcodes.
3531                  *
3532                  * For those arguments that die, and will be allocated in
3533                  * registers, clear the register set for that arg, to be
3534                  * filled in below.  For args that will be on the stack,
3535                  * reset to any available reg.  Process arguments in reverse
3536                  * order so that if a temp is used more than once, the stack
3537                  * reset to max happens before the register reset to 0.
3538                  */
3539                 for (i = nb_iargs - 1; i >= 0; i--) {
3540                     const TCGCallArgumentLoc *loc = &info->in[i];
3541                     ts = arg_temp(op->args[nb_oargs + i]);
3542 
3543                     if (ts->state & TS_DEAD) {
3544                         switch (loc->kind) {
3545                         case TCG_CALL_ARG_NORMAL:
3546                         case TCG_CALL_ARG_EXTEND_U:
3547                         case TCG_CALL_ARG_EXTEND_S:
3548                             if (arg_slot_reg_p(loc->arg_slot)) {
3549                                 *la_temp_pref(ts) = 0;
3550                                 break;
3551                             }
3552                             /* fall through */
3553                         default:
3554                             *la_temp_pref(ts) =
3555                                 tcg_target_available_regs[ts->type];
3556                             break;
3557                         }
3558                         ts->state &= ~TS_DEAD;
3559                     }
3560                 }
3561 
3562                 /*
3563                  * For each input argument, add its input register to prefs.
3564                  * If a temp is used once, this produces a single set bit;
3565                  * if a temp is used multiple times, this produces a set.
3566                  */
3567                 for (i = 0; i < nb_iargs; i++) {
3568                     const TCGCallArgumentLoc *loc = &info->in[i];
3569                     ts = arg_temp(op->args[nb_oargs + i]);
3570 
3571                     switch (loc->kind) {
3572                     case TCG_CALL_ARG_NORMAL:
3573                     case TCG_CALL_ARG_EXTEND_U:
3574                     case TCG_CALL_ARG_EXTEND_S:
3575                         if (arg_slot_reg_p(loc->arg_slot)) {
3576                             tcg_regset_set_reg(*la_temp_pref(ts),
3577                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3578                         }
3579                         break;
3580                     default:
3581                         break;
3582                     }
3583                 }
3584             }
3585             break;
3586         case INDEX_op_insn_start:
3587             break;
3588         case INDEX_op_discard:
3589             /* mark the temporary as dead */
3590             ts = arg_temp(op->args[0]);
3591             ts->state = TS_DEAD;
3592             la_reset_pref(ts);
3593             break;
3594 
3595         case INDEX_op_add2_i32:
3596             opc_new = INDEX_op_add_i32;
3597             goto do_addsub2;
3598         case INDEX_op_sub2_i32:
3599             opc_new = INDEX_op_sub_i32;
3600             goto do_addsub2;
3601         case INDEX_op_add2_i64:
3602             opc_new = INDEX_op_add_i64;
3603             goto do_addsub2;
3604         case INDEX_op_sub2_i64:
3605             opc_new = INDEX_op_sub_i64;
3606         do_addsub2:
3607             nb_iargs = 4;
3608             nb_oargs = 2;
3609             /* Test if the high part of the operation is dead, but not
3610                the low part.  The result can be optimized to a simple
3611                add or sub.  This happens often for x86_64 guest when the
3612                cpu mode is set to 32 bit.  */
3613             if (arg_temp(op->args[1])->state == TS_DEAD) {
3614                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3615                     goto do_remove;
3616                 }
3617                 /* Replace the opcode and adjust the args in place,
3618                    leaving 3 unused args at the end.  */
3619                 op->opc = opc = opc_new;
3620                 op->args[1] = op->args[2];
3621                 op->args[2] = op->args[4];
3622                 /* Fall through and mark the single-word operation live.  */
3623                 nb_iargs = 2;
3624                 nb_oargs = 1;
3625             }
3626             goto do_not_remove;
3627 
3628         case INDEX_op_mulu2_i32:
3629             opc_new = INDEX_op_mul_i32;
3630             opc_new2 = INDEX_op_muluh_i32;
3631             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3632             goto do_mul2;
3633         case INDEX_op_muls2_i32:
3634             opc_new = INDEX_op_mul_i32;
3635             opc_new2 = INDEX_op_mulsh_i32;
3636             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3637             goto do_mul2;
3638         case INDEX_op_mulu2_i64:
3639             opc_new = INDEX_op_mul_i64;
3640             opc_new2 = INDEX_op_muluh_i64;
3641             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3642             goto do_mul2;
3643         case INDEX_op_muls2_i64:
3644             opc_new = INDEX_op_mul_i64;
3645             opc_new2 = INDEX_op_mulsh_i64;
3646             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3647             goto do_mul2;
3648         do_mul2:
3649             nb_iargs = 2;
3650             nb_oargs = 2;
3651             if (arg_temp(op->args[1])->state == TS_DEAD) {
3652                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3653                     /* Both parts of the operation are dead.  */
3654                     goto do_remove;
3655                 }
3656                 /* The high part of the operation is dead; generate the low. */
3657                 op->opc = opc = opc_new;
3658                 op->args[1] = op->args[2];
3659                 op->args[2] = op->args[3];
3660             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3661                 /* The low part of the operation is dead; generate the high. */
3662                 op->opc = opc = opc_new2;
3663                 op->args[0] = op->args[1];
3664                 op->args[1] = op->args[2];
3665                 op->args[2] = op->args[3];
3666             } else {
3667                 goto do_not_remove;
3668             }
3669             /* Mark the single-word operation live.  */
3670             nb_oargs = 1;
3671             goto do_not_remove;
3672 
3673         default:
3674             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3675             nb_iargs = def->nb_iargs;
3676             nb_oargs = def->nb_oargs;
3677 
3678             /* Test if the operation can be removed because all
3679                its outputs are dead. We assume that nb_oargs == 0
3680                implies side effects */
3681             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3682                 for (i = 0; i < nb_oargs; i++) {
3683                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3684                         goto do_not_remove;
3685                     }
3686                 }
3687                 goto do_remove;
3688             }
3689             goto do_not_remove;
3690 
3691         do_remove:
3692             tcg_op_remove(s, op);
3693             break;
3694 
3695         do_not_remove:
3696             for (i = 0; i < nb_oargs; i++) {
3697                 ts = arg_temp(op->args[i]);
3698 
3699                 /* Remember the preference of the uses that followed.  */
3700                 if (i < ARRAY_SIZE(op->output_pref)) {
3701                     op->output_pref[i] = *la_temp_pref(ts);
3702                 }
3703 
3704                 /* Output args are dead.  */
3705                 if (ts->state & TS_DEAD) {
3706                     arg_life |= DEAD_ARG << i;
3707                 }
3708                 if (ts->state & TS_MEM) {
3709                     arg_life |= SYNC_ARG << i;
3710                 }
3711                 ts->state = TS_DEAD;
3712                 la_reset_pref(ts);
3713             }
3714 
3715             /* If end of basic block, update.  */
3716             if (def->flags & TCG_OPF_BB_EXIT) {
3717                 la_func_end(s, nb_globals, nb_temps);
3718             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3719                 la_bb_sync(s, nb_globals, nb_temps);
3720             } else if (def->flags & TCG_OPF_BB_END) {
3721                 la_bb_end(s, nb_globals, nb_temps);
3722             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3723                 la_global_sync(s, nb_globals);
3724                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3725                     la_cross_call(s, nb_temps);
3726                 }
3727             }
3728 
3729             /* Record arguments that die in this opcode.  */
3730             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3731                 ts = arg_temp(op->args[i]);
3732                 if (ts->state & TS_DEAD) {
3733                     arg_life |= DEAD_ARG << i;
3734                 }
3735             }
3736 
3737             /* Input arguments are live for preceding opcodes.  */
3738             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3739                 ts = arg_temp(op->args[i]);
3740                 if (ts->state & TS_DEAD) {
3741                     /* For operands that were dead, initially allow
3742                        all regs for the type.  */
3743                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3744                     ts->state &= ~TS_DEAD;
3745                 }
3746             }
3747 
3748             /* Incorporate constraints for this operand.  */
3749             switch (opc) {
3750             case INDEX_op_mov_i32:
3751             case INDEX_op_mov_i64:
3752                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3753                    have proper constraints.  That said, special case
3754                    moves to propagate preferences backward.  */
3755                 if (IS_DEAD_ARG(1)) {
3756                     *la_temp_pref(arg_temp(op->args[0]))
3757                         = *la_temp_pref(arg_temp(op->args[1]));
3758                 }
3759                 break;
3760 
3761             default:
3762                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3763                     const TCGArgConstraint *ct = &def->args_ct[i];
3764                     TCGRegSet set, *pset;
3765 
3766                     ts = arg_temp(op->args[i]);
3767                     pset = la_temp_pref(ts);
3768                     set = *pset;
3769 
3770                     set &= ct->regs;
3771                     if (ct->ialias) {
3772                         set &= output_pref(op, ct->alias_index);
3773                     }
3774                     /* If the combination is not possible, restart.  */
3775                     if (set == 0) {
3776                         set = ct->regs;
3777                     }
3778                     *pset = set;
3779                 }
3780                 break;
3781             }
3782             break;
3783         }
3784         op->life = arg_life;
3785     }
3786 }
3787 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * Each global with indirect_reg set is given a freshly allocated TEMP_EBB
 * temporary, recorded in the global's state_ptr.  The op list is then
 * walked in order and every argument naming such a global is rewritten
 * to name the direct temporary instead:
 *   - an ld_i32/ld_i64 is inserted before an op that reads a global whose
 *     tracked state is TS_DEAD;
 *   - an st_i32/st_i64 is inserted after an op whose output is flagged
 *     NEED_SYNC_ARG.
 *
 * The per-temp 'state' field is used as a small state machine here:
 * TS_DEAD (must be reloaded before use), TS_MEM (loaded and in sync with
 * memory) and 0 (live and modified).
 *
 * Returns true if at least one op argument was rewritten.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            /* The direct temp mirrors the type layout of the global.  */
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /*
     * Continue from i == nb_globals over the remaining temps; nb_temps is
     * sampled only now, after the allocations above.  None of these temps
     * has a direct counterpart.
     */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /*
     * The body inserts ops before/after the current op and may remove the
     * current op, hence the _SAFE iterator with op_next.
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        /*
         * NOTE(review): arg_life is not referenced by name below; presumably
         * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros expand to tests on it --
         * confirm against their definitions.
         */
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            /* Calls carry their own argument counts and flags.  */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /*
         * Make sure that input arguments are available.
         * Only indirect globals (dir_ts != NULL) currently marked TS_DEAD
         * need a load; the load targets the direct temp and reads from
         * the global's mem_base/mem_offset.
         */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.
           This is a separate pass from the loop above, so all loads for
           this op are inserted before any input is marked dead again.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.
           These loops only assert; they do not modify any state.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /*
             * Moves are special-cased: when the destination must be synced
             * and is dead afterwards, the mov is dropped and the store
             * takes the mov's source operand directly.
             */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    /* Insert the store first; op may be removed below.  */
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* op->args[1] was already rewritten by the input
                           replacement loop if it named an indirect global.  */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    /* A dead output of an indirect global must be synced.  */
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    /* Not an indirect global: nothing to rewrite.  */
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3976 
3977 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3978 {
3979     intptr_t off;
3980     int size, align;
3981 
3982     /* When allocating an object, look at the full type. */
3983     size = tcg_type_size(ts->base_type);
3984     switch (ts->base_type) {
3985     case TCG_TYPE_I32:
3986         align = 4;
3987         break;
3988     case TCG_TYPE_I64:
3989     case TCG_TYPE_V64:
3990         align = 8;
3991         break;
3992     case TCG_TYPE_I128:
3993     case TCG_TYPE_V128:
3994     case TCG_TYPE_V256:
3995         /*
3996          * Note that we do not require aligned storage for V256,
3997          * and that we provide alignment for I128 to match V128,
3998          * even if that's above what the host ABI requires.
3999          */
4000         align = 16;
4001         break;
4002     default:
4003         g_assert_not_reached();
4004     }
4005 
4006     /*
4007      * Assume the stack is sufficiently aligned.
4008      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4009      * and do not require 16 byte vector alignment.  This seems slightly
4010      * easier than fully parameterizing the above switch statement.
4011      */
4012     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4013     off = ROUND_UP(s->current_frame_offset, align);
4014 
4015     /* If we've exhausted the stack frame, restart with a smaller TB. */
4016     if (off + size > s->frame_end) {
4017         tcg_raise_tb_overflow(s);
4018     }
4019     s->current_frame_offset = off + size;
4020 #if defined(__sparc__)
4021     off += TCG_TARGET_STACK_BIAS;
4022 #endif
4023 
4024     /* If the object was subdivided, assign memory to all the parts. */
4025     if (ts->base_type != ts->type) {
4026         int part_size = tcg_type_size(ts->type);
4027         int part_count = size / part_size;
4028 
4029         /*
4030          * Each part is allocated sequentially in tcg_temp_new_internal.
4031          * Jump back to the first part by subtracting the current index.
4032          */
4033         ts -= ts->temp_subindex;
4034         for (int i = 0; i < part_count; ++i) {
4035             ts[i].mem_offset = off + i * part_size;
4036             ts[i].mem_base = s->frame_temp;
4037             ts[i].mem_allocated = 1;
4038         }
4039     } else {
4040         ts->mem_offset = off;
4041         ts->mem_base = s->frame_temp;
4042         ts->mem_allocated = 1;
4043     }
4044 }
4045 
4046 /* Assign @reg to @ts, and update reg_to_temp[]. */
4047 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4048 {
4049     if (ts->val_type == TEMP_VAL_REG) {
4050         TCGReg old = ts->reg;
4051         tcg_debug_assert(s->reg_to_temp[old] == ts);
4052         if (old == reg) {
4053             return;
4054         }
4055         s->reg_to_temp[old] = NULL;
4056     }
4057     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4058     s->reg_to_temp[reg] = ts;
4059     ts->val_type = TEMP_VAL_REG;
4060     ts->reg = reg;
4061 }
4062 
4063 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4064 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4065 {
4066     tcg_debug_assert(type != TEMP_VAL_REG);
4067     if (ts->val_type == TEMP_VAL_REG) {
4068         TCGReg reg = ts->reg;
4069         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4070         s->reg_to_temp[reg] = NULL;
4071     }
4072     ts->val_type = type;
4073 }
4074 
4075 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4076 
4077 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4078    mark it free; otherwise mark it dead.  */
4079 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4080 {
4081     TCGTempVal new_type;
4082 
4083     switch (ts->kind) {
4084     case TEMP_FIXED:
4085         return;
4086     case TEMP_GLOBAL:
4087     case TEMP_TB:
4088         new_type = TEMP_VAL_MEM;
4089         break;
4090     case TEMP_EBB:
4091         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4092         break;
4093     case TEMP_CONST:
4094         new_type = TEMP_VAL_CONST;
4095         break;
4096     default:
4097         g_assert_not_reached();
4098     }
4099     set_temp_val_nonreg(s, ts, new_type);
4100 }
4101 
4102 /* Mark a temporary as dead.  */
4103 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4104 {
4105     temp_free_or_dead(s, ts, 1);
4106 }
4107 
4108 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4109    registers needs to be allocated to store a constant.  If 'free_or_dead'
4110    is non-zero, subsequently release the temporary; if it is positive, the
4111    temp is dead; if it is negative, the temp is free.  */
4112 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4113                       TCGRegSet preferred_regs, int free_or_dead)
4114 {
4115     if (!temp_readonly(ts) && !ts->mem_coherent) {
4116         if (!ts->mem_allocated) {
4117             temp_allocate_frame(s, ts);
4118         }
4119         switch (ts->val_type) {
4120         case TEMP_VAL_CONST:
4121             /* If we're going to free the temp immediately, then we won't
4122                require it later in a register, so attempt to store the
4123                constant to memory directly.  */
4124             if (free_or_dead
4125                 && tcg_out_sti(s, ts->type, ts->val,
4126                                ts->mem_base->reg, ts->mem_offset)) {
4127                 break;
4128             }
4129             temp_load(s, ts, tcg_target_available_regs[ts->type],
4130                       allocated_regs, preferred_regs);
4131             /* fallthrough */
4132 
4133         case TEMP_VAL_REG:
4134             tcg_out_st(s, ts->type, ts->reg,
4135                        ts->mem_base->reg, ts->mem_offset);
4136             break;
4137 
4138         case TEMP_VAL_MEM:
4139             break;
4140 
4141         case TEMP_VAL_DEAD:
4142         default:
4143             g_assert_not_reached();
4144         }
4145         ts->mem_coherent = 1;
4146     }
4147     if (free_or_dead) {
4148         temp_free_or_dead(s, ts, free_or_dead);
4149     }
4150 }
4151 
4152 /* free register 'reg' by spilling the corresponding temporary if necessary */
4153 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4154 {
4155     TCGTemp *ts = s->reg_to_temp[reg];
4156     if (ts != NULL) {
4157         temp_sync(s, ts, allocated_regs, 0, -1);
4158     }
4159 }
4160 
4161 /**
4162  * tcg_reg_alloc:
4163  * @required_regs: Set of registers in which we must allocate.
4164  * @allocated_regs: Set of registers which must be avoided.
4165  * @preferred_regs: Set of registers we should prefer.
4166  * @rev: True if we search the registers in "indirect" order.
4167  *
4168  * The allocated register must be in @required_regs & ~@allocated_regs,
4169  * but if we can put it in @preferred_regs we may save a move later.
4170  */
4171 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4172                             TCGRegSet allocated_regs,
4173                             TCGRegSet preferred_regs, bool rev)
4174 {
4175     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4176     TCGRegSet reg_ct[2];
4177     const int *order;
4178 
4179     reg_ct[1] = required_regs & ~allocated_regs;
4180     tcg_debug_assert(reg_ct[1] != 0);
4181     reg_ct[0] = reg_ct[1] & preferred_regs;
4182 
4183     /* Skip the preferred_regs option if it cannot be satisfied,
4184        or if the preference made no difference.  */
4185     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4186 
4187     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4188 
4189     /* Try free registers, preferences first.  */
4190     for (j = f; j < 2; j++) {
4191         TCGRegSet set = reg_ct[j];
4192 
4193         if (tcg_regset_single(set)) {
4194             /* One register in the set.  */
4195             TCGReg reg = tcg_regset_first(set);
4196             if (s->reg_to_temp[reg] == NULL) {
4197                 return reg;
4198             }
4199         } else {
4200             for (i = 0; i < n; i++) {
4201                 TCGReg reg = order[i];
4202                 if (s->reg_to_temp[reg] == NULL &&
4203                     tcg_regset_test_reg(set, reg)) {
4204                     return reg;
4205                 }
4206             }
4207         }
4208     }
4209 
4210     /* We must spill something.  */
4211     for (j = f; j < 2; j++) {
4212         TCGRegSet set = reg_ct[j];
4213 
4214         if (tcg_regset_single(set)) {
4215             /* One register in the set.  */
4216             TCGReg reg = tcg_regset_first(set);
4217             tcg_reg_free(s, reg, allocated_regs);
4218             return reg;
4219         } else {
4220             for (i = 0; i < n; i++) {
4221                 TCGReg reg = order[i];
4222                 if (tcg_regset_test_reg(set, reg)) {
4223                     tcg_reg_free(s, reg, allocated_regs);
4224                     return reg;
4225                 }
4226             }
4227         }
4228     }
4229 
4230     g_assert_not_reached();
4231 }
4232 
4233 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4234                                  TCGRegSet allocated_regs,
4235                                  TCGRegSet preferred_regs, bool rev)
4236 {
4237     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4238     TCGRegSet reg_ct[2];
4239     const int *order;
4240 
4241     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4242     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4243     tcg_debug_assert(reg_ct[1] != 0);
4244     reg_ct[0] = reg_ct[1] & preferred_regs;
4245 
4246     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4247 
4248     /*
4249      * Skip the preferred_regs option if it cannot be satisfied,
4250      * or if the preference made no difference.
4251      */
4252     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4253 
4254     /*
4255      * Minimize the number of flushes by looking for 2 free registers first,
4256      * then a single flush, then two flushes.
4257      */
4258     for (fmin = 2; fmin >= 0; fmin--) {
4259         for (j = k; j < 2; j++) {
4260             TCGRegSet set = reg_ct[j];
4261 
4262             for (i = 0; i < n; i++) {
4263                 TCGReg reg = order[i];
4264 
4265                 if (tcg_regset_test_reg(set, reg)) {
4266                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4267                     if (f >= fmin) {
4268                         tcg_reg_free(s, reg, allocated_regs);
4269                         tcg_reg_free(s, reg + 1, allocated_regs);
4270                         return reg;
4271                     }
4272                 }
4273             }
4274         }
4275     }
4276     g_assert_not_reached();
4277 }
4278 
4279 /* Make sure the temporary is in a register.  If needed, allocate the register
4280    from DESIRED while avoiding ALLOCATED.  */
4281 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4282                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4283 {
4284     TCGReg reg;
4285 
4286     switch (ts->val_type) {
4287     case TEMP_VAL_REG:
4288         return;
4289     case TEMP_VAL_CONST:
4290         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4291                             preferred_regs, ts->indirect_base);
4292         if (ts->type <= TCG_TYPE_I64) {
4293             tcg_out_movi(s, ts->type, reg, ts->val);
4294         } else {
4295             uint64_t val = ts->val;
4296             MemOp vece = MO_64;
4297 
4298             /*
4299              * Find the minimal vector element that matches the constant.
4300              * The targets will, in general, have to do this search anyway,
4301              * do this generically.
4302              */
4303             if (val == dup_const(MO_8, val)) {
4304                 vece = MO_8;
4305             } else if (val == dup_const(MO_16, val)) {
4306                 vece = MO_16;
4307             } else if (val == dup_const(MO_32, val)) {
4308                 vece = MO_32;
4309             }
4310 
4311             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4312         }
4313         ts->mem_coherent = 0;
4314         break;
4315     case TEMP_VAL_MEM:
4316         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4317                             preferred_regs, ts->indirect_base);
4318         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4319         ts->mem_coherent = 1;
4320         break;
4321     case TEMP_VAL_DEAD:
4322     default:
4323         g_assert_not_reached();
4324     }
4325     set_temp_val_reg(s, ts, reg);
4326 }
4327 
/*
 * Save a temporary to memory.
 *
 * No code is emitted here: the function only checks (in debug builds)
 * that TS is already in memory or is read-only.  S and ALLOCATED_REGS
 * are not used by the body; the original comment describes
 * ALLOCATED_REGS as being for the case where a temporary register needs
 * to be allocated to store a constant.
 */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4336 
4337 /* save globals to their canonical location and assume they can be
4338    modified be the following code. 'allocated_regs' is used in case a
4339    temporary registers needs to be allocated to store a constant. */
4340 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4341 {
4342     int i, n;
4343 
4344     for (i = 0, n = s->nb_globals; i < n; i++) {
4345         temp_save(s, &s->temps[i], allocated_regs);
4346     }
4347 }
4348 
4349 /* sync globals to their canonical location and assume they can be
4350    read by the following code. 'allocated_regs' is used in case a
4351    temporary registers needs to be allocated to store a constant. */
4352 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4353 {
4354     int i, n;
4355 
4356     for (i = 0, n = s->nb_globals; i < n; i++) {
4357         TCGTemp *ts = &s->temps[i];
4358         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4359                          || ts->kind == TEMP_FIXED
4360                          || ts->mem_coherent);
4361     }
4362 }
4363 
4364 /* at the end of a basic block, we assume all temporaries are dead and
4365    all globals are stored at their canonical location. */
4366 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4367 {
4368     int i;
4369 
4370     for (i = s->nb_globals; i < s->nb_temps; i++) {
4371         TCGTemp *ts = &s->temps[i];
4372 
4373         switch (ts->kind) {
4374         case TEMP_TB:
4375             temp_save(s, ts, allocated_regs);
4376             break;
4377         case TEMP_EBB:
4378             /* The liveness analysis already ensures that temps are dead.
4379                Keep an tcg_debug_assert for safety. */
4380             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4381             break;
4382         case TEMP_CONST:
4383             /* Similarly, we should have freed any allocated register. */
4384             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4385             break;
4386         default:
4387             g_assert_not_reached();
4388         }
4389     }
4390 
4391     save_globals(s, allocated_regs);
4392 }
4393 
4394 /*
4395  * At a conditional branch, we assume all temporaries are dead unless
4396  * explicitly live-across-conditional-branch; all globals and local
4397  * temps are synced to their location.
4398  */
4399 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4400 {
4401     sync_globals(s, allocated_regs);
4402 
4403     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4404         TCGTemp *ts = &s->temps[i];
4405         /*
4406          * The liveness analysis already ensures that temps are dead.
4407          * Keep tcg_debug_asserts for safety.
4408          */
4409         switch (ts->kind) {
4410         case TEMP_TB:
4411             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4412             break;
4413         case TEMP_EBB:
4414         case TEMP_CONST:
4415             break;
4416         default:
4417             g_assert_not_reached();
4418         }
4419     }
4420 }
4421 
4422 /*
4423  * Specialized code generation for INDEX_op_mov_* with a constant.
4424  */
4425 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4426                                   tcg_target_ulong val, TCGLifeData arg_life,
4427                                   TCGRegSet preferred_regs)
4428 {
4429     /* ENV should not be modified.  */
4430     tcg_debug_assert(!temp_readonly(ots));
4431 
4432     /* The movi is not explicitly generated here.  */
4433     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4434     ots->val = val;
4435     ots->mem_coherent = 0;
4436     if (NEED_SYNC_ARG(0)) {
4437         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4438     } else if (IS_DEAD_ARG(0)) {
4439         temp_dead(s, ots);
4440     }
4441 }
4442 
4443 /*
4444  * Specialized code generation for INDEX_op_mov_*.
4445  */
4446 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4447 {
4448     const TCGLifeData arg_life = op->life;
4449     TCGRegSet allocated_regs, preferred_regs;
4450     TCGTemp *ts, *ots;
4451     TCGType otype, itype;
4452     TCGReg oreg, ireg;
4453 
4454     allocated_regs = s->reserved_regs;
4455     preferred_regs = output_pref(op, 0);
4456     ots = arg_temp(op->args[0]);
4457     ts = arg_temp(op->args[1]);
4458 
4459     /* ENV should not be modified.  */
4460     tcg_debug_assert(!temp_readonly(ots));
4461 
4462     /* Note that otype != itype for no-op truncation.  */
4463     otype = ots->type;
4464     itype = ts->type;
4465 
4466     if (ts->val_type == TEMP_VAL_CONST) {
4467         /* propagate constant or generate sti */
4468         tcg_target_ulong val = ts->val;
4469         if (IS_DEAD_ARG(1)) {
4470             temp_dead(s, ts);
4471         }
4472         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4473         return;
4474     }
4475 
4476     /* If the source value is in memory we're going to be forced
4477        to have it in a register in order to perform the copy.  Copy
4478        the SOURCE value into its own register first, that way we
4479        don't have to reload SOURCE the next time it is used. */
4480     if (ts->val_type == TEMP_VAL_MEM) {
4481         temp_load(s, ts, tcg_target_available_regs[itype],
4482                   allocated_regs, preferred_regs);
4483     }
4484     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4485     ireg = ts->reg;
4486 
4487     if (IS_DEAD_ARG(0)) {
4488         /* mov to a non-saved dead register makes no sense (even with
4489            liveness analysis disabled). */
4490         tcg_debug_assert(NEED_SYNC_ARG(0));
4491         if (!ots->mem_allocated) {
4492             temp_allocate_frame(s, ots);
4493         }
4494         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4495         if (IS_DEAD_ARG(1)) {
4496             temp_dead(s, ts);
4497         }
4498         temp_dead(s, ots);
4499         return;
4500     }
4501 
4502     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4503         /*
4504          * The mov can be suppressed.  Kill input first, so that it
4505          * is unlinked from reg_to_temp, then set the output to the
4506          * reg that we saved from the input.
4507          */
4508         temp_dead(s, ts);
4509         oreg = ireg;
4510     } else {
4511         if (ots->val_type == TEMP_VAL_REG) {
4512             oreg = ots->reg;
4513         } else {
4514             /* Make sure to not spill the input register during allocation. */
4515             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4516                                  allocated_regs | ((TCGRegSet)1 << ireg),
4517                                  preferred_regs, ots->indirect_base);
4518         }
4519         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4520             /*
4521              * Cross register class move not supported.
4522              * Store the source register into the destination slot
4523              * and leave the destination temp as TEMP_VAL_MEM.
4524              */
4525             assert(!temp_readonly(ots));
4526             if (!ts->mem_allocated) {
4527                 temp_allocate_frame(s, ots);
4528             }
4529             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4530             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4531             ots->mem_coherent = 1;
4532             return;
4533         }
4534     }
4535     set_temp_val_reg(s, ots, oreg);
4536     ots->mem_coherent = 0;
4537 
4538     if (NEED_SYNC_ARG(0)) {
4539         temp_sync(s, ots, allocated_regs, 0, 0);
4540     }
4541 }
4542 
4543 /*
4544  * Specialized code generation for INDEX_op_dup_vec.
4545  */
4546 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4547 {
4548     const TCGLifeData arg_life = op->life;
4549     TCGRegSet dup_out_regs, dup_in_regs;
4550     TCGTemp *its, *ots;
4551     TCGType itype, vtype;
4552     unsigned vece;
4553     int lowpart_ofs;
4554     bool ok;
4555 
4556     ots = arg_temp(op->args[0]);
4557     its = arg_temp(op->args[1]);
4558 
4559     /* ENV should not be modified.  */
4560     tcg_debug_assert(!temp_readonly(ots));
4561 
4562     itype = its->type;
4563     vece = TCGOP_VECE(op);
4564     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4565 
4566     if (its->val_type == TEMP_VAL_CONST) {
4567         /* Propagate constant via movi -> dupi.  */
4568         tcg_target_ulong val = its->val;
4569         if (IS_DEAD_ARG(1)) {
4570             temp_dead(s, its);
4571         }
4572         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4573         return;
4574     }
4575 
4576     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4577     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4578 
4579     /* Allocate the output register now.  */
4580     if (ots->val_type != TEMP_VAL_REG) {
4581         TCGRegSet allocated_regs = s->reserved_regs;
4582         TCGReg oreg;
4583 
4584         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4585             /* Make sure to not spill the input register. */
4586             tcg_regset_set_reg(allocated_regs, its->reg);
4587         }
4588         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4589                              output_pref(op, 0), ots->indirect_base);
4590         set_temp_val_reg(s, ots, oreg);
4591     }
4592 
4593     switch (its->val_type) {
4594     case TEMP_VAL_REG:
4595         /*
4596          * The dup constriaints must be broad, covering all possible VECE.
4597          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4598          * to fail, indicating that extra moves are required for that case.
4599          */
4600         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4601             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4602                 goto done;
4603             }
4604             /* Try again from memory or a vector input register.  */
4605         }
4606         if (!its->mem_coherent) {
4607             /*
4608              * The input register is not synced, and so an extra store
4609              * would be required to use memory.  Attempt an integer-vector
4610              * register move first.  We do not have a TCGRegSet for this.
4611              */
4612             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4613                 break;
4614             }
4615             /* Sync the temp back to its slot and load from there.  */
4616             temp_sync(s, its, s->reserved_regs, 0, 0);
4617         }
4618         /* fall through */
4619 
4620     case TEMP_VAL_MEM:
4621         lowpart_ofs = 0;
4622         if (HOST_BIG_ENDIAN) {
4623             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4624         }
4625         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4626                              its->mem_offset + lowpart_ofs)) {
4627             goto done;
4628         }
4629         /* Load the input into the destination vector register. */
4630         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4631         break;
4632 
4633     default:
4634         g_assert_not_reached();
4635     }
4636 
4637     /* We now have a vector input register, so dup must succeed. */
4638     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4639     tcg_debug_assert(ok);
4640 
4641  done:
4642     ots->mem_coherent = 0;
4643     if (IS_DEAD_ARG(1)) {
4644         temp_dead(s, its);
4645     }
4646     if (NEED_SYNC_ARG(0)) {
4647         temp_sync(s, ots, s->reserved_regs, 0, 0);
4648     }
4649     if (IS_DEAD_ARG(0)) {
4650         temp_dead(s, ots);
4651     }
4652 }
4653 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The work is done in phases, in this order:
 *   1. copy the op's constant arguments into new_args[];
 *   2. satisfy each input constraint, visiting inputs in the order given
 *      by the constraint sort_index and recording the chosen register
 *      (or accepted constant) in new_args[] / const_args[];
 *   3. kill inputs that liveness marks dead, freeing their registers;
 *   4. for conditional branches and basic-block ends, run the matching
 *      consistency checks; otherwise free call-clobbered registers and
 *      sync globals as the op flags require, then choose a register for
 *      each output;
 *   5. emit the instruction (extensions are dispatched to dedicated
 *      tcg_out_* helpers, everything else to tcg_out_op/tcg_out_vec_op);
 *   6. sync to memory and/or kill each output as liveness requires.
 *
 * i_allocated_regs and o_allocated_regs track registers that must not be
 * handed out again while allocating inputs and outputs respectively;
 * both start from s->reserved_regs.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* k is the processing order; i is the actual argument index. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || def->args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* An already-resident temp may sit outside the constraint. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first half was already placed; take the next register. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Reserve the preceding register for the paired output. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Reuse the register chosen for the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Must not overlap any input register either. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5020 
5021 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5022 {
5023     const TCGLifeData arg_life = op->life;
5024     TCGTemp *ots, *itsl, *itsh;
5025     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5026 
5027     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5028     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5029     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5030 
5031     ots = arg_temp(op->args[0]);
5032     itsl = arg_temp(op->args[1]);
5033     itsh = arg_temp(op->args[2]);
5034 
5035     /* ENV should not be modified.  */
5036     tcg_debug_assert(!temp_readonly(ots));
5037 
5038     /* Allocate the output register now.  */
5039     if (ots->val_type != TEMP_VAL_REG) {
5040         TCGRegSet allocated_regs = s->reserved_regs;
5041         TCGRegSet dup_out_regs =
5042             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5043         TCGReg oreg;
5044 
5045         /* Make sure to not spill the input registers. */
5046         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5047             tcg_regset_set_reg(allocated_regs, itsl->reg);
5048         }
5049         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5050             tcg_regset_set_reg(allocated_regs, itsh->reg);
5051         }
5052 
5053         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5054                              output_pref(op, 0), ots->indirect_base);
5055         set_temp_val_reg(s, ots, oreg);
5056     }
5057 
5058     /* Promote dup2 of immediates to dupi_vec. */
5059     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5060         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5061         MemOp vece = MO_64;
5062 
5063         if (val == dup_const(MO_8, val)) {
5064             vece = MO_8;
5065         } else if (val == dup_const(MO_16, val)) {
5066             vece = MO_16;
5067         } else if (val == dup_const(MO_32, val)) {
5068             vece = MO_32;
5069         }
5070 
5071         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5072         goto done;
5073     }
5074 
5075     /* If the two inputs form one 64-bit value, try dupm_vec. */
5076     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5077         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5078         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5079         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5080 
5081         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5082         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5083 
5084         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5085                              its->mem_base->reg, its->mem_offset)) {
5086             goto done;
5087         }
5088     }
5089 
5090     /* Fall back to generic expansion. */
5091     return false;
5092 
5093  done:
5094     ots->mem_coherent = 0;
5095     if (IS_DEAD_ARG(1)) {
5096         temp_dead(s, itsl);
5097     }
5098     if (IS_DEAD_ARG(2)) {
5099         temp_dead(s, itsh);
5100     }
5101     if (NEED_SYNC_ARG(0)) {
5102         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5103     } else if (IS_DEAD_ARG(0)) {
5104         temp_dead(s, ots);
5105     }
5106     return true;
5107 }
5108 
5109 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5110                          TCGRegSet allocated_regs)
5111 {
5112     if (ts->val_type == TEMP_VAL_REG) {
5113         if (ts->reg != reg) {
5114             tcg_reg_free(s, reg, allocated_regs);
5115             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5116                 /*
5117                  * Cross register class move not supported.  Sync the
5118                  * temp back to its slot and load from there.
5119                  */
5120                 temp_sync(s, ts, allocated_regs, 0, 0);
5121                 tcg_out_ld(s, ts->type, reg,
5122                            ts->mem_base->reg, ts->mem_offset);
5123             }
5124         }
5125     } else {
5126         TCGRegSet arg_set = 0;
5127 
5128         tcg_reg_free(s, reg, allocated_regs);
5129         tcg_regset_set_reg(arg_set, reg);
5130         temp_load(s, ts, arg_set, allocated_regs, 0);
5131     }
5132 }
5133 
5134 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5135                          TCGRegSet allocated_regs)
5136 {
5137     /*
5138      * When the destination is on the stack, load up the temp and store.
5139      * If there are many call-saved registers, the temp might live to
5140      * see another use; otherwise it'll be discarded.
5141      */
5142     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5143     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5144                arg_slot_stk_ofs(arg_slot));
5145 }
5146 
5147 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5148                             TCGTemp *ts, TCGRegSet *allocated_regs)
5149 {
5150     if (arg_slot_reg_p(l->arg_slot)) {
5151         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5152         load_arg_reg(s, reg, ts, *allocated_regs);
5153         tcg_regset_set_reg(*allocated_regs, reg);
5154     } else {
5155         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5156     }
5157 }
5158 
5159 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5160                          intptr_t ref_off, TCGRegSet *allocated_regs)
5161 {
5162     TCGReg reg;
5163 
5164     if (arg_slot_reg_p(arg_slot)) {
5165         reg = tcg_target_call_iarg_regs[arg_slot];
5166         tcg_reg_free(s, reg, *allocated_regs);
5167         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5168         tcg_regset_set_reg(*allocated_regs, reg);
5169     } else {
5170         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5171                             *allocated_regs, 0, false);
5172         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5173         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5174                    arg_slot_stk_ofs(arg_slot));
5175     }
5176 }
5177 
5178 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5179 {
5180     const int nb_oargs = TCGOP_CALLO(op);
5181     const int nb_iargs = TCGOP_CALLI(op);
5182     const TCGLifeData arg_life = op->life;
5183     const TCGHelperInfo *info = tcg_call_info(op);
5184     TCGRegSet allocated_regs = s->reserved_regs;
5185     int i;
5186 
5187     /*
5188      * Move inputs into place in reverse order,
5189      * so that we place stacked arguments first.
5190      */
5191     for (i = nb_iargs - 1; i >= 0; --i) {
5192         const TCGCallArgumentLoc *loc = &info->in[i];
5193         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5194 
5195         switch (loc->kind) {
5196         case TCG_CALL_ARG_NORMAL:
5197         case TCG_CALL_ARG_EXTEND_U:
5198         case TCG_CALL_ARG_EXTEND_S:
5199             load_arg_normal(s, loc, ts, &allocated_regs);
5200             break;
5201         case TCG_CALL_ARG_BY_REF:
5202             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5203             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5204                          arg_slot_stk_ofs(loc->ref_slot),
5205                          &allocated_regs);
5206             break;
5207         case TCG_CALL_ARG_BY_REF_N:
5208             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5209             break;
5210         default:
5211             g_assert_not_reached();
5212         }
5213     }
5214 
5215     /* Mark dead temporaries and free the associated registers.  */
5216     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5217         if (IS_DEAD_ARG(i)) {
5218             temp_dead(s, arg_temp(op->args[i]));
5219         }
5220     }
5221 
5222     /* Clobber call registers.  */
5223     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5224         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5225             tcg_reg_free(s, i, allocated_regs);
5226         }
5227     }
5228 
5229     /*
5230      * Save globals if they might be written by the helper,
5231      * sync them if they might be read.
5232      */
5233     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5234         /* Nothing to do */
5235     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5236         sync_globals(s, allocated_regs);
5237     } else {
5238         save_globals(s, allocated_regs);
5239     }
5240 
5241     /*
5242      * If the ABI passes a pointer to the returned struct as the first
5243      * argument, load that now.  Pass a pointer to the output home slot.
5244      */
5245     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5246         TCGTemp *ts = arg_temp(op->args[0]);
5247 
5248         if (!ts->mem_allocated) {
5249             temp_allocate_frame(s, ts);
5250         }
5251         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5252     }
5253 
5254     tcg_out_call(s, tcg_call_func(op), info);
5255 
5256     /* Assign output registers and emit moves if needed.  */
5257     switch (info->out_kind) {
5258     case TCG_CALL_RET_NORMAL:
5259         for (i = 0; i < nb_oargs; i++) {
5260             TCGTemp *ts = arg_temp(op->args[i]);
5261             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5262 
5263             /* ENV should not be modified.  */
5264             tcg_debug_assert(!temp_readonly(ts));
5265 
5266             set_temp_val_reg(s, ts, reg);
5267             ts->mem_coherent = 0;
5268         }
5269         break;
5270 
5271     case TCG_CALL_RET_BY_VEC:
5272         {
5273             TCGTemp *ts = arg_temp(op->args[0]);
5274 
5275             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5276             tcg_debug_assert(ts->temp_subindex == 0);
5277             if (!ts->mem_allocated) {
5278                 temp_allocate_frame(s, ts);
5279             }
5280             tcg_out_st(s, TCG_TYPE_V128,
5281                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5282                        ts->mem_base->reg, ts->mem_offset);
5283         }
5284         /* fall through to mark all parts in memory */
5285 
5286     case TCG_CALL_RET_BY_REF:
5287         /* The callee has performed a write through the reference. */
5288         for (i = 0; i < nb_oargs; i++) {
5289             TCGTemp *ts = arg_temp(op->args[i]);
5290             ts->val_type = TEMP_VAL_MEM;
5291         }
5292         break;
5293 
5294     default:
5295         g_assert_not_reached();
5296     }
5297 
5298     /* Flush or discard output registers as needed. */
5299     for (i = 0; i < nb_oargs; i++) {
5300         TCGTemp *ts = arg_temp(op->args[i]);
5301         if (NEED_SYNC_ARG(i)) {
5302             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5303         } else if (IS_DEAD_ARG(i)) {
5304             temp_dead(s, ts);
5305         }
5306     }
5307 }
5308 
5309 /**
5310  * atom_and_align_for_opc:
5311  * @s: tcg context
5312  * @opc: memory operation code
5313  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5314  * @allow_two_ops: true if we are prepared to issue two operations
5315  *
5316  * Return the alignment and atomicity to use for the inline fast path
5317  * for the given memory operation.  The alignment may be larger than
5318  * that specified in @opc, and the correct alignment will be diagnosed
5319  * by the slow path helper.
5320  *
5321  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5322  * and issue two loads or stores for subalignment.
5323  */
5324 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5325                                            MemOp host_atom, bool allow_two_ops)
5326 {
5327     MemOp align = get_alignment_bits(opc);
5328     MemOp size = opc & MO_SIZE;
5329     MemOp half = size ? size - 1 : 0;
5330     MemOp atmax;
5331     MemOp atom;
5332 
5333     /* When serialized, no further atomicity required.  */
5334     if (s->gen_tb->cflags & CF_PARALLEL) {
5335         atom = opc & MO_ATOM_MASK;
5336     } else {
5337         atom = MO_ATOM_NONE;
5338     }
5339 
5340     switch (atom) {
5341     case MO_ATOM_NONE:
5342         /* The operation requires no specific atomicity. */
5343         atmax = MO_8;
5344         break;
5345 
5346     case MO_ATOM_IFALIGN:
5347         atmax = size;
5348         break;
5349 
5350     case MO_ATOM_IFALIGN_PAIR:
5351         atmax = half;
5352         break;
5353 
5354     case MO_ATOM_WITHIN16:
5355         atmax = size;
5356         if (size == MO_128) {
5357             /* Misalignment implies !within16, and therefore no atomicity. */
5358         } else if (host_atom != MO_ATOM_WITHIN16) {
5359             /* The host does not implement within16, so require alignment. */
5360             align = MAX(align, size);
5361         }
5362         break;
5363 
5364     case MO_ATOM_WITHIN16_PAIR:
5365         atmax = size;
5366         /*
5367          * Misalignment implies !within16, and therefore half atomicity.
5368          * Any host prepared for two operations can implement this with
5369          * half alignment.
5370          */
5371         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5372             align = MAX(align, half);
5373         }
5374         break;
5375 
5376     case MO_ATOM_SUBALIGN:
5377         atmax = size;
5378         if (host_atom != MO_ATOM_SUBALIGN) {
5379             /* If unaligned but not odd, there are subobjects up to half. */
5380             if (allow_two_ops) {
5381                 align = MAX(align, half);
5382             } else {
5383                 align = MAX(align, size);
5384             }
5385         }
5386         break;
5387 
5388     default:
5389         g_assert_not_reached();
5390     }
5391 
5392     return (TCGAtomAlign){ .atom = atmax, .align = align };
5393 }
5394 
5395 /*
5396  * Similarly for qemu_ld/st slow path helpers.
5397  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5398  * using only the provided backend tcg_out_* functions.
5399  */
5400 
5401 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5402 {
5403     int ofs = arg_slot_stk_ofs(slot);
5404 
5405     /*
5406      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5407      * require extension to uint64_t, adjust the address for uint32_t.
5408      */
5409     if (HOST_BIG_ENDIAN &&
5410         TCG_TARGET_REG_BITS == 64 &&
5411         type == TCG_TYPE_I32) {
5412         ofs += 4;
5413     }
5414     return ofs;
5415 }
5416 
5417 static void tcg_out_helper_load_slots(TCGContext *s,
5418                                       unsigned nmov, TCGMovExtend *mov,
5419                                       const TCGLdstHelperParam *parm)
5420 {
5421     unsigned i;
5422     TCGReg dst3;
5423 
5424     /*
5425      * Start from the end, storing to the stack first.
5426      * This frees those registers, so we need not consider overlap.
5427      */
5428     for (i = nmov; i-- > 0; ) {
5429         unsigned slot = mov[i].dst;
5430 
5431         if (arg_slot_reg_p(slot)) {
5432             goto found_reg;
5433         }
5434 
5435         TCGReg src = mov[i].src;
5436         TCGType dst_type = mov[i].dst_type;
5437         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5438 
5439         /* The argument is going onto the stack; extend into scratch. */
5440         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5441             tcg_debug_assert(parm->ntmp != 0);
5442             mov[i].dst = src = parm->tmp[0];
5443             tcg_out_movext1(s, &mov[i]);
5444         }
5445 
5446         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5447                    tcg_out_helper_stk_ofs(dst_type, slot));
5448     }
5449     return;
5450 
5451  found_reg:
5452     /*
5453      * The remaining arguments are in registers.
5454      * Convert slot numbers to argument registers.
5455      */
5456     nmov = i + 1;
5457     for (i = 0; i < nmov; ++i) {
5458         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5459     }
5460 
5461     switch (nmov) {
5462     case 4:
5463         /* The backend must have provided enough temps for the worst case. */
5464         tcg_debug_assert(parm->ntmp >= 2);
5465 
5466         dst3 = mov[3].dst;
5467         for (unsigned j = 0; j < 3; ++j) {
5468             if (dst3 == mov[j].src) {
5469                 /*
5470                  * Conflict. Copy the source to a temporary, perform the
5471                  * remaining moves, then the extension from our scratch
5472                  * on the way out.
5473                  */
5474                 TCGReg scratch = parm->tmp[1];
5475 
5476                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5477                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5478                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5479                 break;
5480             }
5481         }
5482 
5483         /* No conflicts: perform this move and continue. */
5484         tcg_out_movext1(s, &mov[3]);
5485         /* fall through */
5486 
5487     case 3:
5488         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5489                         parm->ntmp ? parm->tmp[0] : -1);
5490         break;
5491     case 2:
5492         tcg_out_movext2(s, mov, mov + 1,
5493                         parm->ntmp ? parm->tmp[0] : -1);
5494         break;
5495     case 1:
5496         tcg_out_movext1(s, mov);
5497         break;
5498     default:
5499         g_assert_not_reached();
5500     }
5501 }
5502 
5503 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5504                                     TCGType type, tcg_target_long imm,
5505                                     const TCGLdstHelperParam *parm)
5506 {
5507     if (arg_slot_reg_p(slot)) {
5508         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5509     } else {
5510         int ofs = tcg_out_helper_stk_ofs(type, slot);
5511         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5512             tcg_debug_assert(parm->ntmp != 0);
5513             tcg_out_movi(s, type, parm->tmp[0], imm);
5514             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5515         }
5516     }
5517 }
5518 
5519 static void tcg_out_helper_load_common_args(TCGContext *s,
5520                                             const TCGLabelQemuLdst *ldst,
5521                                             const TCGLdstHelperParam *parm,
5522                                             const TCGHelperInfo *info,
5523                                             unsigned next_arg)
5524 {
5525     TCGMovExtend ptr_mov = {
5526         .dst_type = TCG_TYPE_PTR,
5527         .src_type = TCG_TYPE_PTR,
5528         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5529     };
5530     const TCGCallArgumentLoc *loc = &info->in[0];
5531     TCGType type;
5532     unsigned slot;
5533     tcg_target_ulong imm;
5534 
5535     /*
5536      * Handle env, which is always first.
5537      */
5538     ptr_mov.dst = loc->arg_slot;
5539     ptr_mov.src = TCG_AREG0;
5540     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5541 
5542     /*
5543      * Handle oi.
5544      */
5545     imm = ldst->oi;
5546     loc = &info->in[next_arg];
5547     type = TCG_TYPE_I32;
5548     switch (loc->kind) {
5549     case TCG_CALL_ARG_NORMAL:
5550         break;
5551     case TCG_CALL_ARG_EXTEND_U:
5552     case TCG_CALL_ARG_EXTEND_S:
5553         /* No extension required for MemOpIdx. */
5554         tcg_debug_assert(imm <= INT32_MAX);
5555         type = TCG_TYPE_REG;
5556         break;
5557     default:
5558         g_assert_not_reached();
5559     }
5560     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5561     next_arg++;
5562 
5563     /*
5564      * Handle ra.
5565      */
5566     loc = &info->in[next_arg];
5567     slot = loc->arg_slot;
5568     if (parm->ra_gen) {
5569         int arg_reg = -1;
5570         TCGReg ra_reg;
5571 
5572         if (arg_slot_reg_p(slot)) {
5573             arg_reg = tcg_target_call_iarg_regs[slot];
5574         }
5575         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5576 
5577         ptr_mov.dst = slot;
5578         ptr_mov.src = ra_reg;
5579         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5580     } else {
5581         imm = (uintptr_t)ldst->raddr;
5582         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5583     }
5584 }
5585 
5586 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5587                                        const TCGCallArgumentLoc *loc,
5588                                        TCGType dst_type, TCGType src_type,
5589                                        TCGReg lo, TCGReg hi)
5590 {
5591     MemOp reg_mo;
5592 
5593     if (dst_type <= TCG_TYPE_REG) {
5594         MemOp src_ext;
5595 
5596         switch (loc->kind) {
5597         case TCG_CALL_ARG_NORMAL:
5598             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5599             break;
5600         case TCG_CALL_ARG_EXTEND_U:
5601             dst_type = TCG_TYPE_REG;
5602             src_ext = MO_UL;
5603             break;
5604         case TCG_CALL_ARG_EXTEND_S:
5605             dst_type = TCG_TYPE_REG;
5606             src_ext = MO_SL;
5607             break;
5608         default:
5609             g_assert_not_reached();
5610         }
5611 
5612         mov[0].dst = loc->arg_slot;
5613         mov[0].dst_type = dst_type;
5614         mov[0].src = lo;
5615         mov[0].src_type = src_type;
5616         mov[0].src_ext = src_ext;
5617         return 1;
5618     }
5619 
5620     if (TCG_TARGET_REG_BITS == 32) {
5621         assert(dst_type == TCG_TYPE_I64);
5622         reg_mo = MO_32;
5623     } else {
5624         assert(dst_type == TCG_TYPE_I128);
5625         reg_mo = MO_64;
5626     }
5627 
5628     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5629     mov[0].src = lo;
5630     mov[0].dst_type = TCG_TYPE_REG;
5631     mov[0].src_type = TCG_TYPE_REG;
5632     mov[0].src_ext = reg_mo;
5633 
5634     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5635     mov[1].src = hi;
5636     mov[1].dst_type = TCG_TYPE_REG;
5637     mov[1].src_type = TCG_TYPE_REG;
5638     mov[1].src_ext = reg_mo;
5639 
5640     return 2;
5641 }
5642 
5643 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5644                                    const TCGLdstHelperParam *parm)
5645 {
5646     const TCGHelperInfo *info;
5647     const TCGCallArgumentLoc *loc;
5648     TCGMovExtend mov[2];
5649     unsigned next_arg, nmov;
5650     MemOp mop = get_memop(ldst->oi);
5651 
5652     switch (mop & MO_SIZE) {
5653     case MO_8:
5654     case MO_16:
5655     case MO_32:
5656         info = &info_helper_ld32_mmu;
5657         break;
5658     case MO_64:
5659         info = &info_helper_ld64_mmu;
5660         break;
5661     case MO_128:
5662         info = &info_helper_ld128_mmu;
5663         break;
5664     default:
5665         g_assert_not_reached();
5666     }
5667 
5668     /* Defer env argument. */
5669     next_arg = 1;
5670 
5671     loc = &info->in[next_arg];
5672     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5673         /*
5674          * 32-bit host with 32-bit guest: zero-extend the guest address
5675          * to 64-bits for the helper by storing the low part, then
5676          * load a zero for the high part.
5677          */
5678         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5679                                TCG_TYPE_I32, TCG_TYPE_I32,
5680                                ldst->addrlo_reg, -1);
5681         tcg_out_helper_load_slots(s, 1, mov, parm);
5682 
5683         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5684                                 TCG_TYPE_I32, 0, parm);
5685         next_arg += 2;
5686     } else {
5687         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5688                                       ldst->addrlo_reg, ldst->addrhi_reg);
5689         tcg_out_helper_load_slots(s, nmov, mov, parm);
5690         next_arg += nmov;
5691     }
5692 
5693     switch (info->out_kind) {
5694     case TCG_CALL_RET_NORMAL:
5695     case TCG_CALL_RET_BY_VEC:
5696         break;
5697     case TCG_CALL_RET_BY_REF:
5698         /*
5699          * The return reference is in the first argument slot.
5700          * We need memory in which to return: re-use the top of stack.
5701          */
5702         {
5703             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5704 
5705             if (arg_slot_reg_p(0)) {
5706                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5707                                  TCG_REG_CALL_STACK, ofs_slot0);
5708             } else {
5709                 tcg_debug_assert(parm->ntmp != 0);
5710                 tcg_out_addi_ptr(s, parm->tmp[0],
5711                                  TCG_REG_CALL_STACK, ofs_slot0);
5712                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5713                            TCG_REG_CALL_STACK, ofs_slot0);
5714             }
5715         }
5716         break;
5717     default:
5718         g_assert_not_reached();
5719     }
5720 
5721     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5722 }
5723 
5724 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5725                                   bool load_sign,
5726                                   const TCGLdstHelperParam *parm)
5727 {
5728     MemOp mop = get_memop(ldst->oi);
5729     TCGMovExtend mov[2];
5730     int ofs_slot0;
5731 
5732     switch (ldst->type) {
5733     case TCG_TYPE_I64:
5734         if (TCG_TARGET_REG_BITS == 32) {
5735             break;
5736         }
5737         /* fall through */
5738 
5739     case TCG_TYPE_I32:
5740         mov[0].dst = ldst->datalo_reg;
5741         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5742         mov[0].dst_type = ldst->type;
5743         mov[0].src_type = TCG_TYPE_REG;
5744 
5745         /*
5746          * If load_sign, then we allowed the helper to perform the
5747          * appropriate sign extension to tcg_target_ulong, and all
5748          * we need now is a plain move.
5749          *
5750          * If they do not, then we expect the relevant extension
5751          * instruction to be no more expensive than a move, and
5752          * we thus save the icache etc by only using one of two
5753          * helper functions.
5754          */
5755         if (load_sign || !(mop & MO_SIGN)) {
5756             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5757                 mov[0].src_ext = MO_32;
5758             } else {
5759                 mov[0].src_ext = MO_64;
5760             }
5761         } else {
5762             mov[0].src_ext = mop & MO_SSIZE;
5763         }
5764         tcg_out_movext1(s, mov);
5765         return;
5766 
5767     case TCG_TYPE_I128:
5768         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5769         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5770         switch (TCG_TARGET_CALL_RET_I128) {
5771         case TCG_CALL_RET_NORMAL:
5772             break;
5773         case TCG_CALL_RET_BY_VEC:
5774             tcg_out_st(s, TCG_TYPE_V128,
5775                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5776                        TCG_REG_CALL_STACK, ofs_slot0);
5777             /* fall through */
5778         case TCG_CALL_RET_BY_REF:
5779             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5780                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5781             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5782                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5783             return;
5784         default:
5785             g_assert_not_reached();
5786         }
5787         break;
5788 
5789     default:
5790         g_assert_not_reached();
5791     }
5792 
5793     mov[0].dst = ldst->datalo_reg;
5794     mov[0].src =
5795         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5796     mov[0].dst_type = TCG_TYPE_REG;
5797     mov[0].src_type = TCG_TYPE_REG;
5798     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5799 
5800     mov[1].dst = ldst->datahi_reg;
5801     mov[1].src =
5802         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5803     mov[1].dst_type = TCG_TYPE_REG;
5804     mov[1].src_type = TCG_TYPE_REG;
5805     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5806 
5807     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5808 }
5809 
5810 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5811                                    const TCGLdstHelperParam *parm)
5812 {
5813     const TCGHelperInfo *info;
5814     const TCGCallArgumentLoc *loc;
5815     TCGMovExtend mov[4];
5816     TCGType data_type;
5817     unsigned next_arg, nmov, n;
5818     MemOp mop = get_memop(ldst->oi);
5819 
5820     switch (mop & MO_SIZE) {
5821     case MO_8:
5822     case MO_16:
5823     case MO_32:
5824         info = &info_helper_st32_mmu;
5825         data_type = TCG_TYPE_I32;
5826         break;
5827     case MO_64:
5828         info = &info_helper_st64_mmu;
5829         data_type = TCG_TYPE_I64;
5830         break;
5831     case MO_128:
5832         info = &info_helper_st128_mmu;
5833         data_type = TCG_TYPE_I128;
5834         break;
5835     default:
5836         g_assert_not_reached();
5837     }
5838 
5839     /* Defer env argument. */
5840     next_arg = 1;
5841     nmov = 0;
5842 
5843     /* Handle addr argument. */
5844     loc = &info->in[next_arg];
5845     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5846         /*
5847          * 32-bit host with 32-bit guest: zero-extend the guest address
5848          * to 64-bits for the helper by storing the low part.  Later,
5849          * after we have processed the register inputs, we will load a
5850          * zero for the high part.
5851          */
5852         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5853                                TCG_TYPE_I32, TCG_TYPE_I32,
5854                                ldst->addrlo_reg, -1);
5855         next_arg += 2;
5856         nmov += 1;
5857     } else {
5858         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5859                                    ldst->addrlo_reg, ldst->addrhi_reg);
5860         next_arg += n;
5861         nmov += n;
5862     }
5863 
5864     /* Handle data argument. */
5865     loc = &info->in[next_arg];
5866     switch (loc->kind) {
5867     case TCG_CALL_ARG_NORMAL:
5868     case TCG_CALL_ARG_EXTEND_U:
5869     case TCG_CALL_ARG_EXTEND_S:
5870         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5871                                    ldst->datalo_reg, ldst->datahi_reg);
5872         next_arg += n;
5873         nmov += n;
5874         tcg_out_helper_load_slots(s, nmov, mov, parm);
5875         break;
5876 
5877     case TCG_CALL_ARG_BY_REF:
5878         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5879         tcg_debug_assert(data_type == TCG_TYPE_I128);
5880         tcg_out_st(s, TCG_TYPE_I64,
5881                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5882                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5883         tcg_out_st(s, TCG_TYPE_I64,
5884                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5885                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5886 
5887         tcg_out_helper_load_slots(s, nmov, mov, parm);
5888 
5889         if (arg_slot_reg_p(loc->arg_slot)) {
5890             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5891                              TCG_REG_CALL_STACK,
5892                              arg_slot_stk_ofs(loc->ref_slot));
5893         } else {
5894             tcg_debug_assert(parm->ntmp != 0);
5895             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5896                              arg_slot_stk_ofs(loc->ref_slot));
5897             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5898                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5899         }
5900         next_arg += 2;
5901         break;
5902 
5903     default:
5904         g_assert_not_reached();
5905     }
5906 
5907     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5908         /* Zero extend the address by loading a zero for the high part. */
5909         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5910         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5911     }
5912 
5913     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5914 }
5915 
5916 void tcg_dump_op_count(GString *buf)
5917 {
5918     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5919 }
5920 
5921 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5922 {
5923     int i, start_words, num_insns;
5924     TCGOp *op;
5925 
5926     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5927                  && qemu_log_in_addr_range(pc_start))) {
5928         FILE *logfile = qemu_log_trylock();
5929         if (logfile) {
5930             fprintf(logfile, "OP:\n");
5931             tcg_dump_ops(s, logfile, false);
5932             fprintf(logfile, "\n");
5933             qemu_log_unlock(logfile);
5934         }
5935     }
5936 
5937 #ifdef CONFIG_DEBUG_TCG
5938     /* Ensure all labels referenced have been emitted.  */
5939     {
5940         TCGLabel *l;
5941         bool error = false;
5942 
5943         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5944             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5945                 qemu_log_mask(CPU_LOG_TB_OP,
5946                               "$L%d referenced but not present.\n", l->id);
5947                 error = true;
5948             }
5949         }
5950         assert(!error);
5951     }
5952 #endif
5953 
5954     tcg_optimize(s);
5955 
5956     reachable_code_pass(s);
5957     liveness_pass_0(s);
5958     liveness_pass_1(s);
5959 
5960     if (s->nb_indirects > 0) {
5961         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5962                      && qemu_log_in_addr_range(pc_start))) {
5963             FILE *logfile = qemu_log_trylock();
5964             if (logfile) {
5965                 fprintf(logfile, "OP before indirect lowering:\n");
5966                 tcg_dump_ops(s, logfile, false);
5967                 fprintf(logfile, "\n");
5968                 qemu_log_unlock(logfile);
5969             }
5970         }
5971 
5972         /* Replace indirect temps with direct temps.  */
5973         if (liveness_pass_2(s)) {
5974             /* If changes were made, re-run liveness.  */
5975             liveness_pass_1(s);
5976         }
5977     }
5978 
5979     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5980                  && qemu_log_in_addr_range(pc_start))) {
5981         FILE *logfile = qemu_log_trylock();
5982         if (logfile) {
5983             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5984             tcg_dump_ops(s, logfile, true);
5985             fprintf(logfile, "\n");
5986             qemu_log_unlock(logfile);
5987         }
5988     }
5989 
5990     /* Initialize goto_tb jump offsets. */
5991     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5992     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5993     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5994     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5995 
5996     tcg_reg_alloc_start(s);
5997 
5998     /*
5999      * Reset the buffer pointers when restarting after overflow.
6000      * TODO: Move this into translate-all.c with the rest of the
6001      * buffer management.  Having only this done here is confusing.
6002      */
6003     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6004     s->code_ptr = s->code_buf;
6005 
6006 #ifdef TCG_TARGET_NEED_LDST_LABELS
6007     QSIMPLEQ_INIT(&s->ldst_labels);
6008 #endif
6009 #ifdef TCG_TARGET_NEED_POOL_LABELS
6010     s->pool_labels = NULL;
6011 #endif
6012 
6013     start_words = s->insn_start_words;
6014     s->gen_insn_data =
6015         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6016 
6017     num_insns = -1;
6018     QTAILQ_FOREACH(op, &s->ops, link) {
6019         TCGOpcode opc = op->opc;
6020 
6021         switch (opc) {
6022         case INDEX_op_mov_i32:
6023         case INDEX_op_mov_i64:
6024         case INDEX_op_mov_vec:
6025             tcg_reg_alloc_mov(s, op);
6026             break;
6027         case INDEX_op_dup_vec:
6028             tcg_reg_alloc_dup(s, op);
6029             break;
6030         case INDEX_op_insn_start:
6031             if (num_insns >= 0) {
6032                 size_t off = tcg_current_code_size(s);
6033                 s->gen_insn_end_off[num_insns] = off;
6034                 /* Assert that we do not overflow our stored offset.  */
6035                 assert(s->gen_insn_end_off[num_insns] == off);
6036             }
6037             num_insns++;
6038             for (i = 0; i < start_words; ++i) {
6039                 s->gen_insn_data[num_insns * start_words + i] =
6040                     tcg_get_insn_start_param(op, i);
6041             }
6042             break;
6043         case INDEX_op_discard:
6044             temp_dead(s, arg_temp(op->args[0]));
6045             break;
6046         case INDEX_op_set_label:
6047             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6048             tcg_out_label(s, arg_label(op->args[0]));
6049             break;
6050         case INDEX_op_call:
6051             tcg_reg_alloc_call(s, op);
6052             break;
6053         case INDEX_op_exit_tb:
6054             tcg_out_exit_tb(s, op->args[0]);
6055             break;
6056         case INDEX_op_goto_tb:
6057             tcg_out_goto_tb(s, op->args[0]);
6058             break;
6059         case INDEX_op_dup2_vec:
6060             if (tcg_reg_alloc_dup2(s, op)) {
6061                 break;
6062             }
6063             /* fall through */
6064         default:
6065             /* Sanity check that we've not introduced any unhandled opcodes. */
6066             tcg_debug_assert(tcg_op_supported(opc));
6067             /* Note: in order to speed up the code, it would be much
6068                faster to have specialized register allocator functions for
6069                some common argument patterns */
6070             tcg_reg_alloc_op(s, op);
6071             break;
6072         }
6073         /* Test for (pending) buffer overflow.  The assumption is that any
6074            one operation beginning below the high water mark cannot overrun
6075            the buffer completely.  Thus we can test for overflow after
6076            generating code without having to check during generation.  */
6077         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6078             return -1;
6079         }
6080         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6081         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6082             return -2;
6083         }
6084     }
6085     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6086     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6087 
6088     /* Generate TB finalization at the end of block */
6089 #ifdef TCG_TARGET_NEED_LDST_LABELS
6090     i = tcg_out_ldst_finalize(s);
6091     if (i < 0) {
6092         return i;
6093     }
6094 #endif
6095 #ifdef TCG_TARGET_NEED_POOL_LABELS
6096     i = tcg_out_pool_finalize(s);
6097     if (i < 0) {
6098         return i;
6099     }
6100 #endif
6101     if (!tcg_resolve_relocs(s)) {
6102         return -2;
6103     }
6104 
6105 #ifndef CONFIG_TCG_INTERPRETER
6106     /* flush instruction cache */
6107     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6108                         (uintptr_t)s->code_buf,
6109                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6110 #endif
6111 
6112     return tcg_current_code_size(s);
6113 }
6114 
6115 void tcg_dump_info(GString *buf)
6116 {
6117     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6118 }
6119 
6120 #ifdef ELF_HOST_MACHINE
6121 /* In order to use this feature, the backend needs to do three things:
6122 
6123    (1) Define ELF_HOST_MACHINE to indicate both what value to
6124        put into the ELF image and to indicate support for the feature.
6125 
6126    (2) Define tcg_register_jit.  This should create a buffer containing
6127        the contents of a .debug_frame section that describes the post-
6128        prologue unwind info for the tcg machine.
6129 
6130    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6131 */
6132 
6133 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Values stored in jit_descriptor.action_flag.  The numeric values are
 * spelled out because they are part of the interface shared with GDB.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2,
} jit_actions_t;
6139 
/*
 * One registered symbol file.  The field order and types are part of the
 * GDB interface and must not be changed.
 */
struct jit_code_entry {
    /* Links to neighbouring entries; this file registers a single
       static entry and never sets them, so they stay NULL.  */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start of the in-memory ELF image built by tcg_register_jit_int.  */
    const void *symfile_addr;
    /* Size of that image in bytes.  */
    uint64_t symfile_size;
};
6146 
/*
 * Descriptor shared with the debugger.  The field order and types are
 * part of the GDB interface and must not be changed.
 */
struct jit_descriptor {
    /* Interface version; statically initialized to 1 below.  */
    uint32_t version;
    /* One of the jit_actions_t values.  */
    uint32_t action_flag;
    /* Entry the current action refers to.  */
    struct jit_code_entry *relevant_entry;
    /* Head of the list of registered entries.  */
    struct jit_code_entry *first_entry;
};
6153 
/*
 * Notification hook of the GDB JIT interface.  tcg_register_jit_int calls
 * it after filling in __jit_debug_descriptor.  The function itself does
 * nothing; presumably the debugger intercepts the call (see the GDB JIT
 * interface documentation).  It is declared noinline so that it remains
 * a real, separate function.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    /* Empty asm statement; there is intentionally no other code here.  */
    asm("");
}
6159 
6160 /* Must statically initialize the version, because GDB may check
6161    the version before we can set it.  */
6162 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6163 
6164 /* End GDB interface.  */
6165 
/*
 * Look up @str in the ELF-style string table @strtab and return its byte
 * offset from the start of the table.
 *
 * The table is a sequence of NUL-terminated strings whose first byte
 * (offset 0) is the empty string; the search therefore starts at offset 1.
 * The table must end with an empty entry, i.e. at least one extra NUL
 * after the last string, which is what a zero-padded char array provides.
 *
 * Returns the offset of the first matching entry, or 0 (the offset of the
 * leading empty string) when @str is not present.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /*
             * An empty entry marks the end of the table.  Stop here
             * instead of walking off the end of the buffer.
             */
            return 0;
        }
        /* Skip this entry and its terminating NUL.  */
        p += strlen(p) + 1;
    }
}
6177 
6178 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6179                                  const void *debug_frame,
6180                                  size_t debug_frame_size)
6181 {
6182     struct __attribute__((packed)) DebugInfo {
6183         uint32_t  len;
6184         uint16_t  version;
6185         uint32_t  abbrev;
6186         uint8_t   ptr_size;
6187         uint8_t   cu_die;
6188         uint16_t  cu_lang;
6189         uintptr_t cu_low_pc;
6190         uintptr_t cu_high_pc;
6191         uint8_t   fn_die;
6192         char      fn_name[16];
6193         uintptr_t fn_low_pc;
6194         uintptr_t fn_high_pc;
6195         uint8_t   cu_eoc;
6196     };
6197 
6198     struct ElfImage {
6199         ElfW(Ehdr) ehdr;
6200         ElfW(Phdr) phdr;
6201         ElfW(Shdr) shdr[7];
6202         ElfW(Sym)  sym[2];
6203         struct DebugInfo di;
6204         uint8_t    da[24];
6205         char       str[80];
6206     };
6207 
6208     struct ElfImage *img;
6209 
6210     static const struct ElfImage img_template = {
6211         .ehdr = {
6212             .e_ident[EI_MAG0] = ELFMAG0,
6213             .e_ident[EI_MAG1] = ELFMAG1,
6214             .e_ident[EI_MAG2] = ELFMAG2,
6215             .e_ident[EI_MAG3] = ELFMAG3,
6216             .e_ident[EI_CLASS] = ELF_CLASS,
6217             .e_ident[EI_DATA] = ELF_DATA,
6218             .e_ident[EI_VERSION] = EV_CURRENT,
6219             .e_type = ET_EXEC,
6220             .e_machine = ELF_HOST_MACHINE,
6221             .e_version = EV_CURRENT,
6222             .e_phoff = offsetof(struct ElfImage, phdr),
6223             .e_shoff = offsetof(struct ElfImage, shdr),
6224             .e_ehsize = sizeof(ElfW(Shdr)),
6225             .e_phentsize = sizeof(ElfW(Phdr)),
6226             .e_phnum = 1,
6227             .e_shentsize = sizeof(ElfW(Shdr)),
6228             .e_shnum = ARRAY_SIZE(img->shdr),
6229             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6230 #ifdef ELF_HOST_FLAGS
6231             .e_flags = ELF_HOST_FLAGS,
6232 #endif
6233 #ifdef ELF_OSABI
6234             .e_ident[EI_OSABI] = ELF_OSABI,
6235 #endif
6236         },
6237         .phdr = {
6238             .p_type = PT_LOAD,
6239             .p_flags = PF_X,
6240         },
6241         .shdr = {
6242             [0] = { .sh_type = SHT_NULL },
6243             /* Trick: The contents of code_gen_buffer are not present in
6244                this fake ELF file; that got allocated elsewhere.  Therefore
6245                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6246                will not look for contents.  We can record any address.  */
6247             [1] = { /* .text */
6248                 .sh_type = SHT_NOBITS,
6249                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6250             },
6251             [2] = { /* .debug_info */
6252                 .sh_type = SHT_PROGBITS,
6253                 .sh_offset = offsetof(struct ElfImage, di),
6254                 .sh_size = sizeof(struct DebugInfo),
6255             },
6256             [3] = { /* .debug_abbrev */
6257                 .sh_type = SHT_PROGBITS,
6258                 .sh_offset = offsetof(struct ElfImage, da),
6259                 .sh_size = sizeof(img->da),
6260             },
6261             [4] = { /* .debug_frame */
6262                 .sh_type = SHT_PROGBITS,
6263                 .sh_offset = sizeof(struct ElfImage),
6264             },
6265             [5] = { /* .symtab */
6266                 .sh_type = SHT_SYMTAB,
6267                 .sh_offset = offsetof(struct ElfImage, sym),
6268                 .sh_size = sizeof(img->sym),
6269                 .sh_info = 1,
6270                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6271                 .sh_entsize = sizeof(ElfW(Sym)),
6272             },
6273             [6] = { /* .strtab */
6274                 .sh_type = SHT_STRTAB,
6275                 .sh_offset = offsetof(struct ElfImage, str),
6276                 .sh_size = sizeof(img->str),
6277             }
6278         },
6279         .sym = {
6280             [1] = { /* code_gen_buffer */
6281                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6282                 .st_shndx = 1,
6283             }
6284         },
6285         .di = {
6286             .len = sizeof(struct DebugInfo) - 4,
6287             .version = 2,
6288             .ptr_size = sizeof(void *),
6289             .cu_die = 1,
6290             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6291             .fn_die = 2,
6292             .fn_name = "code_gen_buffer"
6293         },
6294         .da = {
6295             1,          /* abbrev number (the cu) */
6296             0x11, 1,    /* DW_TAG_compile_unit, has children */
6297             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6298             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6299             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6300             0, 0,       /* end of abbrev */
6301             2,          /* abbrev number (the fn) */
6302             0x2e, 0,    /* DW_TAG_subprogram, no children */
6303             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6304             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6305             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6306             0, 0,       /* end of abbrev */
6307             0           /* no more abbrev */
6308         },
6309         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6310                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6311     };
6312 
6313     /* We only need a single jit entry; statically allocate it.  */
6314     static struct jit_code_entry one_entry;
6315 
6316     uintptr_t buf = (uintptr_t)buf_ptr;
6317     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6318     DebugFrameHeader *dfh;
6319 
6320     img = g_malloc(img_size);
6321     *img = img_template;
6322 
6323     img->phdr.p_vaddr = buf;
6324     img->phdr.p_paddr = buf;
6325     img->phdr.p_memsz = buf_size;
6326 
6327     img->shdr[1].sh_name = find_string(img->str, ".text");
6328     img->shdr[1].sh_addr = buf;
6329     img->shdr[1].sh_size = buf_size;
6330 
6331     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6332     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6333 
6334     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6335     img->shdr[4].sh_size = debug_frame_size;
6336 
6337     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6338     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6339 
6340     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6341     img->sym[1].st_value = buf;
6342     img->sym[1].st_size = buf_size;
6343 
6344     img->di.cu_low_pc = buf;
6345     img->di.cu_high_pc = buf + buf_size;
6346     img->di.fn_low_pc = buf;
6347     img->di.fn_high_pc = buf + buf_size;
6348 
6349     dfh = (DebugFrameHeader *)(img + 1);
6350     memcpy(dfh, debug_frame, debug_frame_size);
6351     dfh->fde.func_start = buf;
6352     dfh->fde.func_len = buf_size;
6353 
6354 #ifdef DEBUG_JIT
6355     /* Enable this block to be able to debug the ELF image file creation.
6356        One can use readelf, objdump, or other inspection utilities.  */
6357     {
6358         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6359         FILE *f = fopen(jit, "w+b");
6360         if (f) {
6361             if (fwrite(img, img_size, 1, f) != img_size) {
6362                 /* Avoid stupid unused return value warning for fwrite.  */
6363             }
6364             fclose(f);
6365         }
6366     }
6367 #endif
6368 
6369     one_entry.symfile_addr = img;
6370     one_entry.symfile_size = img_size;
6371 
6372     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6373     __jit_debug_descriptor.relevant_entry = &one_entry;
6374     __jit_debug_descriptor.first_entry = &one_entry;
6375     __jit_debug_register_code();
6376 }
6377 #else
6378 /* No support for the feature.  Provide the entry point expected by exec.c,
6379    and implement the internal function we declared earlier.  */
6380 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no JIT
 * debug support, so all arguments are ignored and nothing is registered.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6386 
/*
 * Entry point for hosts without ELF_HOST_MACHINE.  It exists only so
 * that callers have something to call; it deliberately does nothing.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6390 #endif /* ELF_HOST_MACHINE */
6391 
6392 #if !TCG_TARGET_MAYBE_vec
/*
 * Stub compiled only when TCG_TARGET_MAYBE_vec is zero.  It must never
 * be called: reaching it trips g_assert_not_reached().
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6397 #endif
6398