xref: /openbmc/qemu/tcg/tcg.c (revision 6a0f7ff7)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/tcg-op-common.h"
40 
41 #if UINTPTR_MAX == UINT32_MAX
42 # define ELF_CLASS  ELFCLASS32
43 #else
44 # define ELF_CLASS  ELFCLASS64
45 #endif
46 #if HOST_BIG_ENDIAN
47 # define ELF_DATA   ELFDATA2MSB
48 #else
49 # define ELF_DATA   ELFDATA2LSB
50 #endif
51 
52 #include "elf.h"
53 #include "exec/log.h"
54 #include "tcg/tcg-ldst.h"
55 #include "tcg/tcg-temp-internal.h"
56 #include "tcg-internal.h"
57 #include "accel/tcg/perf.h"
58 #ifdef CONFIG_USER_ONLY
59 #include "exec/user/guest-base.h"
60 #endif
61 
62 /* Forward declarations for functions declared in tcg-target.c.inc and
63    used here. */
64 static void tcg_target_init(TCGContext *s);
65 static void tcg_target_qemu_prologue(TCGContext *s);
66 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
67                         intptr_t value, intptr_t addend);
68 
/* The CIE and FDE header definitions will be common to all hosts.  */

/*
 * CIE header placed at the start of the debug frame data.
 * NOTE(review): the field names follow the DWARF .debug_frame CIE layout;
 * the values stored in them are not established in this part of the file.
 */
typedef struct {
    /* Forced to pointer-size alignment. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    /* One-byte augmentation field. */
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
79 
/*
 * Packed FDE header: a length, the offset of the CIE it refers to, and
 * the start and length of the function being described.
 * NOTE(review): units of func_start/func_len are presumably host
 * addresses/bytes -- confirm against the code that fills them in.
 */
typedef struct QEMU_PACKED {
    /* Forced to pointer-size alignment even though the struct is packed. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
86 
/* Packed pair of a CIE immediately followed by one FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
91 
/*
 * Record describing one qemu_ld/qemu_st operation: which registers hold
 * the address and data, where execution resumes, and which code
 * locations still need patching.  Records are chained through @next.
 * NOTE(review): presumably consumed when TCG_TARGET_NEED_LDST_LABELS is
 * defined (see tcg_out_ldst_finalize) -- confirm against the backends.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
104 
105 static void tcg_register_jit_int(const void *buf, size_t size,
106                                  const void *debug_frame,
107                                  size_t debug_frame_size)
108     __attribute__((unused));
109 
110 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
111 static void tcg_out_tb_start(TCGContext *s);
112 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
113                        intptr_t arg2);
114 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
115 static void tcg_out_movi(TCGContext *s, TCGType type,
116                          TCGReg ret, tcg_target_long arg);
117 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
118 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
120 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
121 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
123 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
127 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
128 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
129 static void tcg_out_goto_tb(TCGContext *s, int which);
130 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
131                        const TCGArg args[TCG_MAX_OP_ARGS],
132                        const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector emission hooks.  When TCG_TARGET_MAYBE_vec is nonzero only the
 * prototypes are given here and the backend must supply the bodies;
 * otherwise each hook is stubbed out with a body that asserts it is
 * never reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Stub: must never be called when vector support is compiled out. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub: must never be called when vector support is compiled out. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
168 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
169                        intptr_t arg2);
170 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
171                         TCGReg base, intptr_t ofs);
172 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
173                          const TCGHelperInfo *info);
174 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
175 static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
176 #ifdef TCG_TARGET_NEED_LDST_LABELS
177 static int tcg_out_ldst_finalize(TCGContext *s);
178 #endif
179 
/*
 * Parameters passed to tcg_out_ld_helper_args(), tcg_out_ld_helper_ret()
 * and tcg_out_st_helper_args().
 */
typedef struct TCGLdstHelperParam {
    /*
     * Optional callback returning a register for the given label.
     * NOTE(review): presumably generates the return-address argument
     * ("ra") into @arg_reg -- confirm against the users.
     */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    /* NOTE(review): presumably the number of valid entries in tmp[]. */
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;
185 
186 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
187                                    const TCGLdstHelperParam *p)
188     __attribute__((unused));
189 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
190                                   bool load_sign, const TCGLdstHelperParam *p)
191     __attribute__((unused));
192 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
193                                    const TCGLdstHelperParam *p)
194     __attribute__((unused));
195 
/*
 * Load helper entry points, indexed by the size+sign part of a MemOp
 * (MO_SSIZE + 1 slots).  The sign-extending 32-bit and the 128-bit
 * entries are only populated when the host register width is 64 bits;
 * slots without an initializer are NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
208 
/*
 * Store helper entry points, indexed by the size part of a MemOp
 * (MO_SIZE + 1 slots).  The 128-bit entry is only populated when the
 * host register width is 64 bits; slots without an initializer are NULL.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
218 
/* Result type of atom_and_align_for_opc(): atomicity and alignment pair. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
223 
224 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
225                                            MemOp host_atom, bool allow_two_ops)
226     __attribute__((unused));
227 
/*
 * Initial context.  User-mode threads use it directly; the softmmu
 * tcg_register_thread() copies it into each freshly allocated context.
 */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context this thread translates with. */
__thread TCGContext *tcg_ctx;

/* Array of registered contexts; slots are claimed in tcg_register_thread(). */
TCGContext **tcg_ctxs;
/* Number of tcg_ctxs[] slots claimed so far (incremented atomically). */
unsigned int tcg_cur_ctxs;
/* Upper bound for tcg_cur_ctxs, asserted when a slot is claimed. */
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
/* Only present when the TCG interpreter is not configured. */
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Register sets indexed by TCGType. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
244 
245 #if TCG_TARGET_INSN_UNIT_SIZE == 1
246 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
247 {
248     *s->code_ptr++ = v;
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
252                                                       uint8_t v)
253 {
254     *p = v;
255 }
256 #endif
257 
258 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
259 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
260 {
261     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
262         *s->code_ptr++ = v;
263     } else {
264         tcg_insn_unit *p = s->code_ptr;
265         memcpy(p, &v, sizeof(v));
266         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
267     }
268 }
269 
270 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
271                                                        uint16_t v)
272 {
273     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
274         *p = v;
275     } else {
276         memcpy(p, &v, sizeof(v));
277     }
278 }
279 #endif
280 
281 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
282 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
283 {
284     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
285         *s->code_ptr++ = v;
286     } else {
287         tcg_insn_unit *p = s->code_ptr;
288         memcpy(p, &v, sizeof(v));
289         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
290     }
291 }
292 
293 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
294                                                        uint32_t v)
295 {
296     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
297         *p = v;
298     } else {
299         memcpy(p, &v, sizeof(v));
300     }
301 }
302 #endif
303 
304 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
305 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
306 {
307     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
308         *s->code_ptr++ = v;
309     } else {
310         tcg_insn_unit *p = s->code_ptr;
311         memcpy(p, &v, sizeof(v));
312         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
313     }
314 }
315 
316 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
317                                                        uint64_t v)
318 {
319     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
320         *p = v;
321     } else {
322         memcpy(p, &v, sizeof(v));
323     }
324 }
325 #endif
326 
327 /* label relocation processing */
328 
329 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
330                           TCGLabel *l, intptr_t addend)
331 {
332     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
333 
334     r->type = type;
335     r->ptr = code_ptr;
336     r->addend = addend;
337     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
338 }
339 
340 static void tcg_out_label(TCGContext *s, TCGLabel *l)
341 {
342     tcg_debug_assert(!l->has_value);
343     l->has_value = 1;
344     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
345 }
346 
347 TCGLabel *gen_new_label(void)
348 {
349     TCGContext *s = tcg_ctx;
350     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
351 
352     memset(l, 0, sizeof(TCGLabel));
353     l->id = s->nb_labels++;
354     QSIMPLEQ_INIT(&l->branches);
355     QSIMPLEQ_INIT(&l->relocs);
356 
357     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
358 
359     return l;
360 }
361 
362 static bool tcg_resolve_relocs(TCGContext *s)
363 {
364     TCGLabel *l;
365 
366     QSIMPLEQ_FOREACH(l, &s->labels, next) {
367         TCGRelocation *r;
368         uintptr_t value = l->u.value;
369 
370         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
371             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
372                 return false;
373             }
374         }
375     }
376     return true;
377 }
378 
379 static void set_jmp_reset_offset(TCGContext *s, int which)
380 {
381     /*
382      * We will check for overflow at the end of the opcode loop in
383      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
384      */
385     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
386 }
387 
388 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
389 {
390     /*
391      * We will check for overflow at the end of the opcode loop in
392      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
393      */
394     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
395 }
396 
397 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
398 {
399     /*
400      * Return the read-execute version of the pointer, for the benefit
401      * of any pc-relative addressing mode.
402      */
403     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
404 }
405 
406 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
407 static int tlb_mask_table_ofs(TCGContext *s, int which)
408 {
409     return s->tlb_fast_offset + which * sizeof(CPUTLBDescFast);
410 }
411 #endif
412 
413 /* Signal overflow, starting over with fewer guest insns. */
414 static G_NORETURN
415 void tcg_raise_tb_overflow(TCGContext *s)
416 {
417     siglongjmp(s->jmp_trans, -2);
418 }
419 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register (or argument slot, see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type for destination */
    TCGType src_type;   /* integral type for source */
    MemOp src_ext;      /* extension to apply to source */
} TCGMovExtend;
436 
437 /**
438  * tcg_out_movext -- move and extend
439  * @s: tcg context
440  * @dst_type: integral type for destination
441  * @dst: destination register
442  * @src_type: integral type for source
443  * @src_ext: extension to apply to source
444  * @src: source register
445  *
446  * Move or extend @src into @dst, depending on @src_ext and the types.
447  */
448 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
449                            TCGType src_type, MemOp src_ext, TCGReg src)
450 {
451     switch (src_ext) {
452     case MO_UB:
453         tcg_out_ext8u(s, dst, src);
454         break;
455     case MO_SB:
456         tcg_out_ext8s(s, dst_type, dst, src);
457         break;
458     case MO_UW:
459         tcg_out_ext16u(s, dst, src);
460         break;
461     case MO_SW:
462         tcg_out_ext16s(s, dst_type, dst, src);
463         break;
464     case MO_UL:
465     case MO_SL:
466         if (dst_type == TCG_TYPE_I32) {
467             if (src_type == TCG_TYPE_I32) {
468                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
469             } else {
470                 tcg_out_extrl_i64_i32(s, dst, src);
471             }
472         } else if (src_type == TCG_TYPE_I32) {
473             if (src_ext & MO_SIGN) {
474                 tcg_out_exts_i32_i64(s, dst, src);
475             } else {
476                 tcg_out_extu_i32_i64(s, dst, src);
477             }
478         } else {
479             if (src_ext & MO_SIGN) {
480                 tcg_out_ext32s(s, dst, src);
481             } else {
482                 tcg_out_ext32u(s, dst, src);
483             }
484         }
485         break;
486     case MO_UQ:
487         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
488         if (dst_type == TCG_TYPE_I32) {
489             tcg_out_extrl_i64_i32(s, dst, src);
490         } else {
491             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
492         }
493         break;
494     default:
495         g_assert_not_reached();
496     }
497 }
498 
499 /* Minor variations on a theme, using a structure. */
500 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
501                                     TCGReg src)
502 {
503     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
504 }
505 
506 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
507 {
508     tcg_out_movext1_new_src(s, i, i->src);
509 }
510 
511 /**
512  * tcg_out_movext2 -- move and extend two pair
513  * @s: tcg context
514  * @i1: first move description
515  * @i2: second move description
516  * @scratch: temporary register, or -1 for none
517  *
518  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
519  * between the sources and destinations.
520  */
521 
522 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
523                             const TCGMovExtend *i2, int scratch)
524 {
525     TCGReg src1 = i1->src;
526     TCGReg src2 = i2->src;
527 
528     if (i1->dst != src2) {
529         tcg_out_movext1(s, i1);
530         tcg_out_movext1(s, i2);
531         return;
532     }
533     if (i2->dst == src1) {
534         TCGType src1_type = i1->src_type;
535         TCGType src2_type = i2->src_type;
536 
537         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
538             /* The data is now in the correct registers, now extend. */
539             src1 = i2->src;
540             src2 = i1->src;
541         } else {
542             tcg_debug_assert(scratch >= 0);
543             tcg_out_mov(s, src1_type, scratch, src1);
544             src1 = scratch;
545         }
546     }
547     tcg_out_movext1_new_src(s, i2, src2);
548     tcg_out_movext1_new_src(s, i1, src1);
549 }
550 
551 /**
552  * tcg_out_movext3 -- move and extend three pair
553  * @s: tcg context
554  * @i1: first move description
555  * @i2: second move description
556  * @i3: third move description
557  * @scratch: temporary register, or -1 for none
558  *
559  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
560  * between the sources and destinations.
561  */
562 
563 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
564                             const TCGMovExtend *i2, const TCGMovExtend *i3,
565                             int scratch)
566 {
567     TCGReg src1 = i1->src;
568     TCGReg src2 = i2->src;
569     TCGReg src3 = i3->src;
570 
571     if (i1->dst != src2 && i1->dst != src3) {
572         tcg_out_movext1(s, i1);
573         tcg_out_movext2(s, i2, i3, scratch);
574         return;
575     }
576     if (i2->dst != src1 && i2->dst != src3) {
577         tcg_out_movext1(s, i2);
578         tcg_out_movext2(s, i1, i3, scratch);
579         return;
580     }
581     if (i3->dst != src1 && i3->dst != src2) {
582         tcg_out_movext1(s, i3);
583         tcg_out_movext2(s, i1, i2, scratch);
584         return;
585     }
586 
587     /*
588      * There is a cycle.  Since there are only 3 nodes, the cycle is
589      * either "clockwise" or "anti-clockwise", and can be solved with
590      * a single scratch or two xchg.
591      */
592     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
593         /* "Clockwise" */
594         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
595             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
596             /* The data is now in the correct registers, now extend. */
597             tcg_out_movext1_new_src(s, i1, i1->dst);
598             tcg_out_movext1_new_src(s, i2, i2->dst);
599             tcg_out_movext1_new_src(s, i3, i3->dst);
600         } else {
601             tcg_debug_assert(scratch >= 0);
602             tcg_out_mov(s, i1->src_type, scratch, src1);
603             tcg_out_movext1(s, i3);
604             tcg_out_movext1(s, i2);
605             tcg_out_movext1_new_src(s, i1, scratch);
606         }
607     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
608         /* "Anti-clockwise" */
609         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
610             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
611             /* The data is now in the correct registers, now extend. */
612             tcg_out_movext1_new_src(s, i1, i1->dst);
613             tcg_out_movext1_new_src(s, i2, i2->dst);
614             tcg_out_movext1_new_src(s, i3, i3->dst);
615         } else {
616             tcg_debug_assert(scratch >= 0);
617             tcg_out_mov(s, i1->src_type, scratch, src1);
618             tcg_out_movext1(s, i2);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1_new_src(s, i1, scratch);
621         }
622     } else {
623         g_assert_not_reached();
624     }
625 }
626 
/*
 * Token-pasting helpers: join prefix P and the arguments into a single
 * identifier, with an underscore between consecutive arguments.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/*
 * Define an enumeration for the various combinations.
 * In this first expansion each C_* entry of tcg-target-con-set.h becomes
 * one enumerator followed by a comma.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

/* One enumerator per line of tcg-target-con-set.h, in file order. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Maps an opcode to the index of its constraint set. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
659 
/* Drop the previous definitions so the C_* macros can be redefined below. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/*
 * Put all of the constraint sets into an array, indexed by the enum.
 * Here each C_* entry expands to an initializer whose args_ct_str holds
 * the stringified arguments; the C_N1_* forms prepend "&" to the first
 * output's string.
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

/* One TCGTargetOpDef per line of tcg-target-con-set.h, in file order. */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
698 
699 
/* Drop the previous definitions so the C_* macros can be redefined below. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 * These definitions have no trailing comma, so a C_* invocation is
 * usable as an expression; they stay in effect for the code that follows.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
734 
735 #include "tcg-target.c.inc"
736 
737 static void alloc_tcg_plugin_context(TCGContext *s)
738 {
739 #ifdef CONFIG_PLUGIN
740     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
741     s->plugin_tb->insns =
742         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
743 #endif
744 }
745 
746 /*
747  * All TCG threads except the parent (i.e. the one that called tcg_context_init
748  * and registered the target's TCG globals) must register with this function
749  * before initiating translation.
750  *
751  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
752  * of tcg_region_init() for the reasoning behind this.
753  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
760  */
761 #ifdef CONFIG_USER_ONLY
762 void tcg_register_thread(void)
763 {
764     tcg_ctx = &tcg_init_ctx;
765 }
766 #else
767 void tcg_register_thread(void)
768 {
769     TCGContext *s = g_malloc(sizeof(*s));
770     unsigned int i, n;
771 
772     *s = tcg_init_ctx;
773 
774     /* Relink mem_base.  */
775     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
776         if (tcg_init_ctx.temps[i].mem_base) {
777             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
778             tcg_debug_assert(b >= 0 && b < n);
779             s->temps[i].mem_base = &s->temps[b];
780         }
781     }
782 
783     /* Claim an entry in tcg_ctxs */
784     n = qatomic_fetch_inc(&tcg_cur_ctxs);
785     g_assert(n < tcg_max_ctxs);
786     qatomic_set(&tcg_ctxs[n], s);
787 
788     if (n > 0) {
789         alloc_tcg_plugin_context(s);
790         tcg_region_initial_alloc(s);
791     }
792 
793     tcg_ctx = s;
794 }
795 #endif /* !CONFIG_USER_ONLY */
796 
797 /* pool based memory allocation */
798 void *tcg_malloc_internal(TCGContext *s, int size)
799 {
800     TCGPool *p;
801     int pool_size;
802 
803     if (size > TCG_POOL_CHUNK_SIZE) {
804         /* big malloc: insert a new pool (XXX: could optimize) */
805         p = g_malloc(sizeof(TCGPool) + size);
806         p->size = size;
807         p->next = s->pool_first_large;
808         s->pool_first_large = p;
809         return p->data;
810     } else {
811         p = s->pool_current;
812         if (!p) {
813             p = s->pool_first;
814             if (!p)
815                 goto new_pool;
816         } else {
817             if (!p->next) {
818             new_pool:
819                 pool_size = TCG_POOL_CHUNK_SIZE;
820                 p = g_malloc(sizeof(TCGPool) + pool_size);
821                 p->size = pool_size;
822                 p->next = NULL;
823                 if (s->pool_current) {
824                     s->pool_current->next = p;
825                 } else {
826                     s->pool_first = p;
827                 }
828             } else {
829                 p = p->next;
830             }
831         }
832     }
833     s->pool_current = p;
834     s->pool_cur = p->data + size;
835     s->pool_end = p->data + p->size;
836     return p->data;
837 }
838 
839 void tcg_pool_reset(TCGContext *s)
840 {
841     TCGPool *p, *t;
842     for (p = s->pool_first_large; p; p = t) {
843         t = p->next;
844         g_free(p);
845     }
846     s->pool_first_large = NULL;
847     s->pool_cur = s->pool_end = NULL;
848     s->pool_current = NULL;
849 }
850 
851 /*
852  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
853  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
854  * We only use these for layout in tcg_out_ld_helper_ret and
855  * tcg_out_st_helper_args, and share them between several of
856  * the helpers, with the end result that it's easier to build manually.
857  */
858 
859 #if TCG_TARGET_REG_BITS == 32
860 # define dh_typecode_ttl  dh_typecode_i32
861 #else
862 # define dh_typecode_ttl  dh_typecode_i64
863 #endif
864 
/*
 * Described signature:
 *   tcg_target_ulong helper(env, uint64_t addr, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..4 are the arguments.
 */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
873 
/*
 * Described signature:
 *   uint64_t helper(env, uint64_t addr, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..4 are the arguments.
 */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
882 
/*
 * Described signature:
 *   Int128 helper(env, uint64_t addr, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..4 are the arguments.
 */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
891 
/*
 * Described signature:
 *   void helper(env, uint64_t addr, uint32_t data, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..5 are the arguments.
 */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
901 
/*
 * Described signature:
 *   void helper(env, uint64_t addr, uint64_t data, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..5 are the arguments.
 */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
911 
/*
 * Described signature:
 *   void helper(env, uint64_t addr, Int128 data, unsigned oi, uintptr_t ra)
 * Field 0 of the typemask is the return type; fields 1..5 are the arguments.
 */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
921 
922 #ifdef CONFIG_TCG_INTERPRETER
923 static ffi_type *typecode_to_ffi(int argmask)
924 {
925     /*
926      * libffi does not support __int128_t, so we have forced Int128
927      * to use the structure definition instead of the builtin type.
928      */
929     static ffi_type *ffi_type_i128_elements[3] = {
930         &ffi_type_uint64,
931         &ffi_type_uint64,
932         NULL
933     };
934     static ffi_type ffi_type_i128 = {
935         .size = 16,
936         .alignment = __alignof__(Int128),
937         .type = FFI_TYPE_STRUCT,
938         .elements = ffi_type_i128_elements,
939     };
940 
941     switch (argmask) {
942     case dh_typecode_void:
943         return &ffi_type_void;
944     case dh_typecode_i32:
945         return &ffi_type_uint32;
946     case dh_typecode_s32:
947         return &ffi_type_sint32;
948     case dh_typecode_i64:
949         return &ffi_type_uint64;
950     case dh_typecode_s64:
951         return &ffi_type_sint64;
952     case dh_typecode_ptr:
953         return &ffi_type_pointer;
954     case dh_typecode_i128:
955         return &ffi_type_i128;
956     }
957     g_assert_not_reached();
958 }
959 
960 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
961 {
962     unsigned typemask = info->typemask;
963     struct {
964         ffi_cif cif;
965         ffi_type *args[];
966     } *ca;
967     ffi_status status;
968     int nargs;
969 
970     /* Ignoring the return type, find the last non-zero field. */
971     nargs = 32 - clz32(typemask >> 3);
972     nargs = DIV_ROUND_UP(nargs, 3);
973     assert(nargs <= MAX_CALL_IARGS);
974 
975     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
976     ca->cif.rtype = typecode_to_ffi(typemask & 7);
977     ca->cif.nargs = nargs;
978 
979     if (nargs != 0) {
980         ca->cif.arg_types = ca->args;
981         for (int j = 0; j < nargs; ++j) {
982             int typecode = extract32(typemask, (j + 1) * 3, 3);
983             ca->args[j] = typecode_to_ffi(typecode);
984         }
985     }
986 
987     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
988                           ca->cif.rtype, ca->cif.arg_types);
989     assert(status == FFI_OK);
990 
991     return &ca->cif;
992 }
993 
/*
 * With CONFIG_TCG_INTERPRETER: HELPER_INFO_INIT(I) is the address of I's
 * "cif" member and HELPER_INFO_INIT_VAL(I) is the ffi_cif built from I.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
/*
 * Without it: HELPER_INFO_INIT(I) is the address of I's "init" member and
 * HELPER_INFO_INIT_VAL(I) is the constant 1.
 */
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1000 
1001 static inline bool arg_slot_reg_p(unsigned arg_slot)
1002 {
1003     /*
1004      * Split the sizeof away from the comparison to avoid Werror from
1005      * "unsigned < 0 is always false", when iarg_regs is empty.
1006      */
1007     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1008     return arg_slot < nreg;
1009 }
1010 
1011 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1012 {
1013     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1014     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1015 
1016     tcg_debug_assert(stk_slot < max);
1017     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1018 }
1019 
/*
 * Running counters used by init_call_layout() and the layout_arg_*()
 * helpers while placing the arguments of one helper.
 */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[]: argument being placed */
    int info_in_idx;            /* TCGHelperInfo in[]: next entry to fill */
    int arg_slot;               /* regs+stack slot: next free slot */
    int ref_slot;               /* stack slots for references: next free slot */
} TCGCumulativeArgs;
1026 
1027 static void layout_arg_even(TCGCumulativeArgs *cum)
1028 {
1029     cum->arg_slot += cum->arg_slot & 1;
1030 }
1031 
1032 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1033                          TCGCallArgumentKind kind)
1034 {
1035     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1036 
1037     *loc = (TCGCallArgumentLoc){
1038         .kind = kind,
1039         .arg_idx = cum->arg_idx,
1040         .arg_slot = cum->arg_slot,
1041     };
1042     cum->info_in_idx++;
1043     cum->arg_slot++;
1044 }
1045 
1046 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1047                                 TCGHelperInfo *info, int n)
1048 {
1049     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1050 
1051     for (int i = 0; i < n; ++i) {
1052         /* Layout all using the same arg_idx, adjusting the subindex. */
1053         loc[i] = (TCGCallArgumentLoc){
1054             .kind = TCG_CALL_ARG_NORMAL,
1055             .arg_idx = cum->arg_idx,
1056             .tmp_subindex = i,
1057             .arg_slot = cum->arg_slot + i,
1058         };
1059     }
1060     cum->info_in_idx += n;
1061     cum->arg_slot += n;
1062 }
1063 
1064 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1065 {
1066     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1067     int n = 128 / TCG_TARGET_REG_BITS;
1068 
1069     /* The first subindex carries the pointer. */
1070     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1071 
1072     /*
1073      * The callee is allowed to clobber memory associated with
1074      * structure pass by-reference.  Therefore we must make copies.
1075      * Allocate space from "ref_slot", which will be adjusted to
1076      * follow the parameters on the stack.
1077      */
1078     loc[0].ref_slot = cum->ref_slot;
1079 
1080     /*
1081      * Subsequent words also go into the reference slot, but
1082      * do not accumulate into the regular arguments.
1083      */
1084     for (int i = 1; i < n; ++i) {
1085         loc[i] = (TCGCallArgumentLoc){
1086             .kind = TCG_CALL_ARG_BY_REF_N,
1087             .arg_idx = cum->arg_idx,
1088             .tmp_subindex = i,
1089             .ref_slot = cum->ref_slot + i,
1090         };
1091     }
1092     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1093     cum->ref_slot += n;
1094 }
1095 
/*
 * Compute the call layout for the helper described by INFO.
 *
 * info->typemask is read as a sequence of 3-bit typecodes: the lowest
 * field is the return type and each following field is one argument.
 * On return, info->nr_out/out_kind describe the return value, and
 * info->in[0 .. nr_in-1] describe where each argument word is placed
 * (register/stack slot, and reference slot for by-reference arguments).
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One output on 64-bit hosts, two on 32-bit hosts. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        /* How a 128-bit value is returned is chosen by the backend. */
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * The loop ends once no non-zero typecode field remains.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* First reduce the helper typecode to a TCG type. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            /*
             * NOTE(review): there is no TCG_TYPE_PTR case in the switch
             * below; presumably TCG_TYPE_PTR aliases TCG_TYPE_I32 or
             * TCG_TYPE_I64 depending on the host -- confirm.
             */
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Then place it according to the backend's convention for the type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /*
                 * The low bit of the typecode picks the extension kind.
                 * NOTE(review): presumably odd typecodes are the signed
                 * ones and EXTEND_S == EXTEND_U + 1 -- confirm against
                 * the enum definitions.
                 */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                /* A 64-bit value takes two slots on a 32-bit host. */
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /*
     * Validate that we didn't overrun the input array.
     * Note this runs after the entries have already been written.
     */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        /* Only skip past stack parameters if some were actually used. */
        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        /* Express the base in the combined regs+stack slot numbering. */
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            /* Rebase every reference slot recorded during placement. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1276 
/*
 * Alternative register allocation order, filled in by tcg_context_init():
 * a copy of tcg_target_reg_alloc_order with its leading run of
 * non-call-clobbered registers reversed.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for functions used by tcg_context_init(). */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1281 
/*
 * One-time initialization of the initial TCG context (tcg_init_ctx).
 *
 * Zeroes the context, allocates the per-opcode argument constraint arrays,
 * computes the call layouts of the ld/st helper descriptors, lets the
 * backend initialize itself, builds indirect_reg_alloc_order, sets up the
 * tcg_ctx/tcg_ctxs bookkeeping and creates the fixed "env" global.
 *
 * @max_cpus: used only outside CONFIG_USER_ONLY, to size tcg_ctxs.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    /* A single zeroed array is carved up between all the opcodes below. */
    args_ct = g_new0(TCGArgConstraint, total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* n ends up as the index of the first call-clobbered register. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* The remaining registers keep their original positions. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* TCG_AREG0 must not already be reserved; it becomes the "env" global. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1358 
/*
 * Initialize TCG: set up the initial context, then the code regions.
 * @tb_size, @splitwx and @max_cpus are forwarded to tcg_region_init();
 * @max_cpus is also passed to tcg_context_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1364 
1365 /*
1366  * Allocate TBs right before their corresponding translated code, making
1367  * sure that TBs and code are on different cache lines.
1368  */
1369 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1370 {
1371     uintptr_t align = qemu_icache_linesize;
1372     TranslationBlock *tb;
1373     void *next;
1374 
1375  retry:
1376     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1377     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1378 
1379     if (unlikely(next > s->code_gen_highwater)) {
1380         if (tcg_region_alloc(s)) {
1381             return NULL;
1382         }
1383         goto retry;
1384     }
1385     qatomic_set(&s->code_gen_ptr, next);
1386     s->data_gen_ptr = NULL;
1387     return tb;
1388 }
1389 
/*
 * Generate the prologue at s->code_gen_ptr.
 *
 * After the backend has emitted the prologue (and any constant pool has
 * been finalized), the size is reported via perf_report_prologue(), the
 * range is passed to flush_idcache_range() for native backends, an
 * optional disassembly is logged under CPU_LOG_TB_OUT_ASM, and finally
 * tcg_region_prologue_set() is called.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue start, seen through the rx mapping, is the TB entry point. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            /*
             * A non-NULL data_gen_ptr marks where data begins: everything
             * before it is disassembled, everything after is dumped as
             * host-word-sized values.
             */
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1469 
1470 void tcg_func_start(TCGContext *s)
1471 {
1472     tcg_pool_reset(s);
1473     s->nb_temps = s->nb_globals;
1474 
1475     /* No temps have been previously allocated for size or locality.  */
1476     memset(s->free_temps, 0, sizeof(s->free_temps));
1477 
1478     /* No constant temps have been previously allocated. */
1479     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1480         if (s->const_table[i]) {
1481             g_hash_table_remove_all(s->const_table[i]);
1482         }
1483     }
1484 
1485     s->nb_ops = 0;
1486     s->nb_labels = 0;
1487     s->current_frame_offset = s->frame_start;
1488 
1489 #ifdef CONFIG_DEBUG_TCG
1490     s->goto_tb_issue_mask = 0;
1491 #endif
1492 
1493     QTAILQ_INIT(&s->ops);
1494     QTAILQ_INIT(&s->free_ops);
1495     QSIMPLEQ_INIT(&s->labels);
1496 
1497     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1498                      s->addr_type == TCG_TYPE_I64);
1499 
1500 #if defined(CONFIG_SOFTMMU) && !defined(CONFIG_TCG_INTERPRETER)
1501     tcg_debug_assert(s->tlb_fast_offset < 0);
1502     tcg_debug_assert(s->tlb_fast_offset >= MIN_TLB_MASK_TABLE_OFS);
1503 #endif
1504 
1505     tcg_debug_assert(s->insn_start_words > 0);
1506 }
1507 
1508 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1509 {
1510     int n = s->nb_temps++;
1511 
1512     if (n >= TCG_MAX_TEMPS) {
1513         tcg_raise_tb_overflow(s);
1514     }
1515     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1516 }
1517 
1518 static TCGTemp *tcg_global_alloc(TCGContext *s)
1519 {
1520     TCGTemp *ts;
1521 
1522     tcg_debug_assert(s->nb_globals == s->nb_temps);
1523     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1524     s->nb_globals++;
1525     ts = tcg_temp_alloc(s);
1526     ts->kind = TEMP_GLOBAL;
1527 
1528     return ts;
1529 }
1530 
1531 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1532                                             TCGReg reg, const char *name)
1533 {
1534     TCGTemp *ts;
1535 
1536     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1537 
1538     ts = tcg_global_alloc(s);
1539     ts->base_type = type;
1540     ts->type = type;
1541     ts->kind = TEMP_FIXED;
1542     ts->reg = reg;
1543     ts->name = name;
1544     tcg_regset_set_reg(s->reserved_regs, reg);
1545 
1546     return ts;
1547 }
1548 
1549 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1550 {
1551     s->frame_start = start;
1552     s->frame_end = start + size;
1553     s->frame_temp
1554         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1555 }
1556 
1557 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1558                                      intptr_t offset, const char *name)
1559 {
1560     TCGContext *s = tcg_ctx;
1561     TCGTemp *base_ts = tcgv_ptr_temp(base);
1562     TCGTemp *ts = tcg_global_alloc(s);
1563     int indirect_reg = 0;
1564 
1565     switch (base_ts->kind) {
1566     case TEMP_FIXED:
1567         break;
1568     case TEMP_GLOBAL:
1569         /* We do not support double-indirect registers.  */
1570         tcg_debug_assert(!base_ts->indirect_reg);
1571         base_ts->indirect_base = 1;
1572         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1573                             ? 2 : 1);
1574         indirect_reg = 1;
1575         break;
1576     default:
1577         g_assert_not_reached();
1578     }
1579 
1580     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1581         TCGTemp *ts2 = tcg_global_alloc(s);
1582         char buf[64];
1583 
1584         ts->base_type = TCG_TYPE_I64;
1585         ts->type = TCG_TYPE_I32;
1586         ts->indirect_reg = indirect_reg;
1587         ts->mem_allocated = 1;
1588         ts->mem_base = base_ts;
1589         ts->mem_offset = offset;
1590         pstrcpy(buf, sizeof(buf), name);
1591         pstrcat(buf, sizeof(buf), "_0");
1592         ts->name = strdup(buf);
1593 
1594         tcg_debug_assert(ts2 == ts + 1);
1595         ts2->base_type = TCG_TYPE_I64;
1596         ts2->type = TCG_TYPE_I32;
1597         ts2->indirect_reg = indirect_reg;
1598         ts2->mem_allocated = 1;
1599         ts2->mem_base = base_ts;
1600         ts2->mem_offset = offset + 4;
1601         ts2->temp_subindex = 1;
1602         pstrcpy(buf, sizeof(buf), name);
1603         pstrcat(buf, sizeof(buf), "_1");
1604         ts2->name = strdup(buf);
1605     } else {
1606         ts->base_type = type;
1607         ts->type = type;
1608         ts->indirect_reg = indirect_reg;
1609         ts->mem_allocated = 1;
1610         ts->mem_base = base_ts;
1611         ts->mem_offset = offset;
1612         ts->name = name;
1613     }
1614     return ts;
1615 }
1616 
1617 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1618 {
1619     TCGContext *s = tcg_ctx;
1620     TCGTemp *ts;
1621     int n;
1622 
1623     if (kind == TEMP_EBB) {
1624         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1625 
1626         if (idx < TCG_MAX_TEMPS) {
1627             /* There is already an available temp with the right type.  */
1628             clear_bit(idx, s->free_temps[type].l);
1629 
1630             ts = &s->temps[idx];
1631             ts->temp_allocated = 1;
1632             tcg_debug_assert(ts->base_type == type);
1633             tcg_debug_assert(ts->kind == kind);
1634             return ts;
1635         }
1636     } else {
1637         tcg_debug_assert(kind == TEMP_TB);
1638     }
1639 
1640     switch (type) {
1641     case TCG_TYPE_I32:
1642     case TCG_TYPE_V64:
1643     case TCG_TYPE_V128:
1644     case TCG_TYPE_V256:
1645         n = 1;
1646         break;
1647     case TCG_TYPE_I64:
1648         n = 64 / TCG_TARGET_REG_BITS;
1649         break;
1650     case TCG_TYPE_I128:
1651         n = 128 / TCG_TARGET_REG_BITS;
1652         break;
1653     default:
1654         g_assert_not_reached();
1655     }
1656 
1657     ts = tcg_temp_alloc(s);
1658     ts->base_type = type;
1659     ts->temp_allocated = 1;
1660     ts->kind = kind;
1661 
1662     if (n == 1) {
1663         ts->type = type;
1664     } else {
1665         ts->type = TCG_TYPE_REG;
1666 
1667         for (int i = 1; i < n; ++i) {
1668             TCGTemp *ts2 = tcg_temp_alloc(s);
1669 
1670             tcg_debug_assert(ts2 == ts + i);
1671             ts2->base_type = type;
1672             ts2->type = TCG_TYPE_REG;
1673             ts2->temp_allocated = 1;
1674             ts2->temp_subindex = i;
1675             ts2->kind = kind;
1676         }
1677     }
1678     return ts;
1679 }
1680 
1681 TCGv_vec tcg_temp_new_vec(TCGType type)
1682 {
1683     TCGTemp *t;
1684 
1685 #ifdef CONFIG_DEBUG_TCG
1686     switch (type) {
1687     case TCG_TYPE_V64:
1688         assert(TCG_TARGET_HAS_v64);
1689         break;
1690     case TCG_TYPE_V128:
1691         assert(TCG_TARGET_HAS_v128);
1692         break;
1693     case TCG_TYPE_V256:
1694         assert(TCG_TARGET_HAS_v256);
1695         break;
1696     default:
1697         g_assert_not_reached();
1698     }
1699 #endif
1700 
1701     t = tcg_temp_new_internal(type, TEMP_EBB);
1702     return temp_tcgv_vec(t);
1703 }
1704 
1705 /* Create a new temp of the same type as an existing temp.  */
1706 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1707 {
1708     TCGTemp *t = tcgv_vec_temp(match);
1709 
1710     tcg_debug_assert(t->temp_allocated != 0);
1711 
1712     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1713     return temp_tcgv_vec(t);
1714 }
1715 
1716 void tcg_temp_free_internal(TCGTemp *ts)
1717 {
1718     TCGContext *s = tcg_ctx;
1719 
1720     switch (ts->kind) {
1721     case TEMP_CONST:
1722     case TEMP_TB:
1723         /* Silently ignore free. */
1724         break;
1725     case TEMP_EBB:
1726         tcg_debug_assert(ts->temp_allocated != 0);
1727         ts->temp_allocated = 0;
1728         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1729         break;
1730     default:
1731         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1732         g_assert_not_reached();
1733     }
1734 }
1735 
1736 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1737 {
1738     TCGContext *s = tcg_ctx;
1739     GHashTable *h = s->const_table[type];
1740     TCGTemp *ts;
1741 
1742     if (h == NULL) {
1743         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1744         s->const_table[type] = h;
1745     }
1746 
1747     ts = g_hash_table_lookup(h, &val);
1748     if (ts == NULL) {
1749         int64_t *val_ptr;
1750 
1751         ts = tcg_temp_alloc(s);
1752 
1753         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1754             TCGTemp *ts2 = tcg_temp_alloc(s);
1755 
1756             tcg_debug_assert(ts2 == ts + 1);
1757 
1758             ts->base_type = TCG_TYPE_I64;
1759             ts->type = TCG_TYPE_I32;
1760             ts->kind = TEMP_CONST;
1761             ts->temp_allocated = 1;
1762 
1763             ts2->base_type = TCG_TYPE_I64;
1764             ts2->type = TCG_TYPE_I32;
1765             ts2->kind = TEMP_CONST;
1766             ts2->temp_allocated = 1;
1767             ts2->temp_subindex = 1;
1768 
1769             /*
1770              * Retain the full value of the 64-bit constant in the low
1771              * part, so that the hash table works.  Actual uses will
1772              * truncate the value to the low part.
1773              */
1774             ts[HOST_BIG_ENDIAN].val = val;
1775             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1776             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1777         } else {
1778             ts->base_type = type;
1779             ts->type = type;
1780             ts->kind = TEMP_CONST;
1781             ts->temp_allocated = 1;
1782             ts->val = val;
1783             val_ptr = &ts->val;
1784         }
1785         g_hash_table_insert(h, val_ptr, ts);
1786     }
1787 
1788     return ts;
1789 }
1790 
1791 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1792 {
1793     val = dup_const(vece, val);
1794     return temp_tcgv_vec(tcg_constant_internal(type, val));
1795 }
1796 
1797 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1798 {
1799     TCGTemp *t = tcgv_vec_temp(match);
1800 
1801     tcg_debug_assert(t->temp_allocated != 0);
1802     return tcg_constant_vec(t->base_type, vece, val);
1803 }
1804 
1805 #ifdef CONFIG_DEBUG_TCG
1806 size_t temp_idx(TCGTemp *ts)
1807 {
1808     ptrdiff_t n = ts - tcg_ctx->temps;
1809     assert(n >= 0 && n < tcg_ctx->nb_temps);
1810     return n;
1811 }
1812 
1813 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1814 {
1815     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1816 
1817     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1818     assert(o % sizeof(TCGTemp) == 0);
1819 
1820     return (void *)tcg_ctx + (uintptr_t)v;
1821 }
1822 #endif /* CONFIG_DEBUG_TCG */
1823 
1824 /* Return true if OP may appear in the opcode stream.
1825    Test the runtime variable that controls each opcode.  */
1826 bool tcg_op_supported(TCGOpcode op)
1827 {
1828     const bool have_vec
1829         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1830 
1831     switch (op) {
1832     case INDEX_op_discard:
1833     case INDEX_op_set_label:
1834     case INDEX_op_call:
1835     case INDEX_op_br:
1836     case INDEX_op_mb:
1837     case INDEX_op_insn_start:
1838     case INDEX_op_exit_tb:
1839     case INDEX_op_goto_tb:
1840     case INDEX_op_goto_ptr:
1841     case INDEX_op_qemu_ld_a32_i32:
1842     case INDEX_op_qemu_ld_a64_i32:
1843     case INDEX_op_qemu_st_a32_i32:
1844     case INDEX_op_qemu_st_a64_i32:
1845     case INDEX_op_qemu_ld_a32_i64:
1846     case INDEX_op_qemu_ld_a64_i64:
1847     case INDEX_op_qemu_st_a32_i64:
1848     case INDEX_op_qemu_st_a64_i64:
1849         return true;
1850 
1851     case INDEX_op_qemu_st8_a32_i32:
1852     case INDEX_op_qemu_st8_a64_i32:
1853         return TCG_TARGET_HAS_qemu_st8_i32;
1854 
1855     case INDEX_op_qemu_ld_a32_i128:
1856     case INDEX_op_qemu_ld_a64_i128:
1857     case INDEX_op_qemu_st_a32_i128:
1858     case INDEX_op_qemu_st_a64_i128:
1859         return TCG_TARGET_HAS_qemu_ldst_i128;
1860 
1861     case INDEX_op_mov_i32:
1862     case INDEX_op_setcond_i32:
1863     case INDEX_op_brcond_i32:
1864     case INDEX_op_ld8u_i32:
1865     case INDEX_op_ld8s_i32:
1866     case INDEX_op_ld16u_i32:
1867     case INDEX_op_ld16s_i32:
1868     case INDEX_op_ld_i32:
1869     case INDEX_op_st8_i32:
1870     case INDEX_op_st16_i32:
1871     case INDEX_op_st_i32:
1872     case INDEX_op_add_i32:
1873     case INDEX_op_sub_i32:
1874     case INDEX_op_mul_i32:
1875     case INDEX_op_and_i32:
1876     case INDEX_op_or_i32:
1877     case INDEX_op_xor_i32:
1878     case INDEX_op_shl_i32:
1879     case INDEX_op_shr_i32:
1880     case INDEX_op_sar_i32:
1881         return true;
1882 
1883     case INDEX_op_negsetcond_i32:
1884         return TCG_TARGET_HAS_negsetcond_i32;
1885     case INDEX_op_movcond_i32:
1886         return TCG_TARGET_HAS_movcond_i32;
1887     case INDEX_op_div_i32:
1888     case INDEX_op_divu_i32:
1889         return TCG_TARGET_HAS_div_i32;
1890     case INDEX_op_rem_i32:
1891     case INDEX_op_remu_i32:
1892         return TCG_TARGET_HAS_rem_i32;
1893     case INDEX_op_div2_i32:
1894     case INDEX_op_divu2_i32:
1895         return TCG_TARGET_HAS_div2_i32;
1896     case INDEX_op_rotl_i32:
1897     case INDEX_op_rotr_i32:
1898         return TCG_TARGET_HAS_rot_i32;
1899     case INDEX_op_deposit_i32:
1900         return TCG_TARGET_HAS_deposit_i32;
1901     case INDEX_op_extract_i32:
1902         return TCG_TARGET_HAS_extract_i32;
1903     case INDEX_op_sextract_i32:
1904         return TCG_TARGET_HAS_sextract_i32;
1905     case INDEX_op_extract2_i32:
1906         return TCG_TARGET_HAS_extract2_i32;
1907     case INDEX_op_add2_i32:
1908         return TCG_TARGET_HAS_add2_i32;
1909     case INDEX_op_sub2_i32:
1910         return TCG_TARGET_HAS_sub2_i32;
1911     case INDEX_op_mulu2_i32:
1912         return TCG_TARGET_HAS_mulu2_i32;
1913     case INDEX_op_muls2_i32:
1914         return TCG_TARGET_HAS_muls2_i32;
1915     case INDEX_op_muluh_i32:
1916         return TCG_TARGET_HAS_muluh_i32;
1917     case INDEX_op_mulsh_i32:
1918         return TCG_TARGET_HAS_mulsh_i32;
1919     case INDEX_op_ext8s_i32:
1920         return TCG_TARGET_HAS_ext8s_i32;
1921     case INDEX_op_ext16s_i32:
1922         return TCG_TARGET_HAS_ext16s_i32;
1923     case INDEX_op_ext8u_i32:
1924         return TCG_TARGET_HAS_ext8u_i32;
1925     case INDEX_op_ext16u_i32:
1926         return TCG_TARGET_HAS_ext16u_i32;
1927     case INDEX_op_bswap16_i32:
1928         return TCG_TARGET_HAS_bswap16_i32;
1929     case INDEX_op_bswap32_i32:
1930         return TCG_TARGET_HAS_bswap32_i32;
1931     case INDEX_op_not_i32:
1932         return TCG_TARGET_HAS_not_i32;
1933     case INDEX_op_neg_i32:
1934         return TCG_TARGET_HAS_neg_i32;
1935     case INDEX_op_andc_i32:
1936         return TCG_TARGET_HAS_andc_i32;
1937     case INDEX_op_orc_i32:
1938         return TCG_TARGET_HAS_orc_i32;
1939     case INDEX_op_eqv_i32:
1940         return TCG_TARGET_HAS_eqv_i32;
1941     case INDEX_op_nand_i32:
1942         return TCG_TARGET_HAS_nand_i32;
1943     case INDEX_op_nor_i32:
1944         return TCG_TARGET_HAS_nor_i32;
1945     case INDEX_op_clz_i32:
1946         return TCG_TARGET_HAS_clz_i32;
1947     case INDEX_op_ctz_i32:
1948         return TCG_TARGET_HAS_ctz_i32;
1949     case INDEX_op_ctpop_i32:
1950         return TCG_TARGET_HAS_ctpop_i32;
1951 
1952     case INDEX_op_brcond2_i32:
1953     case INDEX_op_setcond2_i32:
1954         return TCG_TARGET_REG_BITS == 32;
1955 
1956     case INDEX_op_mov_i64:
1957     case INDEX_op_setcond_i64:
1958     case INDEX_op_brcond_i64:
1959     case INDEX_op_ld8u_i64:
1960     case INDEX_op_ld8s_i64:
1961     case INDEX_op_ld16u_i64:
1962     case INDEX_op_ld16s_i64:
1963     case INDEX_op_ld32u_i64:
1964     case INDEX_op_ld32s_i64:
1965     case INDEX_op_ld_i64:
1966     case INDEX_op_st8_i64:
1967     case INDEX_op_st16_i64:
1968     case INDEX_op_st32_i64:
1969     case INDEX_op_st_i64:
1970     case INDEX_op_add_i64:
1971     case INDEX_op_sub_i64:
1972     case INDEX_op_mul_i64:
1973     case INDEX_op_and_i64:
1974     case INDEX_op_or_i64:
1975     case INDEX_op_xor_i64:
1976     case INDEX_op_shl_i64:
1977     case INDEX_op_shr_i64:
1978     case INDEX_op_sar_i64:
1979     case INDEX_op_ext_i32_i64:
1980     case INDEX_op_extu_i32_i64:
1981         return TCG_TARGET_REG_BITS == 64;
1982 
1983     case INDEX_op_negsetcond_i64:
1984         return TCG_TARGET_HAS_negsetcond_i64;
1985     case INDEX_op_movcond_i64:
1986         return TCG_TARGET_HAS_movcond_i64;
1987     case INDEX_op_div_i64:
1988     case INDEX_op_divu_i64:
1989         return TCG_TARGET_HAS_div_i64;
1990     case INDEX_op_rem_i64:
1991     case INDEX_op_remu_i64:
1992         return TCG_TARGET_HAS_rem_i64;
1993     case INDEX_op_div2_i64:
1994     case INDEX_op_divu2_i64:
1995         return TCG_TARGET_HAS_div2_i64;
1996     case INDEX_op_rotl_i64:
1997     case INDEX_op_rotr_i64:
1998         return TCG_TARGET_HAS_rot_i64;
1999     case INDEX_op_deposit_i64:
2000         return TCG_TARGET_HAS_deposit_i64;
2001     case INDEX_op_extract_i64:
2002         return TCG_TARGET_HAS_extract_i64;
2003     case INDEX_op_sextract_i64:
2004         return TCG_TARGET_HAS_sextract_i64;
2005     case INDEX_op_extract2_i64:
2006         return TCG_TARGET_HAS_extract2_i64;
2007     case INDEX_op_extrl_i64_i32:
2008     case INDEX_op_extrh_i64_i32:
2009         return TCG_TARGET_HAS_extr_i64_i32;
2010     case INDEX_op_ext8s_i64:
2011         return TCG_TARGET_HAS_ext8s_i64;
2012     case INDEX_op_ext16s_i64:
2013         return TCG_TARGET_HAS_ext16s_i64;
2014     case INDEX_op_ext32s_i64:
2015         return TCG_TARGET_HAS_ext32s_i64;
2016     case INDEX_op_ext8u_i64:
2017         return TCG_TARGET_HAS_ext8u_i64;
2018     case INDEX_op_ext16u_i64:
2019         return TCG_TARGET_HAS_ext16u_i64;
2020     case INDEX_op_ext32u_i64:
2021         return TCG_TARGET_HAS_ext32u_i64;
2022     case INDEX_op_bswap16_i64:
2023         return TCG_TARGET_HAS_bswap16_i64;
2024     case INDEX_op_bswap32_i64:
2025         return TCG_TARGET_HAS_bswap32_i64;
2026     case INDEX_op_bswap64_i64:
2027         return TCG_TARGET_HAS_bswap64_i64;
2028     case INDEX_op_not_i64:
2029         return TCG_TARGET_HAS_not_i64;
2030     case INDEX_op_neg_i64:
2031         return TCG_TARGET_HAS_neg_i64;
2032     case INDEX_op_andc_i64:
2033         return TCG_TARGET_HAS_andc_i64;
2034     case INDEX_op_orc_i64:
2035         return TCG_TARGET_HAS_orc_i64;
2036     case INDEX_op_eqv_i64:
2037         return TCG_TARGET_HAS_eqv_i64;
2038     case INDEX_op_nand_i64:
2039         return TCG_TARGET_HAS_nand_i64;
2040     case INDEX_op_nor_i64:
2041         return TCG_TARGET_HAS_nor_i64;
2042     case INDEX_op_clz_i64:
2043         return TCG_TARGET_HAS_clz_i64;
2044     case INDEX_op_ctz_i64:
2045         return TCG_TARGET_HAS_ctz_i64;
2046     case INDEX_op_ctpop_i64:
2047         return TCG_TARGET_HAS_ctpop_i64;
2048     case INDEX_op_add2_i64:
2049         return TCG_TARGET_HAS_add2_i64;
2050     case INDEX_op_sub2_i64:
2051         return TCG_TARGET_HAS_sub2_i64;
2052     case INDEX_op_mulu2_i64:
2053         return TCG_TARGET_HAS_mulu2_i64;
2054     case INDEX_op_muls2_i64:
2055         return TCG_TARGET_HAS_muls2_i64;
2056     case INDEX_op_muluh_i64:
2057         return TCG_TARGET_HAS_muluh_i64;
2058     case INDEX_op_mulsh_i64:
2059         return TCG_TARGET_HAS_mulsh_i64;
2060 
2061     case INDEX_op_mov_vec:
2062     case INDEX_op_dup_vec:
2063     case INDEX_op_dupm_vec:
2064     case INDEX_op_ld_vec:
2065     case INDEX_op_st_vec:
2066     case INDEX_op_add_vec:
2067     case INDEX_op_sub_vec:
2068     case INDEX_op_and_vec:
2069     case INDEX_op_or_vec:
2070     case INDEX_op_xor_vec:
2071     case INDEX_op_cmp_vec:
2072         return have_vec;
2073     case INDEX_op_dup2_vec:
2074         return have_vec && TCG_TARGET_REG_BITS == 32;
2075     case INDEX_op_not_vec:
2076         return have_vec && TCG_TARGET_HAS_not_vec;
2077     case INDEX_op_neg_vec:
2078         return have_vec && TCG_TARGET_HAS_neg_vec;
2079     case INDEX_op_abs_vec:
2080         return have_vec && TCG_TARGET_HAS_abs_vec;
2081     case INDEX_op_andc_vec:
2082         return have_vec && TCG_TARGET_HAS_andc_vec;
2083     case INDEX_op_orc_vec:
2084         return have_vec && TCG_TARGET_HAS_orc_vec;
2085     case INDEX_op_nand_vec:
2086         return have_vec && TCG_TARGET_HAS_nand_vec;
2087     case INDEX_op_nor_vec:
2088         return have_vec && TCG_TARGET_HAS_nor_vec;
2089     case INDEX_op_eqv_vec:
2090         return have_vec && TCG_TARGET_HAS_eqv_vec;
2091     case INDEX_op_mul_vec:
2092         return have_vec && TCG_TARGET_HAS_mul_vec;
2093     case INDEX_op_shli_vec:
2094     case INDEX_op_shri_vec:
2095     case INDEX_op_sari_vec:
2096         return have_vec && TCG_TARGET_HAS_shi_vec;
2097     case INDEX_op_shls_vec:
2098     case INDEX_op_shrs_vec:
2099     case INDEX_op_sars_vec:
2100         return have_vec && TCG_TARGET_HAS_shs_vec;
2101     case INDEX_op_shlv_vec:
2102     case INDEX_op_shrv_vec:
2103     case INDEX_op_sarv_vec:
2104         return have_vec && TCG_TARGET_HAS_shv_vec;
2105     case INDEX_op_rotli_vec:
2106         return have_vec && TCG_TARGET_HAS_roti_vec;
2107     case INDEX_op_rotls_vec:
2108         return have_vec && TCG_TARGET_HAS_rots_vec;
2109     case INDEX_op_rotlv_vec:
2110     case INDEX_op_rotrv_vec:
2111         return have_vec && TCG_TARGET_HAS_rotv_vec;
2112     case INDEX_op_ssadd_vec:
2113     case INDEX_op_usadd_vec:
2114     case INDEX_op_sssub_vec:
2115     case INDEX_op_ussub_vec:
2116         return have_vec && TCG_TARGET_HAS_sat_vec;
2117     case INDEX_op_smin_vec:
2118     case INDEX_op_umin_vec:
2119     case INDEX_op_smax_vec:
2120     case INDEX_op_umax_vec:
2121         return have_vec && TCG_TARGET_HAS_minmax_vec;
2122     case INDEX_op_bitsel_vec:
2123         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2124     case INDEX_op_cmpsel_vec:
2125         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2126 
2127     default:
2128         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2129         return true;
2130     }
2131 }
2132 
2133 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2134 
2135 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2136 {
2137     TCGv_i64 extend_free[MAX_CALL_IARGS];
2138     int n_extend = 0;
2139     TCGOp *op;
2140     int i, n, pi = 0, total_args;
2141 
2142     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2143         init_call_layout(info);
2144         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2145     }
2146 
2147     total_args = info->nr_out + info->nr_in + 2;
2148     op = tcg_op_alloc(INDEX_op_call, total_args);
2149 
2150 #ifdef CONFIG_PLUGIN
2151     /* Flag helpers that may affect guest state */
2152     if (tcg_ctx->plugin_insn &&
2153         !(info->flags & TCG_CALL_PLUGIN) &&
2154         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2155         tcg_ctx->plugin_insn->calls_helpers = true;
2156     }
2157 #endif
2158 
2159     TCGOP_CALLO(op) = n = info->nr_out;
2160     switch (n) {
2161     case 0:
2162         tcg_debug_assert(ret == NULL);
2163         break;
2164     case 1:
2165         tcg_debug_assert(ret != NULL);
2166         op->args[pi++] = temp_arg(ret);
2167         break;
2168     case 2:
2169     case 4:
2170         tcg_debug_assert(ret != NULL);
2171         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2172         tcg_debug_assert(ret->temp_subindex == 0);
2173         for (i = 0; i < n; ++i) {
2174             op->args[pi++] = temp_arg(ret + i);
2175         }
2176         break;
2177     default:
2178         g_assert_not_reached();
2179     }
2180 
2181     TCGOP_CALLI(op) = n = info->nr_in;
2182     for (i = 0; i < n; i++) {
2183         const TCGCallArgumentLoc *loc = &info->in[i];
2184         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2185 
2186         switch (loc->kind) {
2187         case TCG_CALL_ARG_NORMAL:
2188         case TCG_CALL_ARG_BY_REF:
2189         case TCG_CALL_ARG_BY_REF_N:
2190             op->args[pi++] = temp_arg(ts);
2191             break;
2192 
2193         case TCG_CALL_ARG_EXTEND_U:
2194         case TCG_CALL_ARG_EXTEND_S:
2195             {
2196                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2197                 TCGv_i32 orig = temp_tcgv_i32(ts);
2198 
2199                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2200                     tcg_gen_ext_i32_i64(temp, orig);
2201                 } else {
2202                     tcg_gen_extu_i32_i64(temp, orig);
2203                 }
2204                 op->args[pi++] = tcgv_i64_arg(temp);
2205                 extend_free[n_extend++] = temp;
2206             }
2207             break;
2208 
2209         default:
2210             g_assert_not_reached();
2211         }
2212     }
2213     op->args[pi++] = (uintptr_t)info->func;
2214     op->args[pi++] = (uintptr_t)info;
2215     tcg_debug_assert(pi == total_args);
2216 
2217     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2218 
2219     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2220     for (i = 0; i < n_extend; ++i) {
2221         tcg_temp_free_i64(extend_free[i]);
2222     }
2223 }
2224 
2225 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2226 {
2227     tcg_gen_callN(info, ret, NULL);
2228 }
2229 
2230 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2231 {
2232     tcg_gen_callN(info, ret, &t1);
2233 }
2234 
2235 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2236 {
2237     TCGTemp *args[2] = { t1, t2 };
2238     tcg_gen_callN(info, ret, args);
2239 }
2240 
2241 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2242                    TCGTemp *t2, TCGTemp *t3)
2243 {
2244     TCGTemp *args[3] = { t1, t2, t3 };
2245     tcg_gen_callN(info, ret, args);
2246 }
2247 
2248 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2249                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2250 {
2251     TCGTemp *args[4] = { t1, t2, t3, t4 };
2252     tcg_gen_callN(info, ret, args);
2253 }
2254 
2255 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2256                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2257 {
2258     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2259     tcg_gen_callN(info, ret, args);
2260 }
2261 
2262 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2263                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2264 {
2265     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2266     tcg_gen_callN(info, ret, args);
2267 }
2268 
2269 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2270                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2271                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2272 {
2273     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2274     tcg_gen_callN(info, ret, args);
2275 }
2276 
2277 static void tcg_reg_alloc_start(TCGContext *s)
2278 {
2279     int i, n;
2280 
2281     for (i = 0, n = s->nb_temps; i < n; i++) {
2282         TCGTemp *ts = &s->temps[i];
2283         TCGTempVal val = TEMP_VAL_MEM;
2284 
2285         switch (ts->kind) {
2286         case TEMP_CONST:
2287             val = TEMP_VAL_CONST;
2288             break;
2289         case TEMP_FIXED:
2290             val = TEMP_VAL_REG;
2291             break;
2292         case TEMP_GLOBAL:
2293             break;
2294         case TEMP_EBB:
2295             val = TEMP_VAL_DEAD;
2296             /* fall through */
2297         case TEMP_TB:
2298             ts->mem_allocated = 0;
2299             break;
2300         default:
2301             g_assert_not_reached();
2302         }
2303         ts->val_type = val;
2304     }
2305 
2306     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2307 }
2308 
2309 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2310                                  TCGTemp *ts)
2311 {
2312     int idx = temp_idx(ts);
2313 
2314     switch (ts->kind) {
2315     case TEMP_FIXED:
2316     case TEMP_GLOBAL:
2317         pstrcpy(buf, buf_size, ts->name);
2318         break;
2319     case TEMP_TB:
2320         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2321         break;
2322     case TEMP_EBB:
2323         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2324         break;
2325     case TEMP_CONST:
2326         switch (ts->type) {
2327         case TCG_TYPE_I32:
2328             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2329             break;
2330 #if TCG_TARGET_REG_BITS > 32
2331         case TCG_TYPE_I64:
2332             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2333             break;
2334 #endif
2335         case TCG_TYPE_V64:
2336         case TCG_TYPE_V128:
2337         case TCG_TYPE_V256:
2338             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2339                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2340             break;
2341         default:
2342             g_assert_not_reached();
2343         }
2344         break;
2345     }
2346     return buf;
2347 }
2348 
2349 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2350                              int buf_size, TCGArg arg)
2351 {
2352     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2353 }
2354 
2355 static const char * const cond_name[] =
2356 {
2357     [TCG_COND_NEVER] = "never",
2358     [TCG_COND_ALWAYS] = "always",
2359     [TCG_COND_EQ] = "eq",
2360     [TCG_COND_NE] = "ne",
2361     [TCG_COND_LT] = "lt",
2362     [TCG_COND_GE] = "ge",
2363     [TCG_COND_LE] = "le",
2364     [TCG_COND_GT] = "gt",
2365     [TCG_COND_LTU] = "ltu",
2366     [TCG_COND_GEU] = "geu",
2367     [TCG_COND_LEU] = "leu",
2368     [TCG_COND_GTU] = "gtu"
2369 };
2370 
2371 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2372 {
2373     [MO_UB]   = "ub",
2374     [MO_SB]   = "sb",
2375     [MO_LEUW] = "leuw",
2376     [MO_LESW] = "lesw",
2377     [MO_LEUL] = "leul",
2378     [MO_LESL] = "lesl",
2379     [MO_LEUQ] = "leq",
2380     [MO_BEUW] = "beuw",
2381     [MO_BESW] = "besw",
2382     [MO_BEUL] = "beul",
2383     [MO_BESL] = "besl",
2384     [MO_BEUQ] = "beq",
2385     [MO_128 + MO_BE] = "beo",
2386     [MO_128 + MO_LE] = "leo",
2387 };
2388 
2389 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2390     [MO_UNALN >> MO_ASHIFT]    = "un+",
2391     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2392     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2393     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2394     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2395     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2396     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2397     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2398 };
2399 
2400 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2401     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2402     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2403     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2404     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2405     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2406     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2407 };
2408 
2409 static const char bswap_flag_name[][6] = {
2410     [TCG_BSWAP_IZ] = "iz",
2411     [TCG_BSWAP_OZ] = "oz",
2412     [TCG_BSWAP_OS] = "os",
2413     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2414     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2415 };
2416 
2417 static inline bool tcg_regset_single(TCGRegSet d)
2418 {
2419     return (d & (d - 1)) == 0;
2420 }
2421 
2422 static inline TCGReg tcg_regset_first(TCGRegSet d)
2423 {
2424     if (TCG_TARGET_NB_REGS <= 32) {
2425         return ctz32(d);
2426     } else {
2427         return ctz64(d);
2428     }
2429 }
2430 
/*
 * "No-error" fprintf: evaluates to the number of characters written,
 * or to 0 when fprintf reports a failure, so that the result can be
 * added straight into a running column count.
 */
#define ne_fprintf(...) \
    ({ int n_out_ = fprintf(__VA_ARGS__); n_out_ < 0 ? 0 : n_out_; })
2434 
2435 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2436 {
2437     char buf[128];
2438     TCGOp *op;
2439 
2440     QTAILQ_FOREACH(op, &s->ops, link) {
2441         int i, k, nb_oargs, nb_iargs, nb_cargs;
2442         const TCGOpDef *def;
2443         TCGOpcode c;
2444         int col = 0;
2445 
2446         c = op->opc;
2447         def = &tcg_op_defs[c];
2448 
2449         if (c == INDEX_op_insn_start) {
2450             nb_oargs = 0;
2451             col += ne_fprintf(f, "\n ----");
2452 
2453             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2454                 col += ne_fprintf(f, " %016" PRIx64,
2455                                   tcg_get_insn_start_param(op, i));
2456             }
2457         } else if (c == INDEX_op_call) {
2458             const TCGHelperInfo *info = tcg_call_info(op);
2459             void *func = tcg_call_func(op);
2460 
2461             /* variable number of arguments */
2462             nb_oargs = TCGOP_CALLO(op);
2463             nb_iargs = TCGOP_CALLI(op);
2464             nb_cargs = def->nb_cargs;
2465 
2466             col += ne_fprintf(f, " %s ", def->name);
2467 
2468             /*
2469              * Print the function name from TCGHelperInfo, if available.
2470              * Note that plugins have a template function for the info,
2471              * but the actual function pointer comes from the plugin.
2472              */
2473             if (func == info->func) {
2474                 col += ne_fprintf(f, "%s", info->name);
2475             } else {
2476                 col += ne_fprintf(f, "plugin(%p)", func);
2477             }
2478 
2479             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2480             for (i = 0; i < nb_oargs; i++) {
2481                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2482                                                             op->args[i]));
2483             }
2484             for (i = 0; i < nb_iargs; i++) {
2485                 TCGArg arg = op->args[nb_oargs + i];
2486                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2487                 col += ne_fprintf(f, ",%s", t);
2488             }
2489         } else {
2490             col += ne_fprintf(f, " %s ", def->name);
2491 
2492             nb_oargs = def->nb_oargs;
2493             nb_iargs = def->nb_iargs;
2494             nb_cargs = def->nb_cargs;
2495 
2496             if (def->flags & TCG_OPF_VECTOR) {
2497                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2498                                   8 << TCGOP_VECE(op));
2499             }
2500 
2501             k = 0;
2502             for (i = 0; i < nb_oargs; i++) {
2503                 const char *sep =  k ? "," : "";
2504                 col += ne_fprintf(f, "%s%s", sep,
2505                                   tcg_get_arg_str(s, buf, sizeof(buf),
2506                                                   op->args[k++]));
2507             }
2508             for (i = 0; i < nb_iargs; i++) {
2509                 const char *sep =  k ? "," : "";
2510                 col += ne_fprintf(f, "%s%s", sep,
2511                                   tcg_get_arg_str(s, buf, sizeof(buf),
2512                                                   op->args[k++]));
2513             }
2514             switch (c) {
2515             case INDEX_op_brcond_i32:
2516             case INDEX_op_setcond_i32:
2517             case INDEX_op_negsetcond_i32:
2518             case INDEX_op_movcond_i32:
2519             case INDEX_op_brcond2_i32:
2520             case INDEX_op_setcond2_i32:
2521             case INDEX_op_brcond_i64:
2522             case INDEX_op_setcond_i64:
2523             case INDEX_op_negsetcond_i64:
2524             case INDEX_op_movcond_i64:
2525             case INDEX_op_cmp_vec:
2526             case INDEX_op_cmpsel_vec:
2527                 if (op->args[k] < ARRAY_SIZE(cond_name)
2528                     && cond_name[op->args[k]]) {
2529                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2530                 } else {
2531                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2532                 }
2533                 i = 1;
2534                 break;
2535             case INDEX_op_qemu_ld_a32_i32:
2536             case INDEX_op_qemu_ld_a64_i32:
2537             case INDEX_op_qemu_st_a32_i32:
2538             case INDEX_op_qemu_st_a64_i32:
2539             case INDEX_op_qemu_st8_a32_i32:
2540             case INDEX_op_qemu_st8_a64_i32:
2541             case INDEX_op_qemu_ld_a32_i64:
2542             case INDEX_op_qemu_ld_a64_i64:
2543             case INDEX_op_qemu_st_a32_i64:
2544             case INDEX_op_qemu_st_a64_i64:
2545             case INDEX_op_qemu_ld_a32_i128:
2546             case INDEX_op_qemu_ld_a64_i128:
2547             case INDEX_op_qemu_st_a32_i128:
2548             case INDEX_op_qemu_st_a64_i128:
2549                 {
2550                     const char *s_al, *s_op, *s_at;
2551                     MemOpIdx oi = op->args[k++];
2552                     MemOp op = get_memop(oi);
2553                     unsigned ix = get_mmuidx(oi);
2554 
2555                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2556                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2557                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2558                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2559 
2560                     /* If all fields are accounted for, print symbolically. */
2561                     if (!op && s_al && s_op && s_at) {
2562                         col += ne_fprintf(f, ",%s%s%s,%u",
2563                                           s_at, s_al, s_op, ix);
2564                     } else {
2565                         op = get_memop(oi);
2566                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2567                     }
2568                     i = 1;
2569                 }
2570                 break;
2571             case INDEX_op_bswap16_i32:
2572             case INDEX_op_bswap16_i64:
2573             case INDEX_op_bswap32_i32:
2574             case INDEX_op_bswap32_i64:
2575             case INDEX_op_bswap64_i64:
2576                 {
2577                     TCGArg flags = op->args[k];
2578                     const char *name = NULL;
2579 
2580                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2581                         name = bswap_flag_name[flags];
2582                     }
2583                     if (name) {
2584                         col += ne_fprintf(f, ",%s", name);
2585                     } else {
2586                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2587                     }
2588                     i = k = 1;
2589                 }
2590                 break;
2591             default:
2592                 i = 0;
2593                 break;
2594             }
2595             switch (c) {
2596             case INDEX_op_set_label:
2597             case INDEX_op_br:
2598             case INDEX_op_brcond_i32:
2599             case INDEX_op_brcond_i64:
2600             case INDEX_op_brcond2_i32:
2601                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2602                                   arg_label(op->args[k])->id);
2603                 i++, k++;
2604                 break;
2605             case INDEX_op_mb:
2606                 {
2607                     TCGBar membar = op->args[k];
2608                     const char *b_op, *m_op;
2609 
2610                     switch (membar & TCG_BAR_SC) {
2611                     case 0:
2612                         b_op = "none";
2613                         break;
2614                     case TCG_BAR_LDAQ:
2615                         b_op = "acq";
2616                         break;
2617                     case TCG_BAR_STRL:
2618                         b_op = "rel";
2619                         break;
2620                     case TCG_BAR_SC:
2621                         b_op = "seq";
2622                         break;
2623                     default:
2624                         g_assert_not_reached();
2625                     }
2626 
2627                     switch (membar & TCG_MO_ALL) {
2628                     case 0:
2629                         m_op = "none";
2630                         break;
2631                     case TCG_MO_LD_LD:
2632                         m_op = "rr";
2633                         break;
2634                     case TCG_MO_LD_ST:
2635                         m_op = "rw";
2636                         break;
2637                     case TCG_MO_ST_LD:
2638                         m_op = "wr";
2639                         break;
2640                     case TCG_MO_ST_ST:
2641                         m_op = "ww";
2642                         break;
2643                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2644                         m_op = "rr+rw";
2645                         break;
2646                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2647                         m_op = "rr+wr";
2648                         break;
2649                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2650                         m_op = "rr+ww";
2651                         break;
2652                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2653                         m_op = "rw+wr";
2654                         break;
2655                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2656                         m_op = "rw+ww";
2657                         break;
2658                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2659                         m_op = "wr+ww";
2660                         break;
2661                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2662                         m_op = "rr+rw+wr";
2663                         break;
2664                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2665                         m_op = "rr+rw+ww";
2666                         break;
2667                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2668                         m_op = "rr+wr+ww";
2669                         break;
2670                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2671                         m_op = "rw+wr+ww";
2672                         break;
2673                     case TCG_MO_ALL:
2674                         m_op = "all";
2675                         break;
2676                     default:
2677                         g_assert_not_reached();
2678                     }
2679 
2680                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2681                     i++, k++;
2682                 }
2683                 break;
2684             default:
2685                 break;
2686             }
2687             for (; i < nb_cargs; i++, k++) {
2688                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2689                                   op->args[k]);
2690             }
2691         }
2692 
2693         if (have_prefs || op->life) {
2694             for (; col < 40; ++col) {
2695                 putc(' ', f);
2696             }
2697         }
2698 
2699         if (op->life) {
2700             unsigned life = op->life;
2701 
2702             if (life & (SYNC_ARG * 3)) {
2703                 ne_fprintf(f, "  sync:");
2704                 for (i = 0; i < 2; ++i) {
2705                     if (life & (SYNC_ARG << i)) {
2706                         ne_fprintf(f, " %d", i);
2707                     }
2708                 }
2709             }
2710             life /= DEAD_ARG;
2711             if (life) {
2712                 ne_fprintf(f, "  dead:");
2713                 for (i = 0; life; ++i, life >>= 1) {
2714                     if (life & 1) {
2715                         ne_fprintf(f, " %d", i);
2716                     }
2717                 }
2718             }
2719         }
2720 
2721         if (have_prefs) {
2722             for (i = 0; i < nb_oargs; ++i) {
2723                 TCGRegSet set = output_pref(op, i);
2724 
2725                 if (i == 0) {
2726                     ne_fprintf(f, "  pref=");
2727                 } else {
2728                     ne_fprintf(f, ",");
2729                 }
2730                 if (set == 0) {
2731                     ne_fprintf(f, "none");
2732                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2733                     ne_fprintf(f, "all");
2734 #ifdef CONFIG_DEBUG_TCG
2735                 } else if (tcg_regset_single(set)) {
2736                     TCGReg reg = tcg_regset_first(set);
2737                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2738 #endif
2739                 } else if (TCG_TARGET_NB_REGS <= 32) {
2740                     ne_fprintf(f, "0x%x", (uint32_t)set);
2741                 } else {
2742                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2743                 }
2744             }
2745         }
2746 
2747         putc('\n', f);
2748     }
2749 }
2750 
2751 /* we give more priority to constraints with less registers */
2752 static int get_constraint_priority(const TCGOpDef *def, int k)
2753 {
2754     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2755     int n = ctpop64(arg_ct->regs);
2756 
2757     /*
2758      * Sort constraints of a single register first, which includes output
2759      * aliases (which must exactly match the input already allocated).
2760      */
2761     if (n == 1 || arg_ct->oalias) {
2762         return INT_MAX;
2763     }
2764 
2765     /*
2766      * Sort register pairs next, first then second immediately after.
2767      * Arbitrarily sort multiple pairs by the index of the first reg;
2768      * there shouldn't be many pairs.
2769      */
2770     switch (arg_ct->pair) {
2771     case 1:
2772     case 3:
2773         return (k + 1) * 2;
2774     case 2:
2775         return (arg_ct->pair_index + 1) * 2 - 1;
2776     }
2777 
2778     /* Finally, sort by decreasing register count. */
2779     assert(n > 1);
2780     return -n;
2781 }
2782 
2783 /* sort from highest priority to lowest */
2784 static void sort_constraints(TCGOpDef *def, int start, int n)
2785 {
2786     int i, j;
2787     TCGArgConstraint *a = def->args_ct;
2788 
2789     for (i = 0; i < n; i++) {
2790         a[start + i].sort_index = start + i;
2791     }
2792     if (n <= 1) {
2793         return;
2794     }
2795     for (i = 0; i < n - 1; i++) {
2796         for (j = i + 1; j < n; j++) {
2797             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2798             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2799             if (p1 < p2) {
2800                 int tmp = a[start + i].sort_index;
2801                 a[start + i].sort_index = a[start + j].sort_index;
2802                 a[start + j].sort_index = tmp;
2803             }
2804         }
2805     }
2806 }
2807 
/*
 * Parse the target's constraint strings into tcg_op_defs[].args_ct.
 *
 * For every opcode that is not flagged TCG_OPF_NOT_PRESENT and that has at
 * least one input or output argument, look up its constraint set through
 * tcg_target_op_def(), decode one constraint string per argument, fix up
 * register pairs that are aliased between inputs and outputs, and finally
 * compute the sort order of the output and input constraints.
 *
 * Leading characters recognised in a constraint string:
 *   '0'..'9'  input aliased to the output with that index (must be alone)
 *   '&'       output flagged newreg; further letters follow
 *   'p'       second half of a pair with the previous argument, register
 *             set shifted up by one (must be alone)
 *   'm'       first half of a pair with the previous argument, register
 *             set shifted down by one (must be alone)
 * Remaining letters are 'i' (TCG_CT_CONST) and whatever the CONST()/REGS()
 * entries of tcg-target-con-str.h define.
 *
 * The context argument S is not referenced in this function body.
 */
2808 static void process_op_defs(TCGContext *s)
2809 {
2810     TCGOpcode op;
2811 
2812     for (op = 0; op < NB_OPS; op++) {
2813         TCGOpDef *def = &tcg_op_defs[op];
2814         const TCGTargetOpDef *tdefs;
2815         bool saw_alias_pair = false;
2816         int i, o, i2, o2, nb_args;
2817 
         /* Opcodes flagged as not present get no constraints. */
2818         if (def->flags & TCG_OPF_NOT_PRESENT) {
2819             continue;
2820         }
2821 
         /* Nothing to constrain when there are no inputs or outputs. */
2822         nb_args = def->nb_iargs + def->nb_oargs;
2823         if (nb_args == 0) {
2824             continue;
2825         }
2826 
2827         /*
2828          * Macro magic should make it impossible, but double-check that
2829          * the array index is in range.  Since the signedness of an enum
2830          * is implementation defined, force the result to unsigned.
2831          */
2832         unsigned con_set = tcg_target_op_def(op);
2833         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2834         tdefs = &constraint_sets[con_set];
2835 
         /*
          * One constraint string per argument.  Indices below nb_oargs
          * are outputs; the rest are inputs.
          */
2836         for (i = 0; i < nb_args; i++) {
2837             const char *ct_str = tdefs->args_ct_str[i];
2838             bool input_p = i >= def->nb_oargs;
2839 
2840             /* Incomplete TCGTargetOpDef entry. */
2841             tcg_debug_assert(ct_str != NULL);
2842 
             /* Handle the special leading characters first. */
2843             switch (*ct_str) {
2844             case '0' ... '9':
                 /*
                  * Input aliased to output O: start from a copy of the
                  * output's constraint, then cross-link the two arguments
                  * through alias_index.
                  */
2845                 o = *ct_str - '0';
2846                 tcg_debug_assert(input_p);
2847                 tcg_debug_assert(o < def->nb_oargs);
2848                 tcg_debug_assert(def->args_ct[o].regs != 0);
2849                 tcg_debug_assert(!def->args_ct[o].oalias);
2850                 def->args_ct[i] = def->args_ct[o];
2851                 /* The output sets oalias.  */
2852                 def->args_ct[o].oalias = 1;
2853                 def->args_ct[o].alias_index = i;
2854                 /* The input sets ialias. */
2855                 def->args_ct[i].ialias = 1;
2856                 def->args_ct[i].alias_index = o;
                 /* The copy may have carried pair info; fixed up later. */
2857                 if (def->args_ct[i].pair) {
2858                     saw_alias_pair = true;
2859                 }
2860                 tcg_debug_assert(ct_str[1] == '\0');
2861                 continue;
2862 
2863             case '&':
                 /* Outputs only; the remaining letters are parsed below. */
2864                 tcg_debug_assert(!input_p);
2865                 def->args_ct[i].newreg = true;
2866                 ct_str++;
2867                 break;
2868 
2869             case 'p': /* plus */
2870                 /* Allocate to the register after the previous. */
                 /* The previous argument must be of the same class (in/out). */
2871                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2872                 o = i - 1;
2873                 tcg_debug_assert(!def->args_ct[o].pair);
2874                 tcg_debug_assert(!def->args_ct[o].ct);
2875                 def->args_ct[i] = (TCGArgConstraint){
2876                     .pair = 2,
2877                     .pair_index = o,
2878                     .regs = def->args_ct[o].regs << 1,
2879                 };
2880                 def->args_ct[o].pair = 1;
2881                 def->args_ct[o].pair_index = i;
2882                 tcg_debug_assert(ct_str[1] == '\0');
2883                 continue;
2884 
2885             case 'm': /* minus */
2886                 /* Allocate to the register before the previous. */
                 /* Mirror image of 'p': this argument becomes the first half. */
2887                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2888                 o = i - 1;
2889                 tcg_debug_assert(!def->args_ct[o].pair);
2890                 tcg_debug_assert(!def->args_ct[o].ct);
2891                 def->args_ct[i] = (TCGArgConstraint){
2892                     .pair = 1,
2893                     .pair_index = o,
2894                     .regs = def->args_ct[o].regs >> 1,
2895                 };
2896                 def->args_ct[o].pair = 2;
2897                 def->args_ct[o].pair_index = i;
2898                 tcg_debug_assert(ct_str[1] == '\0');
2899                 continue;
2900             }
2901 
             /*
              * Accumulate the constant and register classes named by each
              * remaining letter, up to the end of the string.
              */
2902             do {
2903                 switch (*ct_str) {
2904                 case 'i':
2905                     def->args_ct[i].ct |= TCG_CT_CONST;
2906                     break;
2907 
2908                 /* Include all of the target-specific constraints. */
2909 
2910 #undef CONST
2911 #define CONST(CASE, MASK) \
2912     case CASE: def->args_ct[i].ct |= MASK; break;
2913 #define REGS(CASE, MASK) \
2914     case CASE: def->args_ct[i].regs |= MASK; break;
2915 
2916 #include "tcg-target-con-str.h"
2917 
2918 #undef REGS
2919 #undef CONST
2920                 default:
2921                 case '0' ... '9':
2922                 case '&':
2923                 case 'p':
2924                 case 'm':
2925                     /* Typo in TCGTargetOpDef constraint. */
2926                     g_assert_not_reached();
2927                 }
2928             } while (*++ct_str != '\0');
2929         }
2930 
2931         /* TCGTargetOpDef entry with too much information? */
2932         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2933 
2934         /*
2935          * Fix up output pairs that are aliased with inputs.
2936          * When we created the alias, we copied pair from the output.
2937          * There are three cases:
2938          *    (1a) Pairs of inputs alias pairs of outputs.
2939          *    (1b) One input aliases the first of a pair of outputs.
2940          *    (2)  One input aliases the second of a pair of outputs.
2941          *
2942          * Case 1a is handled by making sure that the pair_index'es are
2943          * properly updated so that they appear the same as a pair of inputs.
2944          *
2945          * Case 1b is handled by setting the pair_index of the input to
2946          * itself, simply so it doesn't point to an unrelated argument.
2947          * Since we don't encounter the "second" during the input allocation
2948          * phase, nothing happens with the second half of the input pair.
2949          *
2950          * Case 2 is handled by setting the second input to pair=3, the
2951          * first output to pair=3, and the pair_index'es to match.
2952          */
2953         if (saw_alias_pair) {
2954             for (i = def->nb_oargs; i < nb_args; i++) {
2955                 /*
2956                  * Since [0-9pm] must be alone in the constraint string,
2957                  * the only way they can both be set is if the pair comes
2958                  * from the output alias.
2959                  */
2960                 if (!def->args_ct[i].ialias) {
2961                     continue;
2962                 }
2963                 switch (def->args_ct[i].pair) {
2964                 case 0:
2965                     break;
2966                 case 1:
                     /* Input I aliases the first half (O) of an output pair. */
2967                     o = def->args_ct[i].alias_index;
2968                     o2 = def->args_ct[o].pair_index;
2969                     tcg_debug_assert(def->args_ct[o].pair == 1);
2970                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2971                     if (def->args_ct[o2].oalias) {
2972                         /* Case 1a */
2973                         i2 = def->args_ct[o2].alias_index;
2974                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2975                         def->args_ct[i2].pair_index = i;
2976                         def->args_ct[i].pair_index = i2;
2977                     } else {
2978                         /* Case 1b */
2979                         def->args_ct[i].pair_index = i;
2980                     }
2981                     break;
2982                 case 2:
                     /* Input I aliases the second half (O) of an output pair. */
2983                     o = def->args_ct[i].alias_index;
2984                     o2 = def->args_ct[o].pair_index;
2985                     tcg_debug_assert(def->args_ct[o].pair == 2);
2986                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2987                     if (def->args_ct[o2].oalias) {
2988                         /* Case 1a */
2989                         i2 = def->args_ct[o2].alias_index;
2990                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2991                         def->args_ct[i2].pair_index = i;
2992                         def->args_ct[i].pair_index = i2;
2993                     } else {
2994                         /* Case 2 */
2995                         def->args_ct[i].pair = 3;
2996                         def->args_ct[o2].pair = 3;
2997                         def->args_ct[i].pair_index = o2;
2998                         def->args_ct[o2].pair_index = i;
2999                     }
3000                     break;
3001                 default:
3002                     g_assert_not_reached();
3003                 }
3004             }
3005         }
3006 
3007         /* sort the constraints (XXX: this is just a heuristic) */
         /* Outputs and inputs are sorted separately, each within its own range. */
3008         sort_constraints(def, 0, def->nb_oargs);
3009         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3010     }
3011 }
3012 
3013 static void remove_label_use(TCGOp *op, int idx)
3014 {
3015     TCGLabel *label = arg_label(op->args[idx]);
3016     TCGLabelUse *use;
3017 
3018     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3019         if (use->op == op) {
3020             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3021             return;
3022         }
3023     }
3024     g_assert_not_reached();
3025 }
3026 
3027 void tcg_op_remove(TCGContext *s, TCGOp *op)
3028 {
3029     switch (op->opc) {
3030     case INDEX_op_br:
3031         remove_label_use(op, 0);
3032         break;
3033     case INDEX_op_brcond_i32:
3034     case INDEX_op_brcond_i64:
3035         remove_label_use(op, 3);
3036         break;
3037     case INDEX_op_brcond2_i32:
3038         remove_label_use(op, 5);
3039         break;
3040     default:
3041         break;
3042     }
3043 
3044     QTAILQ_REMOVE(&s->ops, op, link);
3045     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3046     s->nb_ops--;
3047 }
3048 
3049 void tcg_remove_ops_after(TCGOp *op)
3050 {
3051     TCGContext *s = tcg_ctx;
3052 
3053     while (true) {
3054         TCGOp *last = tcg_last_op();
3055         if (last == op) {
3056             return;
3057         }
3058         tcg_op_remove(s, last);
3059     }
3060 }
3061 
3062 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3063 {
3064     TCGContext *s = tcg_ctx;
3065     TCGOp *op = NULL;
3066 
3067     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3068         QTAILQ_FOREACH(op, &s->free_ops, link) {
3069             if (nargs <= op->nargs) {
3070                 QTAILQ_REMOVE(&s->free_ops, op, link);
3071                 nargs = op->nargs;
3072                 goto found;
3073             }
3074         }
3075     }
3076 
3077     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3078     nargs = MAX(4, nargs);
3079     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3080 
3081  found:
3082     memset(op, 0, offsetof(TCGOp, link));
3083     op->opc = opc;
3084     op->nargs = nargs;
3085 
3086     /* Check for bitfield overflow. */
3087     tcg_debug_assert(op->nargs == nargs);
3088 
3089     s->nb_ops++;
3090     return op;
3091 }
3092 
3093 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3094 {
3095     TCGOp *op = tcg_op_alloc(opc, nargs);
3096     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3097     return op;
3098 }
3099 
3100 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3101                             TCGOpcode opc, unsigned nargs)
3102 {
3103     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3104     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3105     return new_op;
3106 }
3107 
3108 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3109                            TCGOpcode opc, unsigned nargs)
3110 {
3111     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3112     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3113     return new_op;
3114 }
3115 
3116 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3117 {
3118     TCGLabelUse *u;
3119 
3120     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3121         TCGOp *op = u->op;
3122         switch (op->opc) {
3123         case INDEX_op_br:
3124             op->args[0] = label_arg(to);
3125             break;
3126         case INDEX_op_brcond_i32:
3127         case INDEX_op_brcond_i64:
3128             op->args[3] = label_arg(to);
3129             break;
3130         case INDEX_op_brcond2_i32:
3131             op->args[5] = label_arg(to);
3132             break;
3133         default:
3134             g_assert_not_reached();
3135         }
3136     }
3137 
3138     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3139 }
3140 
3141 /* Reachable analysis : remove unreachable code.  */
3142 static void __attribute__((noinline))
3143 reachable_code_pass(TCGContext *s)
3144 {
3145     TCGOp *op, *op_next, *op_prev;
3146     bool dead = false;
3147 
3148     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3149         bool remove = dead;
3150         TCGLabel *label;
3151 
3152         switch (op->opc) {
3153         case INDEX_op_set_label:
3154             label = arg_label(op->args[0]);
3155 
3156             /*
3157              * Note that the first op in the TB is always a load,
3158              * so there is always something before a label.
3159              */
3160             op_prev = QTAILQ_PREV(op, link);
3161 
3162             /*
3163              * If we find two sequential labels, move all branches to
3164              * reference the second label and remove the first label.
3165              * Do this before branch to next optimization, so that the
3166              * middle label is out of the way.
3167              */
3168             if (op_prev->opc == INDEX_op_set_label) {
3169                 move_label_uses(label, arg_label(op_prev->args[0]));
3170                 tcg_op_remove(s, op_prev);
3171                 op_prev = QTAILQ_PREV(op, link);
3172             }
3173 
3174             /*
3175              * Optimization can fold conditional branches to unconditional.
3176              * If we find a label which is preceded by an unconditional
3177              * branch to next, remove the branch.  We couldn't do this when
3178              * processing the branch because any dead code between the branch
3179              * and label had not yet been removed.
3180              */
3181             if (op_prev->opc == INDEX_op_br &&
3182                 label == arg_label(op_prev->args[0])) {
3183                 tcg_op_remove(s, op_prev);
3184                 /* Fall through means insns become live again.  */
3185                 dead = false;
3186             }
3187 
3188             if (QSIMPLEQ_EMPTY(&label->branches)) {
3189                 /*
3190                  * While there is an occasional backward branch, virtually
3191                  * all branches generated by the translators are forward.
3192                  * Which means that generally we will have already removed
3193                  * all references to the label that will be, and there is
3194                  * little to be gained by iterating.
3195                  */
3196                 remove = true;
3197             } else {
3198                 /* Once we see a label, insns become live again.  */
3199                 dead = false;
3200                 remove = false;
3201             }
3202             break;
3203 
3204         case INDEX_op_br:
3205         case INDEX_op_exit_tb:
3206         case INDEX_op_goto_ptr:
3207             /* Unconditional branches; everything following is dead.  */
3208             dead = true;
3209             break;
3210 
3211         case INDEX_op_call:
3212             /* Notice noreturn helper calls, raising exceptions.  */
3213             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3214                 dead = true;
3215             }
3216             break;
3217 
3218         case INDEX_op_insn_start:
3219             /* Never remove -- we need to keep these for unwind.  */
3220             remove = false;
3221             break;
3222 
3223         default:
3224             break;
3225         }
3226 
3227         if (remove) {
3228             tcg_op_remove(s, op);
3229         }
3230     }
3231 }
3232 
/*
 * Flag bits kept in a temp's state field by the liveness code below:
 * TS_DEAD marks the temp as dead, TS_MEM marks it as being (or needing
 * to be) in memory.  The two may be combined.
 */
3233 #define TS_DEAD  1
3234 #define TS_MEM   2
3235 
/*
 * Test the dead / sync bit for argument N.  Both macros read a variable
 * named arg_life that must be in scope where they are expanded.
 */
3236 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3237 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3238 
3239 /* For liveness_pass_1, the register preferences for a given temp.  */
3240 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3241 {
3242     return ts->state_ptr;
3243 }
3244 
3245 /* For liveness_pass_1, reset the preferences for a given temp to the
3246  * maximal regset for its type.
3247  */
3248 static inline void la_reset_pref(TCGTemp *ts)
3249 {
3250     *la_temp_pref(ts)
3251         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3252 }
3253 
3254 /* liveness analysis: end of function: all temps are dead, and globals
3255    should be in memory. */
3256 static void la_func_end(TCGContext *s, int ng, int nt)
3257 {
3258     int i;
3259 
3260     for (i = 0; i < ng; ++i) {
3261         s->temps[i].state = TS_DEAD | TS_MEM;
3262         la_reset_pref(&s->temps[i]);
3263     }
3264     for (i = ng; i < nt; ++i) {
3265         s->temps[i].state = TS_DEAD;
3266         la_reset_pref(&s->temps[i]);
3267     }
3268 }
3269 
3270 /* liveness analysis: end of basic block: all temps are dead, globals
3271    and local temps should be in memory. */
3272 static void la_bb_end(TCGContext *s, int ng, int nt)
3273 {
3274     int i;
3275 
3276     for (i = 0; i < nt; ++i) {
3277         TCGTemp *ts = &s->temps[i];
3278         int state;
3279 
3280         switch (ts->kind) {
3281         case TEMP_FIXED:
3282         case TEMP_GLOBAL:
3283         case TEMP_TB:
3284             state = TS_DEAD | TS_MEM;
3285             break;
3286         case TEMP_EBB:
3287         case TEMP_CONST:
3288             state = TS_DEAD;
3289             break;
3290         default:
3291             g_assert_not_reached();
3292         }
3293         ts->state = state;
3294         la_reset_pref(ts);
3295     }
3296 }
3297 
3298 /* liveness analysis: sync globals back to memory.  */
3299 static void la_global_sync(TCGContext *s, int ng)
3300 {
3301     int i;
3302 
3303     for (i = 0; i < ng; ++i) {
3304         int state = s->temps[i].state;
3305         s->temps[i].state = state | TS_MEM;
3306         if (state == TS_DEAD) {
3307             /* If the global was previously dead, reset prefs.  */
3308             la_reset_pref(&s->temps[i]);
3309         }
3310     }
3311 }
3312 
3313 /*
3314  * liveness analysis: conditional branch: all temps are dead unless
3315  * explicitly live-across-conditional-branch, globals and local temps
3316  * should be synced.
3317  */
3318 static void la_bb_sync(TCGContext *s, int ng, int nt)
3319 {
3320     la_global_sync(s, ng);
3321 
3322     for (int i = ng; i < nt; ++i) {
3323         TCGTemp *ts = &s->temps[i];
3324         int state;
3325 
3326         switch (ts->kind) {
3327         case TEMP_TB:
3328             state = ts->state;
3329             ts->state = state | TS_MEM;
3330             if (state != TS_DEAD) {
3331                 continue;
3332             }
3333             break;
3334         case TEMP_EBB:
3335         case TEMP_CONST:
3336             continue;
3337         default:
3338             g_assert_not_reached();
3339         }
3340         la_reset_pref(&s->temps[i]);
3341     }
3342 }
3343 
3344 /* liveness analysis: sync globals back to memory and kill.  */
3345 static void la_global_kill(TCGContext *s, int ng)
3346 {
3347     int i;
3348 
3349     for (i = 0; i < ng; i++) {
3350         s->temps[i].state = TS_DEAD | TS_MEM;
3351         la_reset_pref(&s->temps[i]);
3352     }
3353 }
3354 
3355 /* liveness analysis: note live globals crossing calls.  */
3356 static void la_cross_call(TCGContext *s, int nt)
3357 {
3358     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3359     int i;
3360 
3361     for (i = 0; i < nt; i++) {
3362         TCGTemp *ts = &s->temps[i];
3363         if (!(ts->state & TS_DEAD)) {
3364             TCGRegSet *pset = la_temp_pref(ts);
3365             TCGRegSet set = *pset;
3366 
3367             set &= mask;
3368             /* If the combination is not possible, restart.  */
3369             if (set == 0) {
3370                 set = tcg_target_available_regs[ts->type] & mask;
3371             }
3372             *pset = set;
3373         }
3374     }
3375 }
3376 
3377 /*
3378  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3379  * to TEMP_EBB, if possible.
3380  */
3381 static void __attribute__((noinline))
3382 liveness_pass_0(TCGContext *s)
3383 {
3384     void * const multiple_ebb = (void *)(uintptr_t)-1;
3385     int nb_temps = s->nb_temps;
3386     TCGOp *op, *ebb;
3387 
3388     for (int i = s->nb_globals; i < nb_temps; ++i) {
3389         s->temps[i].state_ptr = NULL;
3390     }
3391 
3392     /*
3393      * Represent each EBB by the op at which it begins.  In the case of
3394      * the first EBB, this is the first op, otherwise it is a label.
3395      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3396      * within a single EBB, else MULTIPLE_EBB.
3397      */
3398     ebb = QTAILQ_FIRST(&s->ops);
3399     QTAILQ_FOREACH(op, &s->ops, link) {
3400         const TCGOpDef *def;
3401         int nb_oargs, nb_iargs;
3402 
3403         switch (op->opc) {
3404         case INDEX_op_set_label:
3405             ebb = op;
3406             continue;
3407         case INDEX_op_discard:
3408             continue;
3409         case INDEX_op_call:
3410             nb_oargs = TCGOP_CALLO(op);
3411             nb_iargs = TCGOP_CALLI(op);
3412             break;
3413         default:
3414             def = &tcg_op_defs[op->opc];
3415             nb_oargs = def->nb_oargs;
3416             nb_iargs = def->nb_iargs;
3417             break;
3418         }
3419 
3420         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3421             TCGTemp *ts = arg_temp(op->args[i]);
3422 
3423             if (ts->kind != TEMP_TB) {
3424                 continue;
3425             }
3426             if (ts->state_ptr == NULL) {
3427                 ts->state_ptr = ebb;
3428             } else if (ts->state_ptr != ebb) {
3429                 ts->state_ptr = multiple_ebb;
3430             }
3431         }
3432     }
3433 
3434     /*
3435      * For TEMP_TB that turned out not to be used beyond one EBB,
3436      * reduce the liveness to TEMP_EBB.
3437      */
3438     for (int i = s->nb_globals; i < nb_temps; ++i) {
3439         TCGTemp *ts = &s->temps[i];
3440         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3441             ts->kind = TEMP_EBB;
3442         }
3443     }
3444 }
3445 
3446 /* Liveness analysis: update the opc_arg_life array to tell whether a
3447    given input argument is dead.  Instructions updating dead
3448    temporaries are removed. */
3449 static void __attribute__((noinline))
3450 liveness_pass_1(TCGContext *s)
3451 {
3452     int nb_globals = s->nb_globals;
3453     int nb_temps = s->nb_temps;
3454     TCGOp *op, *op_prev;
3455     TCGRegSet *prefs;
3456     int i;
3457 
3458     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3459     for (i = 0; i < nb_temps; ++i) {
3460         s->temps[i].state_ptr = prefs + i;
3461     }
3462 
3463     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3464     la_func_end(s, nb_globals, nb_temps);
3465 
3466     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3467         int nb_iargs, nb_oargs;
3468         TCGOpcode opc_new, opc_new2;
3469         bool have_opc_new2;
3470         TCGLifeData arg_life = 0;
3471         TCGTemp *ts;
3472         TCGOpcode opc = op->opc;
3473         const TCGOpDef *def = &tcg_op_defs[opc];
3474 
3475         switch (opc) {
3476         case INDEX_op_call:
3477             {
3478                 const TCGHelperInfo *info = tcg_call_info(op);
3479                 int call_flags = tcg_call_flags(op);
3480 
3481                 nb_oargs = TCGOP_CALLO(op);
3482                 nb_iargs = TCGOP_CALLI(op);
3483 
3484                 /* pure functions can be removed if their result is unused */
3485                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3486                     for (i = 0; i < nb_oargs; i++) {
3487                         ts = arg_temp(op->args[i]);
3488                         if (ts->state != TS_DEAD) {
3489                             goto do_not_remove_call;
3490                         }
3491                     }
3492                     goto do_remove;
3493                 }
3494             do_not_remove_call:
3495 
3496                 /* Output args are dead.  */
3497                 for (i = 0; i < nb_oargs; i++) {
3498                     ts = arg_temp(op->args[i]);
3499                     if (ts->state & TS_DEAD) {
3500                         arg_life |= DEAD_ARG << i;
3501                     }
3502                     if (ts->state & TS_MEM) {
3503                         arg_life |= SYNC_ARG << i;
3504                     }
3505                     ts->state = TS_DEAD;
3506                     la_reset_pref(ts);
3507                 }
3508 
3509                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3510                 memset(op->output_pref, 0, sizeof(op->output_pref));
3511 
3512                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3513                                     TCG_CALL_NO_READ_GLOBALS))) {
3514                     la_global_kill(s, nb_globals);
3515                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3516                     la_global_sync(s, nb_globals);
3517                 }
3518 
3519                 /* Record arguments that die in this helper.  */
3520                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3521                     ts = arg_temp(op->args[i]);
3522                     if (ts->state & TS_DEAD) {
3523                         arg_life |= DEAD_ARG << i;
3524                     }
3525                 }
3526 
3527                 /* For all live registers, remove call-clobbered prefs.  */
3528                 la_cross_call(s, nb_temps);
3529 
3530                 /*
3531                  * Input arguments are live for preceding opcodes.
3532                  *
3533                  * For those arguments that die, and will be allocated in
3534                  * registers, clear the register set for that arg, to be
3535                  * filled in below.  For args that will be on the stack,
3536                  * reset to any available reg.  Process arguments in reverse
3537                  * order so that if a temp is used more than once, the stack
3538                  * reset to max happens before the register reset to 0.
3539                  */
3540                 for (i = nb_iargs - 1; i >= 0; i--) {
3541                     const TCGCallArgumentLoc *loc = &info->in[i];
3542                     ts = arg_temp(op->args[nb_oargs + i]);
3543 
3544                     if (ts->state & TS_DEAD) {
3545                         switch (loc->kind) {
3546                         case TCG_CALL_ARG_NORMAL:
3547                         case TCG_CALL_ARG_EXTEND_U:
3548                         case TCG_CALL_ARG_EXTEND_S:
3549                             if (arg_slot_reg_p(loc->arg_slot)) {
3550                                 *la_temp_pref(ts) = 0;
3551                                 break;
3552                             }
3553                             /* fall through */
3554                         default:
3555                             *la_temp_pref(ts) =
3556                                 tcg_target_available_regs[ts->type];
3557                             break;
3558                         }
3559                         ts->state &= ~TS_DEAD;
3560                     }
3561                 }
3562 
3563                 /*
3564                  * For each input argument, add its input register to prefs.
3565                  * If a temp is used once, this produces a single set bit;
3566                  * if a temp is used multiple times, this produces a set.
3567                  */
3568                 for (i = 0; i < nb_iargs; i++) {
3569                     const TCGCallArgumentLoc *loc = &info->in[i];
3570                     ts = arg_temp(op->args[nb_oargs + i]);
3571 
3572                     switch (loc->kind) {
3573                     case TCG_CALL_ARG_NORMAL:
3574                     case TCG_CALL_ARG_EXTEND_U:
3575                     case TCG_CALL_ARG_EXTEND_S:
3576                         if (arg_slot_reg_p(loc->arg_slot)) {
3577                             tcg_regset_set_reg(*la_temp_pref(ts),
3578                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3579                         }
3580                         break;
3581                     default:
3582                         break;
3583                     }
3584                 }
3585             }
3586             break;
3587         case INDEX_op_insn_start:
3588             break;
3589         case INDEX_op_discard:
3590             /* mark the temporary as dead */
3591             ts = arg_temp(op->args[0]);
3592             ts->state = TS_DEAD;
3593             la_reset_pref(ts);
3594             break;
3595 
3596         case INDEX_op_add2_i32:
3597             opc_new = INDEX_op_add_i32;
3598             goto do_addsub2;
3599         case INDEX_op_sub2_i32:
3600             opc_new = INDEX_op_sub_i32;
3601             goto do_addsub2;
3602         case INDEX_op_add2_i64:
3603             opc_new = INDEX_op_add_i64;
3604             goto do_addsub2;
3605         case INDEX_op_sub2_i64:
3606             opc_new = INDEX_op_sub_i64;
3607         do_addsub2:
3608             nb_iargs = 4;
3609             nb_oargs = 2;
3610             /* Test if the high part of the operation is dead, but not
3611                the low part.  The result can be optimized to a simple
3612                add or sub.  This happens often for x86_64 guest when the
3613                cpu mode is set to 32 bit.  */
3614             if (arg_temp(op->args[1])->state == TS_DEAD) {
3615                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3616                     goto do_remove;
3617                 }
3618                 /* Replace the opcode and adjust the args in place,
3619                    leaving 3 unused args at the end.  */
3620                 op->opc = opc = opc_new;
3621                 op->args[1] = op->args[2];
3622                 op->args[2] = op->args[4];
3623                 /* Fall through and mark the single-word operation live.  */
3624                 nb_iargs = 2;
3625                 nb_oargs = 1;
3626             }
3627             goto do_not_remove;
3628 
3629         case INDEX_op_mulu2_i32:
3630             opc_new = INDEX_op_mul_i32;
3631             opc_new2 = INDEX_op_muluh_i32;
3632             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3633             goto do_mul2;
3634         case INDEX_op_muls2_i32:
3635             opc_new = INDEX_op_mul_i32;
3636             opc_new2 = INDEX_op_mulsh_i32;
3637             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3638             goto do_mul2;
3639         case INDEX_op_mulu2_i64:
3640             opc_new = INDEX_op_mul_i64;
3641             opc_new2 = INDEX_op_muluh_i64;
3642             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3643             goto do_mul2;
3644         case INDEX_op_muls2_i64:
3645             opc_new = INDEX_op_mul_i64;
3646             opc_new2 = INDEX_op_mulsh_i64;
3647             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3648             goto do_mul2;
3649         do_mul2:
3650             nb_iargs = 2;
3651             nb_oargs = 2;
3652             if (arg_temp(op->args[1])->state == TS_DEAD) {
3653                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3654                     /* Both parts of the operation are dead.  */
3655                     goto do_remove;
3656                 }
3657                 /* The high part of the operation is dead; generate the low. */
3658                 op->opc = opc = opc_new;
3659                 op->args[1] = op->args[2];
3660                 op->args[2] = op->args[3];
3661             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3662                 /* The low part of the operation is dead; generate the high. */
3663                 op->opc = opc = opc_new2;
3664                 op->args[0] = op->args[1];
3665                 op->args[1] = op->args[2];
3666                 op->args[2] = op->args[3];
3667             } else {
3668                 goto do_not_remove;
3669             }
3670             /* Mark the single-word operation live.  */
3671             nb_oargs = 1;
3672             goto do_not_remove;
3673 
3674         default:
3675             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3676             nb_iargs = def->nb_iargs;
3677             nb_oargs = def->nb_oargs;
3678 
3679             /* Test if the operation can be removed because all
3680                its outputs are dead. We assume that nb_oargs == 0
3681                implies side effects */
3682             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3683                 for (i = 0; i < nb_oargs; i++) {
3684                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3685                         goto do_not_remove;
3686                     }
3687                 }
3688                 goto do_remove;
3689             }
3690             goto do_not_remove;
3691 
3692         do_remove:
3693             tcg_op_remove(s, op);
3694             break;
3695 
3696         do_not_remove:
3697             for (i = 0; i < nb_oargs; i++) {
3698                 ts = arg_temp(op->args[i]);
3699 
3700                 /* Remember the preference of the uses that followed.  */
3701                 if (i < ARRAY_SIZE(op->output_pref)) {
3702                     op->output_pref[i] = *la_temp_pref(ts);
3703                 }
3704 
3705                 /* Output args are dead.  */
3706                 if (ts->state & TS_DEAD) {
3707                     arg_life |= DEAD_ARG << i;
3708                 }
3709                 if (ts->state & TS_MEM) {
3710                     arg_life |= SYNC_ARG << i;
3711                 }
3712                 ts->state = TS_DEAD;
3713                 la_reset_pref(ts);
3714             }
3715 
3716             /* If end of basic block, update.  */
3717             if (def->flags & TCG_OPF_BB_EXIT) {
3718                 la_func_end(s, nb_globals, nb_temps);
3719             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3720                 la_bb_sync(s, nb_globals, nb_temps);
3721             } else if (def->flags & TCG_OPF_BB_END) {
3722                 la_bb_end(s, nb_globals, nb_temps);
3723             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3724                 la_global_sync(s, nb_globals);
3725                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3726                     la_cross_call(s, nb_temps);
3727                 }
3728             }
3729 
3730             /* Record arguments that die in this opcode.  */
3731             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3732                 ts = arg_temp(op->args[i]);
3733                 if (ts->state & TS_DEAD) {
3734                     arg_life |= DEAD_ARG << i;
3735                 }
3736             }
3737 
3738             /* Input arguments are live for preceding opcodes.  */
3739             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3740                 ts = arg_temp(op->args[i]);
3741                 if (ts->state & TS_DEAD) {
3742                     /* For operands that were dead, initially allow
3743                        all regs for the type.  */
3744                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3745                     ts->state &= ~TS_DEAD;
3746                 }
3747             }
3748 
3749             /* Incorporate constraints for this operand.  */
3750             switch (opc) {
3751             case INDEX_op_mov_i32:
3752             case INDEX_op_mov_i64:
3753                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3754                    have proper constraints.  That said, special case
3755                    moves to propagate preferences backward.  */
3756                 if (IS_DEAD_ARG(1)) {
3757                     *la_temp_pref(arg_temp(op->args[0]))
3758                         = *la_temp_pref(arg_temp(op->args[1]));
3759                 }
3760                 break;
3761 
3762             default:
3763                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3764                     const TCGArgConstraint *ct = &def->args_ct[i];
3765                     TCGRegSet set, *pset;
3766 
3767                     ts = arg_temp(op->args[i]);
3768                     pset = la_temp_pref(ts);
3769                     set = *pset;
3770 
3771                     set &= ct->regs;
3772                     if (ct->ialias) {
3773                         set &= output_pref(op, ct->alias_index);
3774                     }
3775                     /* If the combination is not possible, restart.  */
3776                     if (set == 0) {
3777                         set = ct->regs;
3778                     }
3779                     *pset = set;
3780                 }
3781                 break;
3782             }
3783             break;
3784         }
3785         op->life = arg_life;
3786     }
3787 }
3788 
3789 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3790 static bool __attribute__((noinline))
3791 liveness_pass_2(TCGContext *s)
3792 {
3793     int nb_globals = s->nb_globals;
3794     int nb_temps, i;
3795     bool changes = false;
3796     TCGOp *op, *op_next;
3797 
3798     /* Create a temporary for each indirect global.  */
3799     for (i = 0; i < nb_globals; ++i) {
3800         TCGTemp *its = &s->temps[i];
3801         if (its->indirect_reg) {
3802             TCGTemp *dts = tcg_temp_alloc(s);
3803             dts->type = its->type;
3804             dts->base_type = its->base_type;
3805             dts->temp_subindex = its->temp_subindex;
3806             dts->kind = TEMP_EBB;
3807             its->state_ptr = dts;
3808         } else {
3809             its->state_ptr = NULL;
3810         }
3811         /* All globals begin dead.  */
3812         its->state = TS_DEAD;
3813     }
3814     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3815         TCGTemp *its = &s->temps[i];
3816         its->state_ptr = NULL;
3817         its->state = TS_DEAD;
3818     }
3819 
3820     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3821         TCGOpcode opc = op->opc;
3822         const TCGOpDef *def = &tcg_op_defs[opc];
3823         TCGLifeData arg_life = op->life;
3824         int nb_iargs, nb_oargs, call_flags;
3825         TCGTemp *arg_ts, *dir_ts;
3826 
3827         if (opc == INDEX_op_call) {
3828             nb_oargs = TCGOP_CALLO(op);
3829             nb_iargs = TCGOP_CALLI(op);
3830             call_flags = tcg_call_flags(op);
3831         } else {
3832             nb_iargs = def->nb_iargs;
3833             nb_oargs = def->nb_oargs;
3834 
3835             /* Set flags similar to how calls require.  */
3836             if (def->flags & TCG_OPF_COND_BRANCH) {
3837                 /* Like reading globals: sync_globals */
3838                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3839             } else if (def->flags & TCG_OPF_BB_END) {
3840                 /* Like writing globals: save_globals */
3841                 call_flags = 0;
3842             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3843                 /* Like reading globals: sync_globals */
3844                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3845             } else {
3846                 /* No effect on globals.  */
3847                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3848                               TCG_CALL_NO_WRITE_GLOBALS);
3849             }
3850         }
3851 
3852         /* Make sure that input arguments are available.  */
3853         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3854             arg_ts = arg_temp(op->args[i]);
3855             dir_ts = arg_ts->state_ptr;
3856             if (dir_ts && arg_ts->state == TS_DEAD) {
3857                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3858                                   ? INDEX_op_ld_i32
3859                                   : INDEX_op_ld_i64);
3860                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3861 
3862                 lop->args[0] = temp_arg(dir_ts);
3863                 lop->args[1] = temp_arg(arg_ts->mem_base);
3864                 lop->args[2] = arg_ts->mem_offset;
3865 
3866                 /* Loaded, but synced with memory.  */
3867                 arg_ts->state = TS_MEM;
3868             }
3869         }
3870 
3871         /* Perform input replacement, and mark inputs that became dead.
3872            No action is required except keeping temp_state up to date
3873            so that we reload when needed.  */
3874         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3875             arg_ts = arg_temp(op->args[i]);
3876             dir_ts = arg_ts->state_ptr;
3877             if (dir_ts) {
3878                 op->args[i] = temp_arg(dir_ts);
3879                 changes = true;
3880                 if (IS_DEAD_ARG(i)) {
3881                     arg_ts->state = TS_DEAD;
3882                 }
3883             }
3884         }
3885 
3886         /* Liveness analysis should ensure that the following are
3887            all correct, for call sites and basic block end points.  */
3888         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3889             /* Nothing to do */
3890         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3891             for (i = 0; i < nb_globals; ++i) {
3892                 /* Liveness should see that globals are synced back,
3893                    that is, either TS_DEAD or TS_MEM.  */
3894                 arg_ts = &s->temps[i];
3895                 tcg_debug_assert(arg_ts->state_ptr == 0
3896                                  || arg_ts->state != 0);
3897             }
3898         } else {
3899             for (i = 0; i < nb_globals; ++i) {
3900                 /* Liveness should see that globals are saved back,
3901                    that is, TS_DEAD, waiting to be reloaded.  */
3902                 arg_ts = &s->temps[i];
3903                 tcg_debug_assert(arg_ts->state_ptr == 0
3904                                  || arg_ts->state == TS_DEAD);
3905             }
3906         }
3907 
3908         /* Outputs become available.  */
3909         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3910             arg_ts = arg_temp(op->args[0]);
3911             dir_ts = arg_ts->state_ptr;
3912             if (dir_ts) {
3913                 op->args[0] = temp_arg(dir_ts);
3914                 changes = true;
3915 
3916                 /* The output is now live and modified.  */
3917                 arg_ts->state = 0;
3918 
3919                 if (NEED_SYNC_ARG(0)) {
3920                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3921                                       ? INDEX_op_st_i32
3922                                       : INDEX_op_st_i64);
3923                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3924                     TCGTemp *out_ts = dir_ts;
3925 
3926                     if (IS_DEAD_ARG(0)) {
3927                         out_ts = arg_temp(op->args[1]);
3928                         arg_ts->state = TS_DEAD;
3929                         tcg_op_remove(s, op);
3930                     } else {
3931                         arg_ts->state = TS_MEM;
3932                     }
3933 
3934                     sop->args[0] = temp_arg(out_ts);
3935                     sop->args[1] = temp_arg(arg_ts->mem_base);
3936                     sop->args[2] = arg_ts->mem_offset;
3937                 } else {
3938                     tcg_debug_assert(!IS_DEAD_ARG(0));
3939                 }
3940             }
3941         } else {
3942             for (i = 0; i < nb_oargs; i++) {
3943                 arg_ts = arg_temp(op->args[i]);
3944                 dir_ts = arg_ts->state_ptr;
3945                 if (!dir_ts) {
3946                     continue;
3947                 }
3948                 op->args[i] = temp_arg(dir_ts);
3949                 changes = true;
3950 
3951                 /* The output is now live and modified.  */
3952                 arg_ts->state = 0;
3953 
3954                 /* Sync outputs upon their last write.  */
3955                 if (NEED_SYNC_ARG(i)) {
3956                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3957                                       ? INDEX_op_st_i32
3958                                       : INDEX_op_st_i64);
3959                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3960 
3961                     sop->args[0] = temp_arg(dir_ts);
3962                     sop->args[1] = temp_arg(arg_ts->mem_base);
3963                     sop->args[2] = arg_ts->mem_offset;
3964 
3965                     arg_ts->state = TS_MEM;
3966                 }
3967                 /* Drop outputs that are dead.  */
3968                 if (IS_DEAD_ARG(i)) {
3969                     arg_ts->state = TS_DEAD;
3970                 }
3971             }
3972         }
3973     }
3974 
3975     return changes;
3976 }
3977 
3978 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3979 {
3980     intptr_t off;
3981     int size, align;
3982 
3983     /* When allocating an object, look at the full type. */
3984     size = tcg_type_size(ts->base_type);
3985     switch (ts->base_type) {
3986     case TCG_TYPE_I32:
3987         align = 4;
3988         break;
3989     case TCG_TYPE_I64:
3990     case TCG_TYPE_V64:
3991         align = 8;
3992         break;
3993     case TCG_TYPE_I128:
3994     case TCG_TYPE_V128:
3995     case TCG_TYPE_V256:
3996         /*
3997          * Note that we do not require aligned storage for V256,
3998          * and that we provide alignment for I128 to match V128,
3999          * even if that's above what the host ABI requires.
4000          */
4001         align = 16;
4002         break;
4003     default:
4004         g_assert_not_reached();
4005     }
4006 
4007     /*
4008      * Assume the stack is sufficiently aligned.
4009      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4010      * and do not require 16 byte vector alignment.  This seems slightly
4011      * easier than fully parameterizing the above switch statement.
4012      */
4013     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4014     off = ROUND_UP(s->current_frame_offset, align);
4015 
4016     /* If we've exhausted the stack frame, restart with a smaller TB. */
4017     if (off + size > s->frame_end) {
4018         tcg_raise_tb_overflow(s);
4019     }
4020     s->current_frame_offset = off + size;
4021 #if defined(__sparc__)
4022     off += TCG_TARGET_STACK_BIAS;
4023 #endif
4024 
4025     /* If the object was subdivided, assign memory to all the parts. */
4026     if (ts->base_type != ts->type) {
4027         int part_size = tcg_type_size(ts->type);
4028         int part_count = size / part_size;
4029 
4030         /*
4031          * Each part is allocated sequentially in tcg_temp_new_internal.
4032          * Jump back to the first part by subtracting the current index.
4033          */
4034         ts -= ts->temp_subindex;
4035         for (int i = 0; i < part_count; ++i) {
4036             ts[i].mem_offset = off + i * part_size;
4037             ts[i].mem_base = s->frame_temp;
4038             ts[i].mem_allocated = 1;
4039         }
4040     } else {
4041         ts->mem_offset = off;
4042         ts->mem_base = s->frame_temp;
4043         ts->mem_allocated = 1;
4044     }
4045 }
4046 
4047 /* Assign @reg to @ts, and update reg_to_temp[]. */
4048 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4049 {
4050     if (ts->val_type == TEMP_VAL_REG) {
4051         TCGReg old = ts->reg;
4052         tcg_debug_assert(s->reg_to_temp[old] == ts);
4053         if (old == reg) {
4054             return;
4055         }
4056         s->reg_to_temp[old] = NULL;
4057     }
4058     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4059     s->reg_to_temp[reg] = ts;
4060     ts->val_type = TEMP_VAL_REG;
4061     ts->reg = reg;
4062 }
4063 
4064 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4065 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4066 {
4067     tcg_debug_assert(type != TEMP_VAL_REG);
4068     if (ts->val_type == TEMP_VAL_REG) {
4069         TCGReg reg = ts->reg;
4070         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4071         s->reg_to_temp[reg] = NULL;
4072     }
4073     ts->val_type = type;
4074 }
4075 
4076 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4077 
4078 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4079    mark it free; otherwise mark it dead.  */
4080 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4081 {
4082     TCGTempVal new_type;
4083 
4084     switch (ts->kind) {
4085     case TEMP_FIXED:
4086         return;
4087     case TEMP_GLOBAL:
4088     case TEMP_TB:
4089         new_type = TEMP_VAL_MEM;
4090         break;
4091     case TEMP_EBB:
4092         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4093         break;
4094     case TEMP_CONST:
4095         new_type = TEMP_VAL_CONST;
4096         break;
4097     default:
4098         g_assert_not_reached();
4099     }
4100     set_temp_val_nonreg(s, ts, new_type);
4101 }
4102 
4103 /* Mark a temporary as dead.  */
4104 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4105 {
4106     temp_free_or_dead(s, ts, 1);
4107 }
4108 
4109 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4110    registers needs to be allocated to store a constant.  If 'free_or_dead'
4111    is non-zero, subsequently release the temporary; if it is positive, the
4112    temp is dead; if it is negative, the temp is free.  */
4113 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4114                       TCGRegSet preferred_regs, int free_or_dead)
4115 {
4116     if (!temp_readonly(ts) && !ts->mem_coherent) {
4117         if (!ts->mem_allocated) {
4118             temp_allocate_frame(s, ts);
4119         }
4120         switch (ts->val_type) {
4121         case TEMP_VAL_CONST:
4122             /* If we're going to free the temp immediately, then we won't
4123                require it later in a register, so attempt to store the
4124                constant to memory directly.  */
4125             if (free_or_dead
4126                 && tcg_out_sti(s, ts->type, ts->val,
4127                                ts->mem_base->reg, ts->mem_offset)) {
4128                 break;
4129             }
4130             temp_load(s, ts, tcg_target_available_regs[ts->type],
4131                       allocated_regs, preferred_regs);
4132             /* fallthrough */
4133 
4134         case TEMP_VAL_REG:
4135             tcg_out_st(s, ts->type, ts->reg,
4136                        ts->mem_base->reg, ts->mem_offset);
4137             break;
4138 
4139         case TEMP_VAL_MEM:
4140             break;
4141 
4142         case TEMP_VAL_DEAD:
4143         default:
4144             g_assert_not_reached();
4145         }
4146         ts->mem_coherent = 1;
4147     }
4148     if (free_or_dead) {
4149         temp_free_or_dead(s, ts, free_or_dead);
4150     }
4151 }
4152 
4153 /* free register 'reg' by spilling the corresponding temporary if necessary */
4154 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4155 {
4156     TCGTemp *ts = s->reg_to_temp[reg];
4157     if (ts != NULL) {
4158         temp_sync(s, ts, allocated_regs, 0, -1);
4159     }
4160 }
4161 
4162 /**
4163  * tcg_reg_alloc:
4164  * @required_regs: Set of registers in which we must allocate.
4165  * @allocated_regs: Set of registers which must be avoided.
4166  * @preferred_regs: Set of registers we should prefer.
4167  * @rev: True if we search the registers in "indirect" order.
4168  *
4169  * The allocated register must be in @required_regs & ~@allocated_regs,
4170  * but if we can put it in @preferred_regs we may save a move later.
4171  */
4172 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4173                             TCGRegSet allocated_regs,
4174                             TCGRegSet preferred_regs, bool rev)
4175 {
4176     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4177     TCGRegSet reg_ct[2];
4178     const int *order;
4179 
4180     reg_ct[1] = required_regs & ~allocated_regs;
4181     tcg_debug_assert(reg_ct[1] != 0);
4182     reg_ct[0] = reg_ct[1] & preferred_regs;
4183 
4184     /* Skip the preferred_regs option if it cannot be satisfied,
4185        or if the preference made no difference.  */
4186     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4187 
4188     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4189 
4190     /* Try free registers, preferences first.  */
4191     for (j = f; j < 2; j++) {
4192         TCGRegSet set = reg_ct[j];
4193 
4194         if (tcg_regset_single(set)) {
4195             /* One register in the set.  */
4196             TCGReg reg = tcg_regset_first(set);
4197             if (s->reg_to_temp[reg] == NULL) {
4198                 return reg;
4199             }
4200         } else {
4201             for (i = 0; i < n; i++) {
4202                 TCGReg reg = order[i];
4203                 if (s->reg_to_temp[reg] == NULL &&
4204                     tcg_regset_test_reg(set, reg)) {
4205                     return reg;
4206                 }
4207             }
4208         }
4209     }
4210 
4211     /* We must spill something.  */
4212     for (j = f; j < 2; j++) {
4213         TCGRegSet set = reg_ct[j];
4214 
4215         if (tcg_regset_single(set)) {
4216             /* One register in the set.  */
4217             TCGReg reg = tcg_regset_first(set);
4218             tcg_reg_free(s, reg, allocated_regs);
4219             return reg;
4220         } else {
4221             for (i = 0; i < n; i++) {
4222                 TCGReg reg = order[i];
4223                 if (tcg_regset_test_reg(set, reg)) {
4224                     tcg_reg_free(s, reg, allocated_regs);
4225                     return reg;
4226                 }
4227             }
4228         }
4229     }
4230 
4231     g_assert_not_reached();
4232 }
4233 
4234 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4235                                  TCGRegSet allocated_regs,
4236                                  TCGRegSet preferred_regs, bool rev)
4237 {
4238     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4239     TCGRegSet reg_ct[2];
4240     const int *order;
4241 
4242     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4243     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4244     tcg_debug_assert(reg_ct[1] != 0);
4245     reg_ct[0] = reg_ct[1] & preferred_regs;
4246 
4247     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4248 
4249     /*
4250      * Skip the preferred_regs option if it cannot be satisfied,
4251      * or if the preference made no difference.
4252      */
4253     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4254 
4255     /*
4256      * Minimize the number of flushes by looking for 2 free registers first,
4257      * then a single flush, then two flushes.
4258      */
4259     for (fmin = 2; fmin >= 0; fmin--) {
4260         for (j = k; j < 2; j++) {
4261             TCGRegSet set = reg_ct[j];
4262 
4263             for (i = 0; i < n; i++) {
4264                 TCGReg reg = order[i];
4265 
4266                 if (tcg_regset_test_reg(set, reg)) {
4267                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4268                     if (f >= fmin) {
4269                         tcg_reg_free(s, reg, allocated_regs);
4270                         tcg_reg_free(s, reg + 1, allocated_regs);
4271                         return reg;
4272                     }
4273                 }
4274             }
4275         }
4276     }
4277     g_assert_not_reached();
4278 }
4279 
4280 /* Make sure the temporary is in a register.  If needed, allocate the register
4281    from DESIRED while avoiding ALLOCATED.  */
4282 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4283                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4284 {
4285     TCGReg reg;
4286 
4287     switch (ts->val_type) {
4288     case TEMP_VAL_REG:
4289         return;
4290     case TEMP_VAL_CONST:
4291         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4292                             preferred_regs, ts->indirect_base);
4293         if (ts->type <= TCG_TYPE_I64) {
4294             tcg_out_movi(s, ts->type, reg, ts->val);
4295         } else {
4296             uint64_t val = ts->val;
4297             MemOp vece = MO_64;
4298 
4299             /*
4300              * Find the minimal vector element that matches the constant.
4301              * The targets will, in general, have to do this search anyway,
4302              * do this generically.
4303              */
4304             if (val == dup_const(MO_8, val)) {
4305                 vece = MO_8;
4306             } else if (val == dup_const(MO_16, val)) {
4307                 vece = MO_16;
4308             } else if (val == dup_const(MO_32, val)) {
4309                 vece = MO_32;
4310             }
4311 
4312             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4313         }
4314         ts->mem_coherent = 0;
4315         break;
4316     case TEMP_VAL_MEM:
4317         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4318                             preferred_regs, ts->indirect_base);
4319         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4320         ts->mem_coherent = 1;
4321         break;
4322     case TEMP_VAL_DEAD:
4323     default:
4324         g_assert_not_reached();
4325     }
4326     set_temp_val_reg(s, ts, reg);
4327 }
4328 
4329 /* Save a temporary to memory. 'allocated_regs' is used in case a
4330    temporary registers needs to be allocated to store a constant.  */
4331 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4332 {
4333     /* The liveness analysis already ensures that globals are back
4334        in memory. Keep an tcg_debug_assert for safety. */
4335     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4336 }
4337 
4338 /* save globals to their canonical location and assume they can be
4339    modified be the following code. 'allocated_regs' is used in case a
4340    temporary registers needs to be allocated to store a constant. */
4341 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4342 {
4343     int i, n;
4344 
4345     for (i = 0, n = s->nb_globals; i < n; i++) {
4346         temp_save(s, &s->temps[i], allocated_regs);
4347     }
4348 }
4349 
4350 /* sync globals to their canonical location and assume they can be
4351    read by the following code. 'allocated_regs' is used in case a
4352    temporary registers needs to be allocated to store a constant. */
4353 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4354 {
4355     int i, n;
4356 
4357     for (i = 0, n = s->nb_globals; i < n; i++) {
4358         TCGTemp *ts = &s->temps[i];
4359         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4360                          || ts->kind == TEMP_FIXED
4361                          || ts->mem_coherent);
4362     }
4363 }
4364 
4365 /* at the end of a basic block, we assume all temporaries are dead and
4366    all globals are stored at their canonical location. */
4367 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4368 {
4369     int i;
4370 
4371     for (i = s->nb_globals; i < s->nb_temps; i++) {
4372         TCGTemp *ts = &s->temps[i];
4373 
4374         switch (ts->kind) {
4375         case TEMP_TB:
4376             temp_save(s, ts, allocated_regs);
4377             break;
4378         case TEMP_EBB:
4379             /* The liveness analysis already ensures that temps are dead.
4380                Keep an tcg_debug_assert for safety. */
4381             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4382             break;
4383         case TEMP_CONST:
4384             /* Similarly, we should have freed any allocated register. */
4385             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4386             break;
4387         default:
4388             g_assert_not_reached();
4389         }
4390     }
4391 
4392     save_globals(s, allocated_regs);
4393 }
4394 
4395 /*
4396  * At a conditional branch, we assume all temporaries are dead unless
4397  * explicitly live-across-conditional-branch; all globals and local
4398  * temps are synced to their location.
4399  */
4400 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4401 {
4402     sync_globals(s, allocated_regs);
4403 
4404     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4405         TCGTemp *ts = &s->temps[i];
4406         /*
4407          * The liveness analysis already ensures that temps are dead.
4408          * Keep tcg_debug_asserts for safety.
4409          */
4410         switch (ts->kind) {
4411         case TEMP_TB:
4412             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4413             break;
4414         case TEMP_EBB:
4415         case TEMP_CONST:
4416             break;
4417         default:
4418             g_assert_not_reached();
4419         }
4420     }
4421 }
4422 
4423 /*
4424  * Specialized code generation for INDEX_op_mov_* with a constant.
4425  */
4426 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4427                                   tcg_target_ulong val, TCGLifeData arg_life,
4428                                   TCGRegSet preferred_regs)
4429 {
4430     /* ENV should not be modified.  */
4431     tcg_debug_assert(!temp_readonly(ots));
4432 
4433     /* The movi is not explicitly generated here.  */
4434     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4435     ots->val = val;
4436     ots->mem_coherent = 0;
4437     if (NEED_SYNC_ARG(0)) {
4438         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4439     } else if (IS_DEAD_ARG(0)) {
4440         temp_dead(s, ots);
4441     }
4442 }
4443 
4444 /*
4445  * Specialized code generation for INDEX_op_mov_*.
4446  */
4447 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4448 {
4449     const TCGLifeData arg_life = op->life;
4450     TCGRegSet allocated_regs, preferred_regs;
4451     TCGTemp *ts, *ots;
4452     TCGType otype, itype;
4453     TCGReg oreg, ireg;
4454 
4455     allocated_regs = s->reserved_regs;
4456     preferred_regs = output_pref(op, 0);
4457     ots = arg_temp(op->args[0]);
4458     ts = arg_temp(op->args[1]);
4459 
4460     /* ENV should not be modified.  */
4461     tcg_debug_assert(!temp_readonly(ots));
4462 
4463     /* Note that otype != itype for no-op truncation.  */
4464     otype = ots->type;
4465     itype = ts->type;
4466 
4467     if (ts->val_type == TEMP_VAL_CONST) {
4468         /* propagate constant or generate sti */
4469         tcg_target_ulong val = ts->val;
4470         if (IS_DEAD_ARG(1)) {
4471             temp_dead(s, ts);
4472         }
4473         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4474         return;
4475     }
4476 
4477     /* If the source value is in memory we're going to be forced
4478        to have it in a register in order to perform the copy.  Copy
4479        the SOURCE value into its own register first, that way we
4480        don't have to reload SOURCE the next time it is used. */
4481     if (ts->val_type == TEMP_VAL_MEM) {
4482         temp_load(s, ts, tcg_target_available_regs[itype],
4483                   allocated_regs, preferred_regs);
4484     }
4485     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4486     ireg = ts->reg;
4487 
4488     if (IS_DEAD_ARG(0)) {
4489         /* mov to a non-saved dead register makes no sense (even with
4490            liveness analysis disabled). */
4491         tcg_debug_assert(NEED_SYNC_ARG(0));
4492         if (!ots->mem_allocated) {
4493             temp_allocate_frame(s, ots);
4494         }
4495         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4496         if (IS_DEAD_ARG(1)) {
4497             temp_dead(s, ts);
4498         }
4499         temp_dead(s, ots);
4500         return;
4501     }
4502 
4503     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4504         /*
4505          * The mov can be suppressed.  Kill input first, so that it
4506          * is unlinked from reg_to_temp, then set the output to the
4507          * reg that we saved from the input.
4508          */
4509         temp_dead(s, ts);
4510         oreg = ireg;
4511     } else {
4512         if (ots->val_type == TEMP_VAL_REG) {
4513             oreg = ots->reg;
4514         } else {
4515             /* Make sure to not spill the input register during allocation. */
4516             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4517                                  allocated_regs | ((TCGRegSet)1 << ireg),
4518                                  preferred_regs, ots->indirect_base);
4519         }
4520         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4521             /*
4522              * Cross register class move not supported.
4523              * Store the source register into the destination slot
4524              * and leave the destination temp as TEMP_VAL_MEM.
4525              */
4526             assert(!temp_readonly(ots));
4527             if (!ts->mem_allocated) {
4528                 temp_allocate_frame(s, ots);
4529             }
4530             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4531             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4532             ots->mem_coherent = 1;
4533             return;
4534         }
4535     }
4536     set_temp_val_reg(s, ots, oreg);
4537     ots->mem_coherent = 0;
4538 
4539     if (NEED_SYNC_ARG(0)) {
4540         temp_sync(s, ots, allocated_regs, 0, 0);
4541     }
4542 }
4543 
4544 /*
4545  * Specialized code generation for INDEX_op_dup_vec.
4546  */
4547 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4548 {
4549     const TCGLifeData arg_life = op->life;
4550     TCGRegSet dup_out_regs, dup_in_regs;
4551     TCGTemp *its, *ots;
4552     TCGType itype, vtype;
4553     unsigned vece;
4554     int lowpart_ofs;
4555     bool ok;
4556 
4557     ots = arg_temp(op->args[0]);
4558     its = arg_temp(op->args[1]);
4559 
4560     /* ENV should not be modified.  */
4561     tcg_debug_assert(!temp_readonly(ots));
4562 
4563     itype = its->type;
4564     vece = TCGOP_VECE(op);
4565     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4566 
4567     if (its->val_type == TEMP_VAL_CONST) {
4568         /* Propagate constant via movi -> dupi.  */
4569         tcg_target_ulong val = its->val;
4570         if (IS_DEAD_ARG(1)) {
4571             temp_dead(s, its);
4572         }
4573         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4574         return;
4575     }
4576 
4577     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4578     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4579 
4580     /* Allocate the output register now.  */
4581     if (ots->val_type != TEMP_VAL_REG) {
4582         TCGRegSet allocated_regs = s->reserved_regs;
4583         TCGReg oreg;
4584 
4585         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4586             /* Make sure to not spill the input register. */
4587             tcg_regset_set_reg(allocated_regs, its->reg);
4588         }
4589         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4590                              output_pref(op, 0), ots->indirect_base);
4591         set_temp_val_reg(s, ots, oreg);
4592     }
4593 
4594     switch (its->val_type) {
4595     case TEMP_VAL_REG:
4596         /*
4597          * The dup constriaints must be broad, covering all possible VECE.
4598          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4599          * to fail, indicating that extra moves are required for that case.
4600          */
4601         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4602             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4603                 goto done;
4604             }
4605             /* Try again from memory or a vector input register.  */
4606         }
4607         if (!its->mem_coherent) {
4608             /*
4609              * The input register is not synced, and so an extra store
4610              * would be required to use memory.  Attempt an integer-vector
4611              * register move first.  We do not have a TCGRegSet for this.
4612              */
4613             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4614                 break;
4615             }
4616             /* Sync the temp back to its slot and load from there.  */
4617             temp_sync(s, its, s->reserved_regs, 0, 0);
4618         }
4619         /* fall through */
4620 
4621     case TEMP_VAL_MEM:
4622         lowpart_ofs = 0;
4623         if (HOST_BIG_ENDIAN) {
4624             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4625         }
4626         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4627                              its->mem_offset + lowpart_ofs)) {
4628             goto done;
4629         }
4630         /* Load the input into the destination vector register. */
4631         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4632         break;
4633 
4634     default:
4635         g_assert_not_reached();
4636     }
4637 
4638     /* We now have a vector input register, so dup must succeed. */
4639     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4640     tcg_debug_assert(ok);
4641 
4642  done:
4643     ots->mem_coherent = 0;
4644     if (IS_DEAD_ARG(1)) {
4645         temp_dead(s, its);
4646     }
4647     if (NEED_SYNC_ARG(0)) {
4648         temp_sync(s, ots, s->reserved_regs, 0, 0);
4649     }
4650     if (IS_DEAD_ARG(0)) {
4651         temp_dead(s, ots);
4652     }
4653 }
4654 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The steps, in order:
 *   1. copy the op's constant arguments through unchanged;
 *   2. for each input, either pass it as an immediate (when the backend
 *      accepts the constant) or place it in a register that satisfies
 *      the argument constraint, including register-pair constraints;
 *   3. release the inputs that liveness marks as dead;
 *   4. act on the op flags (conditional branch, basic-block end, call
 *      clobber, side effects) and choose registers for the outputs;
 *   5. emit the instruction;
 *   6. sync outputs to memory and/or mark them dead as liveness requires.
 */
4655 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4656 {
4657     const TCGLifeData arg_life = op->life;
4658     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4659     TCGRegSet i_allocated_regs;
4660     TCGRegSet o_allocated_regs;
4661     int i, k, nb_iargs, nb_oargs;
4662     TCGReg reg;
4663     TCGArg arg;
4664     const TCGArgConstraint *arg_ct;
4665     TCGTemp *ts;
4666     TCGArg new_args[TCG_MAX_OP_ARGS];
4667     int const_args[TCG_MAX_OP_ARGS];
4668 
4669     nb_oargs = def->nb_oargs;
4670     nb_iargs = def->nb_iargs;
4671 
4672     /* copy constants */
4673     memcpy(new_args + nb_oargs + nb_iargs,
4674            op->args + nb_oargs + nb_iargs,
4675            sizeof(TCGArg) * def->nb_cargs);
4676 
4677     i_allocated_regs = s->reserved_regs;
4678     o_allocated_regs = s->reserved_regs;
4679 
4680     /* satisfy input constraints */
4681     for (k = 0; k < nb_iargs; k++) {
4682         TCGRegSet i_preferred_regs, i_required_regs;
4683         bool allocate_new_reg, copyto_new_reg;
4684         TCGTemp *ts2;
4685         int i1, i2;
4686 
             /* Inputs are visited in sort_index order, not operand order. */
4687         i = def->args_ct[nb_oargs + k].sort_index;
4688         arg = op->args[i];
4689         arg_ct = &def->args_ct[i];
4690         ts = arg_temp(arg);
4691 
4692         if (ts->val_type == TEMP_VAL_CONST
4693             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4694             /* constant is OK for instruction */
4695             const_args[i] = 1;
4696             new_args[i] = ts->val;
4697             continue;
4698         }
4699 
             /*
              * Snapshot of the temp's current register.  The cases below
              * test val_type == TEMP_VAL_REG before relying on it, or
              * overwrite it.
              */
4700         reg = ts->reg;
4701         i_preferred_regs = 0;
4702         i_required_regs = arg_ct->regs;
4703         allocate_new_reg = false;
4704         copyto_new_reg = false;
4705 
4706         switch (arg_ct->pair) {
4707         case 0: /* not paired */
4708             if (arg_ct->ialias) {
4709                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4710 
4711                 /*
4712                  * If the input is readonly, then it cannot also be an
4713                  * output and aliased to itself.  If the input is not
4714                  * dead after the instruction, we must allocate a new
4715                  * register and move it.
4716                  */
4717                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4718                     || def->args_ct[arg_ct->alias_index].newreg) {
4719                     allocate_new_reg = true;
4720                 } else if (ts->val_type == TEMP_VAL_REG) {
4721                     /*
4722                      * Check if the current register has already been
4723                      * allocated for another input.
4724                      */
4725                     allocate_new_reg =
4726                         tcg_regset_test_reg(i_allocated_regs, reg);
4727                 }
4728             }
4729             if (!allocate_new_reg) {
4730                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4731                           i_preferred_regs);
4732                 reg = ts->reg;
                     /* The temp may already sit in a register outside the constraint. */
4733                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4734             }
4735             if (allocate_new_reg) {
4736                 /*
4737                  * Allocate a new register matching the constraint
4738                  * and move the temporary register into it.
4739                  */
4740                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4741                           i_allocated_regs, 0);
4742                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4743                                     i_preferred_regs, ts->indirect_base);
4744                 copyto_new_reg = true;
4745             }
4746             break;
4747 
4748         case 1:
4749             /* First of an input pair; if i1 == i2, the second is an output. */
4750             i1 = i;
4751             i2 = arg_ct->pair_index;
4752             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4753 
4754             /*
4755              * It is easier to default to allocating a new pair
4756              * and to identify a few cases where it's not required.
4757              */
4758             if (arg_ct->ialias) {
4759                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4760                 if (IS_DEAD_ARG(i1) &&
4761                     IS_DEAD_ARG(i2) &&
4762                     !temp_readonly(ts) &&
4763                     ts->val_type == TEMP_VAL_REG &&
4764                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4765                     tcg_regset_test_reg(i_required_regs, reg) &&
4766                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4767                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4768                     (ts2
4769                      ? ts2->val_type == TEMP_VAL_REG &&
4770                        ts2->reg == reg + 1 &&
4771                        !temp_readonly(ts2)
4772                      : s->reg_to_temp[reg + 1] == NULL)) {
4773                     break;
4774                 }
4775             } else {
4776                 /* Without aliasing, the pair must also be an input. */
4777                 tcg_debug_assert(ts2);
4778                 if (ts->val_type == TEMP_VAL_REG &&
4779                     ts2->val_type == TEMP_VAL_REG &&
4780                     ts2->reg == reg + 1 &&
4781                     tcg_regset_test_reg(i_required_regs, reg)) {
4782                     break;
4783                 }
4784             }
4785             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4786                                      0, ts->indirect_base);
4787             goto do_pair;
4788 
4789         case 2: /* pair second */
                 /* The partner's register was recorded in new_args[]; use the next one. */
4790             reg = new_args[arg_ct->pair_index] + 1;
4791             goto do_pair;
4792 
4793         case 3: /* ialias with second output, no first input */
4794             tcg_debug_assert(arg_ct->ialias);
4795             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4796 
4797             if (IS_DEAD_ARG(i) &&
4798                 !temp_readonly(ts) &&
4799                 ts->val_type == TEMP_VAL_REG &&
4800                 reg > 0 &&
4801                 s->reg_to_temp[reg - 1] == NULL &&
4802                 tcg_regset_test_reg(i_required_regs, reg) &&
4803                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4804                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                     /* Reserve the preceding register as well. */
4805                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4806                 break;
4807             }
4808             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4809                                      i_allocated_regs, 0,
4810                                      ts->indirect_base);
4811             tcg_regset_set_reg(i_allocated_regs, reg);
                 /* This input takes the second register of the new pair. */
4812             reg += 1;
4813             goto do_pair;
4814 
4815         do_pair:
4816             /*
4817              * If an aliased input is not dead after the instruction,
4818              * we must allocate a new register and move it.
4819              */
4820             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4821                 TCGRegSet t_allocated_regs = i_allocated_regs;
4822 
4823                 /*
4824                  * Because of the alias, and the continued life, make sure
4825                  * that the temp is somewhere *other* than the reg pair,
4826                  * and we get a copy in reg.
4827                  */
4828                 tcg_regset_set_reg(t_allocated_regs, reg);
4829                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4830                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4831                     /* If ts was already in reg, copy it somewhere else. */
4832                     TCGReg nr;
4833                     bool ok;
4834 
4835                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4836                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4837                                        t_allocated_regs, 0, ts->indirect_base);
4838                     ok = tcg_out_mov(s, ts->type, nr, reg);
4839                     tcg_debug_assert(ok);
4840 
4841                     set_temp_val_reg(s, ts, nr);
4842                 } else {
4843                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4844                               t_allocated_regs, 0);
4845                     copyto_new_reg = true;
4846                 }
4847             } else {
4848                 /* Preferably allocate to reg, otherwise copy. */
4849                 i_required_regs = (TCGRegSet)1 << reg;
4850                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4851                           i_preferred_regs);
4852                 copyto_new_reg = ts->reg != reg;
4853             }
4854             break;
4855 
4856         default:
4857             g_assert_not_reached();
4858         }
4859 
4860         if (copyto_new_reg) {
4861             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4862                 /*
4863                  * Cross register class move not supported.  Sync the
4864                  * temp back to its slot and load from there.
4865                  */
4866                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4867                 tcg_out_ld(s, ts->type, reg,
4868                            ts->mem_base->reg, ts->mem_offset);
4869             }
4870         }
             /* Record the chosen register and keep later inputs away from it. */
4871         new_args[i] = reg;
4872         const_args[i] = 0;
4873         tcg_regset_set_reg(i_allocated_regs, reg);
4874     }
4875 
4876     /* mark dead temporaries and free the associated registers */
4877     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4878         if (IS_DEAD_ARG(i)) {
4879             temp_dead(s, arg_temp(op->args[i]));
4880         }
4881     }
4882 
         /*
          * Ops flagged as a conditional branch or a basic-block end skip
          * the output allocation below entirely.
          */
4883     if (def->flags & TCG_OPF_COND_BRANCH) {
4884         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4885     } else if (def->flags & TCG_OPF_BB_END) {
4886         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4887     } else {
4888         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4889             /* XXX: permit generic clobber register list ? */
4890             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4891                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4892                     tcg_reg_free(s, i, i_allocated_regs);
4893                 }
4894             }
4895         }
4896         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4897             /* sync globals if the op has side effects and might trigger
4898                an exception. */
4899             sync_globals(s, i_allocated_regs);
4900         }
4901 
4902         /* satisfy the output constraints */
4903         for(k = 0; k < nb_oargs; k++) {
4904             i = def->args_ct[k].sort_index;
4905             arg = op->args[i];
4906             arg_ct = &def->args_ct[i];
4907             ts = arg_temp(arg);
4908 
4909             /* ENV should not be modified.  */
4910             tcg_debug_assert(!temp_readonly(ts));
4911 
4912             switch (arg_ct->pair) {
4913             case 0: /* not paired */
4914                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                         /* Reuse the aliased input's register (input was not an immediate). */
4915                     reg = new_args[arg_ct->alias_index];
4916                 } else if (arg_ct->newreg) {
                         /* newreg: stay clear of both input and output registers. */
4917                     reg = tcg_reg_alloc(s, arg_ct->regs,
4918                                         i_allocated_regs | o_allocated_regs,
4919                                         output_pref(op, k), ts->indirect_base);
4920                 } else {
                         /* Only other outputs are excluded, so an input register may be reused. */
4921                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4922                                         output_pref(op, k), ts->indirect_base);
4923                 }
4924                 break;
4925 
4926             case 1: /* first of pair */
4927                 tcg_debug_assert(!arg_ct->newreg);
4928                 if (arg_ct->oalias) {
4929                     reg = new_args[arg_ct->alias_index];
4930                     break;
4931                 }
4932                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4933                                          output_pref(op, k), ts->indirect_base);
4934                 break;
4935 
4936             case 2: /* second of pair */
4937                 tcg_debug_assert(!arg_ct->newreg);
4938                 if (arg_ct->oalias) {
4939                     reg = new_args[arg_ct->alias_index];
4940                 } else {
4941                     reg = new_args[arg_ct->pair_index] + 1;
4942                 }
4943                 break;
4944 
4945             case 3: /* first of pair, aliasing with a second input */
4946                 tcg_debug_assert(!arg_ct->newreg);
4947                 reg = new_args[arg_ct->pair_index] - 1;
4948                 break;
4949 
4950             default:
4951                 g_assert_not_reached();
4952             }
4953             tcg_regset_set_reg(o_allocated_regs, reg);
4954             set_temp_val_reg(s, ts, reg);
                 /* The register is about to hold a new value not yet stored to memory. */
4955             ts->mem_coherent = 0;
4956             new_args[i] = reg;
4957         }
4958     }
4959 
4960     /* emit instruction */
         /*
          * The extension/truncation opcodes listed here are emitted through
          * dedicated tcg_out_* helpers; everything else goes through
          * tcg_out_vec_op() or tcg_out_op().
          */
4961     switch (op->opc) {
4962     case INDEX_op_ext8s_i32:
4963         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4964         break;
4965     case INDEX_op_ext8s_i64:
4966         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4967         break;
4968     case INDEX_op_ext8u_i32:
4969     case INDEX_op_ext8u_i64:
4970         tcg_out_ext8u(s, new_args[0], new_args[1]);
4971         break;
4972     case INDEX_op_ext16s_i32:
4973         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4974         break;
4975     case INDEX_op_ext16s_i64:
4976         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4977         break;
4978     case INDEX_op_ext16u_i32:
4979     case INDEX_op_ext16u_i64:
4980         tcg_out_ext16u(s, new_args[0], new_args[1]);
4981         break;
4982     case INDEX_op_ext32s_i64:
4983         tcg_out_ext32s(s, new_args[0], new_args[1]);
4984         break;
4985     case INDEX_op_ext32u_i64:
4986         tcg_out_ext32u(s, new_args[0], new_args[1]);
4987         break;
4988     case INDEX_op_ext_i32_i64:
4989         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4990         break;
4991     case INDEX_op_extu_i32_i64:
4992         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4993         break;
4994     case INDEX_op_extrl_i64_i32:
4995         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4996         break;
4997     default:
4998         if (def->flags & TCG_OPF_VECTOR) {
4999             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5000                            new_args, const_args);
5001         } else {
5002             tcg_out_op(s, op->opc, new_args, const_args);
5003         }
5004         break;
5005     }
5006 
5007     /* sync outputs to memory and/or mark them dead, as liveness requires */
5008     for(i = 0; i < nb_oargs; i++) {
5009         ts = arg_temp(op->args[i]);
5010 
5011         /* ENV should not be modified.  */
5012         tcg_debug_assert(!temp_readonly(ts));
5013 
5014         if (NEED_SYNC_ARG(i)) {
5015             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5016         } else if (IS_DEAD_ARG(i)) {
5017             temp_dead(s, ts);
5018         }
5019     }
5020 }
5021 
5022 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5023 {
5024     const TCGLifeData arg_life = op->life;
5025     TCGTemp *ots, *itsl, *itsh;
5026     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5027 
5028     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5029     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5030     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5031 
5032     ots = arg_temp(op->args[0]);
5033     itsl = arg_temp(op->args[1]);
5034     itsh = arg_temp(op->args[2]);
5035 
5036     /* ENV should not be modified.  */
5037     tcg_debug_assert(!temp_readonly(ots));
5038 
5039     /* Allocate the output register now.  */
5040     if (ots->val_type != TEMP_VAL_REG) {
5041         TCGRegSet allocated_regs = s->reserved_regs;
5042         TCGRegSet dup_out_regs =
5043             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5044         TCGReg oreg;
5045 
5046         /* Make sure to not spill the input registers. */
5047         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5048             tcg_regset_set_reg(allocated_regs, itsl->reg);
5049         }
5050         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5051             tcg_regset_set_reg(allocated_regs, itsh->reg);
5052         }
5053 
5054         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5055                              output_pref(op, 0), ots->indirect_base);
5056         set_temp_val_reg(s, ots, oreg);
5057     }
5058 
5059     /* Promote dup2 of immediates to dupi_vec. */
5060     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5061         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5062         MemOp vece = MO_64;
5063 
5064         if (val == dup_const(MO_8, val)) {
5065             vece = MO_8;
5066         } else if (val == dup_const(MO_16, val)) {
5067             vece = MO_16;
5068         } else if (val == dup_const(MO_32, val)) {
5069             vece = MO_32;
5070         }
5071 
5072         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5073         goto done;
5074     }
5075 
5076     /* If the two inputs form one 64-bit value, try dupm_vec. */
5077     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5078         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5079         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5080         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5081 
5082         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5083         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5084 
5085         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5086                              its->mem_base->reg, its->mem_offset)) {
5087             goto done;
5088         }
5089     }
5090 
5091     /* Fall back to generic expansion. */
5092     return false;
5093 
5094  done:
5095     ots->mem_coherent = 0;
5096     if (IS_DEAD_ARG(1)) {
5097         temp_dead(s, itsl);
5098     }
5099     if (IS_DEAD_ARG(2)) {
5100         temp_dead(s, itsh);
5101     }
5102     if (NEED_SYNC_ARG(0)) {
5103         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5104     } else if (IS_DEAD_ARG(0)) {
5105         temp_dead(s, ots);
5106     }
5107     return true;
5108 }
5109 
5110 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5111                          TCGRegSet allocated_regs)
5112 {
5113     if (ts->val_type == TEMP_VAL_REG) {
5114         if (ts->reg != reg) {
5115             tcg_reg_free(s, reg, allocated_regs);
5116             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5117                 /*
5118                  * Cross register class move not supported.  Sync the
5119                  * temp back to its slot and load from there.
5120                  */
5121                 temp_sync(s, ts, allocated_regs, 0, 0);
5122                 tcg_out_ld(s, ts->type, reg,
5123                            ts->mem_base->reg, ts->mem_offset);
5124             }
5125         }
5126     } else {
5127         TCGRegSet arg_set = 0;
5128 
5129         tcg_reg_free(s, reg, allocated_regs);
5130         tcg_regset_set_reg(arg_set, reg);
5131         temp_load(s, ts, arg_set, allocated_regs, 0);
5132     }
5133 }
5134 
5135 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5136                          TCGRegSet allocated_regs)
5137 {
5138     /*
5139      * When the destination is on the stack, load up the temp and store.
5140      * If there are many call-saved registers, the temp might live to
5141      * see another use; otherwise it'll be discarded.
5142      */
5143     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5144     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5145                arg_slot_stk_ofs(arg_slot));
5146 }
5147 
5148 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5149                             TCGTemp *ts, TCGRegSet *allocated_regs)
5150 {
5151     if (arg_slot_reg_p(l->arg_slot)) {
5152         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5153         load_arg_reg(s, reg, ts, *allocated_regs);
5154         tcg_regset_set_reg(*allocated_regs, reg);
5155     } else {
5156         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5157     }
5158 }
5159 
5160 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5161                          intptr_t ref_off, TCGRegSet *allocated_regs)
5162 {
5163     TCGReg reg;
5164 
5165     if (arg_slot_reg_p(arg_slot)) {
5166         reg = tcg_target_call_iarg_regs[arg_slot];
5167         tcg_reg_free(s, reg, *allocated_regs);
5168         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5169         tcg_regset_set_reg(*allocated_regs, reg);
5170     } else {
5171         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5172                             *allocated_regs, 0, false);
5173         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5174         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5175                    arg_slot_stk_ofs(arg_slot));
5176     }
5177 }
5178 
5179 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5180 {
5181     const int nb_oargs = TCGOP_CALLO(op);
5182     const int nb_iargs = TCGOP_CALLI(op);
5183     const TCGLifeData arg_life = op->life;
5184     const TCGHelperInfo *info = tcg_call_info(op);
5185     TCGRegSet allocated_regs = s->reserved_regs;
5186     int i;
5187 
5188     /*
5189      * Move inputs into place in reverse order,
5190      * so that we place stacked arguments first.
5191      */
5192     for (i = nb_iargs - 1; i >= 0; --i) {
5193         const TCGCallArgumentLoc *loc = &info->in[i];
5194         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5195 
5196         switch (loc->kind) {
5197         case TCG_CALL_ARG_NORMAL:
5198         case TCG_CALL_ARG_EXTEND_U:
5199         case TCG_CALL_ARG_EXTEND_S:
5200             load_arg_normal(s, loc, ts, &allocated_regs);
5201             break;
5202         case TCG_CALL_ARG_BY_REF:
5203             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5204             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5205                          arg_slot_stk_ofs(loc->ref_slot),
5206                          &allocated_regs);
5207             break;
5208         case TCG_CALL_ARG_BY_REF_N:
5209             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5210             break;
5211         default:
5212             g_assert_not_reached();
5213         }
5214     }
5215 
5216     /* Mark dead temporaries and free the associated registers.  */
5217     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5218         if (IS_DEAD_ARG(i)) {
5219             temp_dead(s, arg_temp(op->args[i]));
5220         }
5221     }
5222 
5223     /* Clobber call registers.  */
5224     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5225         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5226             tcg_reg_free(s, i, allocated_regs);
5227         }
5228     }
5229 
5230     /*
5231      * Save globals if they might be written by the helper,
5232      * sync them if they might be read.
5233      */
5234     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5235         /* Nothing to do */
5236     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5237         sync_globals(s, allocated_regs);
5238     } else {
5239         save_globals(s, allocated_regs);
5240     }
5241 
5242     /*
5243      * If the ABI passes a pointer to the returned struct as the first
5244      * argument, load that now.  Pass a pointer to the output home slot.
5245      */
5246     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5247         TCGTemp *ts = arg_temp(op->args[0]);
5248 
5249         if (!ts->mem_allocated) {
5250             temp_allocate_frame(s, ts);
5251         }
5252         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5253     }
5254 
5255     tcg_out_call(s, tcg_call_func(op), info);
5256 
5257     /* Assign output registers and emit moves if needed.  */
5258     switch (info->out_kind) {
5259     case TCG_CALL_RET_NORMAL:
5260         for (i = 0; i < nb_oargs; i++) {
5261             TCGTemp *ts = arg_temp(op->args[i]);
5262             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5263 
5264             /* ENV should not be modified.  */
5265             tcg_debug_assert(!temp_readonly(ts));
5266 
5267             set_temp_val_reg(s, ts, reg);
5268             ts->mem_coherent = 0;
5269         }
5270         break;
5271 
5272     case TCG_CALL_RET_BY_VEC:
5273         {
5274             TCGTemp *ts = arg_temp(op->args[0]);
5275 
5276             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5277             tcg_debug_assert(ts->temp_subindex == 0);
5278             if (!ts->mem_allocated) {
5279                 temp_allocate_frame(s, ts);
5280             }
5281             tcg_out_st(s, TCG_TYPE_V128,
5282                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5283                        ts->mem_base->reg, ts->mem_offset);
5284         }
5285         /* fall through to mark all parts in memory */
5286 
5287     case TCG_CALL_RET_BY_REF:
5288         /* The callee has performed a write through the reference. */
5289         for (i = 0; i < nb_oargs; i++) {
5290             TCGTemp *ts = arg_temp(op->args[i]);
5291             ts->val_type = TEMP_VAL_MEM;
5292         }
5293         break;
5294 
5295     default:
5296         g_assert_not_reached();
5297     }
5298 
5299     /* Flush or discard output registers as needed. */
5300     for (i = 0; i < nb_oargs; i++) {
5301         TCGTemp *ts = arg_temp(op->args[i]);
5302         if (NEED_SYNC_ARG(i)) {
5303             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5304         } else if (IS_DEAD_ARG(i)) {
5305             temp_dead(s, ts);
5306         }
5307     }
5308 }
5309 
5310 /**
5311  * atom_and_align_for_opc:
5312  * @s: tcg context
5313  * @opc: memory operation code
5314  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5315  * @allow_two_ops: true if we are prepared to issue two operations
5316  *
5317  * Return the alignment and atomicity to use for the inline fast path
5318  * for the given memory operation.  The alignment may be larger than
5319  * that specified in @opc, and the correct alignment will be diagnosed
5320  * by the slow path helper.
5321  *
5322  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5323  * and issue two loads or stores for subalignment.
5324  */
5325 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5326                                            MemOp host_atom, bool allow_two_ops)
5327 {
5328     MemOp align = get_alignment_bits(opc);
5329     MemOp size = opc & MO_SIZE;
5330     MemOp half = size ? size - 1 : 0;
5331     MemOp atmax;
5332     MemOp atom;
5333 
5334     /* When serialized, no further atomicity required.  */
5335     if (s->gen_tb->cflags & CF_PARALLEL) {
5336         atom = opc & MO_ATOM_MASK;
5337     } else {
5338         atom = MO_ATOM_NONE;
5339     }
5340 
5341     switch (atom) {
5342     case MO_ATOM_NONE:
5343         /* The operation requires no specific atomicity. */
5344         atmax = MO_8;
5345         break;
5346 
5347     case MO_ATOM_IFALIGN:
5348         atmax = size;
5349         break;
5350 
5351     case MO_ATOM_IFALIGN_PAIR:
5352         atmax = half;
5353         break;
5354 
5355     case MO_ATOM_WITHIN16:
5356         atmax = size;
5357         if (size == MO_128) {
5358             /* Misalignment implies !within16, and therefore no atomicity. */
5359         } else if (host_atom != MO_ATOM_WITHIN16) {
5360             /* The host does not implement within16, so require alignment. */
5361             align = MAX(align, size);
5362         }
5363         break;
5364 
5365     case MO_ATOM_WITHIN16_PAIR:
5366         atmax = size;
5367         /*
5368          * Misalignment implies !within16, and therefore half atomicity.
5369          * Any host prepared for two operations can implement this with
5370          * half alignment.
5371          */
5372         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5373             align = MAX(align, half);
5374         }
5375         break;
5376 
5377     case MO_ATOM_SUBALIGN:
5378         atmax = size;
5379         if (host_atom != MO_ATOM_SUBALIGN) {
5380             /* If unaligned but not odd, there are subobjects up to half. */
5381             if (allow_two_ops) {
5382                 align = MAX(align, half);
5383             } else {
5384                 align = MAX(align, size);
5385             }
5386         }
5387         break;
5388 
5389     default:
5390         g_assert_not_reached();
5391     }
5392 
5393     return (TCGAtomAlign){ .atom = atmax, .align = align };
5394 }
5395 
5396 /*
5397  * Similarly for qemu_ld/st slow path helpers.
5398  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5399  * using only the provided backend tcg_out_* functions.
5400  */
5401 
5402 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5403 {
5404     int ofs = arg_slot_stk_ofs(slot);
5405 
5406     /*
5407      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5408      * require extension to uint64_t, adjust the address for uint32_t.
5409      */
5410     if (HOST_BIG_ENDIAN &&
5411         TCG_TARGET_REG_BITS == 64 &&
5412         type == TCG_TYPE_I32) {
5413         ofs += 4;
5414     }
5415     return ofs;
5416 }
5417 
5418 static void tcg_out_helper_load_slots(TCGContext *s,
5419                                       unsigned nmov, TCGMovExtend *mov,
5420                                       const TCGLdstHelperParam *parm)
5421 {
5422     unsigned i;
5423     TCGReg dst3;
5424 
5425     /*
5426      * Start from the end, storing to the stack first.
5427      * This frees those registers, so we need not consider overlap.
5428      */
5429     for (i = nmov; i-- > 0; ) {
5430         unsigned slot = mov[i].dst;
5431 
5432         if (arg_slot_reg_p(slot)) {
5433             goto found_reg;
5434         }
5435 
5436         TCGReg src = mov[i].src;
5437         TCGType dst_type = mov[i].dst_type;
5438         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5439 
5440         /* The argument is going onto the stack; extend into scratch. */
5441         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5442             tcg_debug_assert(parm->ntmp != 0);
5443             mov[i].dst = src = parm->tmp[0];
5444             tcg_out_movext1(s, &mov[i]);
5445         }
5446 
5447         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5448                    tcg_out_helper_stk_ofs(dst_type, slot));
5449     }
5450     return;
5451 
5452  found_reg:
5453     /*
5454      * The remaining arguments are in registers.
5455      * Convert slot numbers to argument registers.
5456      */
5457     nmov = i + 1;
5458     for (i = 0; i < nmov; ++i) {
5459         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5460     }
5461 
5462     switch (nmov) {
5463     case 4:
5464         /* The backend must have provided enough temps for the worst case. */
5465         tcg_debug_assert(parm->ntmp >= 2);
5466 
5467         dst3 = mov[3].dst;
5468         for (unsigned j = 0; j < 3; ++j) {
5469             if (dst3 == mov[j].src) {
5470                 /*
5471                  * Conflict. Copy the source to a temporary, perform the
5472                  * remaining moves, then the extension from our scratch
5473                  * on the way out.
5474                  */
5475                 TCGReg scratch = parm->tmp[1];
5476 
5477                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5478                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5479                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5480                 break;
5481             }
5482         }
5483 
5484         /* No conflicts: perform this move and continue. */
5485         tcg_out_movext1(s, &mov[3]);
5486         /* fall through */
5487 
5488     case 3:
5489         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5490                         parm->ntmp ? parm->tmp[0] : -1);
5491         break;
5492     case 2:
5493         tcg_out_movext2(s, mov, mov + 1,
5494                         parm->ntmp ? parm->tmp[0] : -1);
5495         break;
5496     case 1:
5497         tcg_out_movext1(s, mov);
5498         break;
5499     default:
5500         g_assert_not_reached();
5501     }
5502 }
5503 
5504 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5505                                     TCGType type, tcg_target_long imm,
5506                                     const TCGLdstHelperParam *parm)
5507 {
5508     if (arg_slot_reg_p(slot)) {
5509         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5510     } else {
5511         int ofs = tcg_out_helper_stk_ofs(type, slot);
5512         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5513             tcg_debug_assert(parm->ntmp != 0);
5514             tcg_out_movi(s, type, parm->tmp[0], imm);
5515             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5516         }
5517     }
5518 }
5519 
5520 static void tcg_out_helper_load_common_args(TCGContext *s,
5521                                             const TCGLabelQemuLdst *ldst,
5522                                             const TCGLdstHelperParam *parm,
5523                                             const TCGHelperInfo *info,
5524                                             unsigned next_arg)
5525 {
5526     TCGMovExtend ptr_mov = {
5527         .dst_type = TCG_TYPE_PTR,
5528         .src_type = TCG_TYPE_PTR,
5529         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5530     };
5531     const TCGCallArgumentLoc *loc = &info->in[0];
5532     TCGType type;
5533     unsigned slot;
5534     tcg_target_ulong imm;
5535 
5536     /*
5537      * Handle env, which is always first.
5538      */
5539     ptr_mov.dst = loc->arg_slot;
5540     ptr_mov.src = TCG_AREG0;
5541     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5542 
5543     /*
5544      * Handle oi.
5545      */
5546     imm = ldst->oi;
5547     loc = &info->in[next_arg];
5548     type = TCG_TYPE_I32;
5549     switch (loc->kind) {
5550     case TCG_CALL_ARG_NORMAL:
5551         break;
5552     case TCG_CALL_ARG_EXTEND_U:
5553     case TCG_CALL_ARG_EXTEND_S:
5554         /* No extension required for MemOpIdx. */
5555         tcg_debug_assert(imm <= INT32_MAX);
5556         type = TCG_TYPE_REG;
5557         break;
5558     default:
5559         g_assert_not_reached();
5560     }
5561     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5562     next_arg++;
5563 
5564     /*
5565      * Handle ra.
5566      */
5567     loc = &info->in[next_arg];
5568     slot = loc->arg_slot;
5569     if (parm->ra_gen) {
5570         int arg_reg = -1;
5571         TCGReg ra_reg;
5572 
5573         if (arg_slot_reg_p(slot)) {
5574             arg_reg = tcg_target_call_iarg_regs[slot];
5575         }
5576         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5577 
5578         ptr_mov.dst = slot;
5579         ptr_mov.src = ra_reg;
5580         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5581     } else {
5582         imm = (uintptr_t)ldst->raddr;
5583         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5584     }
5585 }
5586 
5587 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5588                                        const TCGCallArgumentLoc *loc,
5589                                        TCGType dst_type, TCGType src_type,
5590                                        TCGReg lo, TCGReg hi)
5591 {
5592     MemOp reg_mo;
5593 
5594     if (dst_type <= TCG_TYPE_REG) {
5595         MemOp src_ext;
5596 
5597         switch (loc->kind) {
5598         case TCG_CALL_ARG_NORMAL:
5599             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5600             break;
5601         case TCG_CALL_ARG_EXTEND_U:
5602             dst_type = TCG_TYPE_REG;
5603             src_ext = MO_UL;
5604             break;
5605         case TCG_CALL_ARG_EXTEND_S:
5606             dst_type = TCG_TYPE_REG;
5607             src_ext = MO_SL;
5608             break;
5609         default:
5610             g_assert_not_reached();
5611         }
5612 
5613         mov[0].dst = loc->arg_slot;
5614         mov[0].dst_type = dst_type;
5615         mov[0].src = lo;
5616         mov[0].src_type = src_type;
5617         mov[0].src_ext = src_ext;
5618         return 1;
5619     }
5620 
5621     if (TCG_TARGET_REG_BITS == 32) {
5622         assert(dst_type == TCG_TYPE_I64);
5623         reg_mo = MO_32;
5624     } else {
5625         assert(dst_type == TCG_TYPE_I128);
5626         reg_mo = MO_64;
5627     }
5628 
5629     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5630     mov[0].src = lo;
5631     mov[0].dst_type = TCG_TYPE_REG;
5632     mov[0].src_type = TCG_TYPE_REG;
5633     mov[0].src_ext = reg_mo;
5634 
5635     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5636     mov[1].src = hi;
5637     mov[1].dst_type = TCG_TYPE_REG;
5638     mov[1].src_type = TCG_TYPE_REG;
5639     mov[1].src_ext = reg_mo;
5640 
5641     return 2;
5642 }
5643 
5644 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5645                                    const TCGLdstHelperParam *parm)
5646 {
5647     const TCGHelperInfo *info;
5648     const TCGCallArgumentLoc *loc;
5649     TCGMovExtend mov[2];
5650     unsigned next_arg, nmov;
5651     MemOp mop = get_memop(ldst->oi);
5652 
5653     switch (mop & MO_SIZE) {
5654     case MO_8:
5655     case MO_16:
5656     case MO_32:
5657         info = &info_helper_ld32_mmu;
5658         break;
5659     case MO_64:
5660         info = &info_helper_ld64_mmu;
5661         break;
5662     case MO_128:
5663         info = &info_helper_ld128_mmu;
5664         break;
5665     default:
5666         g_assert_not_reached();
5667     }
5668 
5669     /* Defer env argument. */
5670     next_arg = 1;
5671 
5672     loc = &info->in[next_arg];
5673     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5674         /*
5675          * 32-bit host with 32-bit guest: zero-extend the guest address
5676          * to 64-bits for the helper by storing the low part, then
5677          * load a zero for the high part.
5678          */
5679         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5680                                TCG_TYPE_I32, TCG_TYPE_I32,
5681                                ldst->addrlo_reg, -1);
5682         tcg_out_helper_load_slots(s, 1, mov, parm);
5683 
5684         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5685                                 TCG_TYPE_I32, 0, parm);
5686         next_arg += 2;
5687     } else {
5688         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5689                                       ldst->addrlo_reg, ldst->addrhi_reg);
5690         tcg_out_helper_load_slots(s, nmov, mov, parm);
5691         next_arg += nmov;
5692     }
5693 
5694     switch (info->out_kind) {
5695     case TCG_CALL_RET_NORMAL:
5696     case TCG_CALL_RET_BY_VEC:
5697         break;
5698     case TCG_CALL_RET_BY_REF:
5699         /*
5700          * The return reference is in the first argument slot.
5701          * We need memory in which to return: re-use the top of stack.
5702          */
5703         {
5704             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5705 
5706             if (arg_slot_reg_p(0)) {
5707                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5708                                  TCG_REG_CALL_STACK, ofs_slot0);
5709             } else {
5710                 tcg_debug_assert(parm->ntmp != 0);
5711                 tcg_out_addi_ptr(s, parm->tmp[0],
5712                                  TCG_REG_CALL_STACK, ofs_slot0);
5713                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5714                            TCG_REG_CALL_STACK, ofs_slot0);
5715             }
5716         }
5717         break;
5718     default:
5719         g_assert_not_reached();
5720     }
5721 
5722     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5723 }
5724 
5725 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5726                                   bool load_sign,
5727                                   const TCGLdstHelperParam *parm)
5728 {
5729     MemOp mop = get_memop(ldst->oi);
5730     TCGMovExtend mov[2];
5731     int ofs_slot0;
5732 
5733     switch (ldst->type) {
5734     case TCG_TYPE_I64:
5735         if (TCG_TARGET_REG_BITS == 32) {
5736             break;
5737         }
5738         /* fall through */
5739 
5740     case TCG_TYPE_I32:
5741         mov[0].dst = ldst->datalo_reg;
5742         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5743         mov[0].dst_type = ldst->type;
5744         mov[0].src_type = TCG_TYPE_REG;
5745 
5746         /*
5747          * If load_sign, then we allowed the helper to perform the
5748          * appropriate sign extension to tcg_target_ulong, and all
5749          * we need now is a plain move.
5750          *
5751          * If they do not, then we expect the relevant extension
5752          * instruction to be no more expensive than a move, and
5753          * we thus save the icache etc by only using one of two
5754          * helper functions.
5755          */
5756         if (load_sign || !(mop & MO_SIGN)) {
5757             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5758                 mov[0].src_ext = MO_32;
5759             } else {
5760                 mov[0].src_ext = MO_64;
5761             }
5762         } else {
5763             mov[0].src_ext = mop & MO_SSIZE;
5764         }
5765         tcg_out_movext1(s, mov);
5766         return;
5767 
5768     case TCG_TYPE_I128:
5769         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5770         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5771         switch (TCG_TARGET_CALL_RET_I128) {
5772         case TCG_CALL_RET_NORMAL:
5773             break;
5774         case TCG_CALL_RET_BY_VEC:
5775             tcg_out_st(s, TCG_TYPE_V128,
5776                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5777                        TCG_REG_CALL_STACK, ofs_slot0);
5778             /* fall through */
5779         case TCG_CALL_RET_BY_REF:
5780             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5781                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5782             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5783                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5784             return;
5785         default:
5786             g_assert_not_reached();
5787         }
5788         break;
5789 
5790     default:
5791         g_assert_not_reached();
5792     }
5793 
5794     mov[0].dst = ldst->datalo_reg;
5795     mov[0].src =
5796         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5797     mov[0].dst_type = TCG_TYPE_REG;
5798     mov[0].src_type = TCG_TYPE_REG;
5799     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5800 
5801     mov[1].dst = ldst->datahi_reg;
5802     mov[1].src =
5803         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5804     mov[1].dst_type = TCG_TYPE_REG;
5805     mov[1].src_type = TCG_TYPE_REG;
5806     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5807 
5808     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5809 }
5810 
5811 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5812                                    const TCGLdstHelperParam *parm)
5813 {
5814     const TCGHelperInfo *info;
5815     const TCGCallArgumentLoc *loc;
5816     TCGMovExtend mov[4];
5817     TCGType data_type;
5818     unsigned next_arg, nmov, n;
5819     MemOp mop = get_memop(ldst->oi);
5820 
5821     switch (mop & MO_SIZE) {
5822     case MO_8:
5823     case MO_16:
5824     case MO_32:
5825         info = &info_helper_st32_mmu;
5826         data_type = TCG_TYPE_I32;
5827         break;
5828     case MO_64:
5829         info = &info_helper_st64_mmu;
5830         data_type = TCG_TYPE_I64;
5831         break;
5832     case MO_128:
5833         info = &info_helper_st128_mmu;
5834         data_type = TCG_TYPE_I128;
5835         break;
5836     default:
5837         g_assert_not_reached();
5838     }
5839 
5840     /* Defer env argument. */
5841     next_arg = 1;
5842     nmov = 0;
5843 
5844     /* Handle addr argument. */
5845     loc = &info->in[next_arg];
5846     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5847         /*
5848          * 32-bit host with 32-bit guest: zero-extend the guest address
5849          * to 64-bits for the helper by storing the low part.  Later,
5850          * after we have processed the register inputs, we will load a
5851          * zero for the high part.
5852          */
5853         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5854                                TCG_TYPE_I32, TCG_TYPE_I32,
5855                                ldst->addrlo_reg, -1);
5856         next_arg += 2;
5857         nmov += 1;
5858     } else {
5859         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5860                                    ldst->addrlo_reg, ldst->addrhi_reg);
5861         next_arg += n;
5862         nmov += n;
5863     }
5864 
5865     /* Handle data argument. */
5866     loc = &info->in[next_arg];
5867     switch (loc->kind) {
5868     case TCG_CALL_ARG_NORMAL:
5869     case TCG_CALL_ARG_EXTEND_U:
5870     case TCG_CALL_ARG_EXTEND_S:
5871         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5872                                    ldst->datalo_reg, ldst->datahi_reg);
5873         next_arg += n;
5874         nmov += n;
5875         tcg_out_helper_load_slots(s, nmov, mov, parm);
5876         break;
5877 
5878     case TCG_CALL_ARG_BY_REF:
5879         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5880         tcg_debug_assert(data_type == TCG_TYPE_I128);
5881         tcg_out_st(s, TCG_TYPE_I64,
5882                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5883                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5884         tcg_out_st(s, TCG_TYPE_I64,
5885                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5886                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5887 
5888         tcg_out_helper_load_slots(s, nmov, mov, parm);
5889 
5890         if (arg_slot_reg_p(loc->arg_slot)) {
5891             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5892                              TCG_REG_CALL_STACK,
5893                              arg_slot_stk_ofs(loc->ref_slot));
5894         } else {
5895             tcg_debug_assert(parm->ntmp != 0);
5896             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5897                              arg_slot_stk_ofs(loc->ref_slot));
5898             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5899                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5900         }
5901         next_arg += 2;
5902         break;
5903 
5904     default:
5905         g_assert_not_reached();
5906     }
5907 
5908     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5909         /* Zero extend the address by loading a zero for the high part. */
5910         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5911         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5912     }
5913 
5914     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5915 }
5916 
5917 void tcg_dump_op_count(GString *buf)
5918 {
5919     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5920 }
5921 
5922 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5923 {
5924     int i, start_words, num_insns;
5925     TCGOp *op;
5926 
5927     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5928                  && qemu_log_in_addr_range(pc_start))) {
5929         FILE *logfile = qemu_log_trylock();
5930         if (logfile) {
5931             fprintf(logfile, "OP:\n");
5932             tcg_dump_ops(s, logfile, false);
5933             fprintf(logfile, "\n");
5934             qemu_log_unlock(logfile);
5935         }
5936     }
5937 
5938 #ifdef CONFIG_DEBUG_TCG
5939     /* Ensure all labels referenced have been emitted.  */
5940     {
5941         TCGLabel *l;
5942         bool error = false;
5943 
5944         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5945             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5946                 qemu_log_mask(CPU_LOG_TB_OP,
5947                               "$L%d referenced but not present.\n", l->id);
5948                 error = true;
5949             }
5950         }
5951         assert(!error);
5952     }
5953 #endif
5954 
5955     tcg_optimize(s);
5956 
5957     reachable_code_pass(s);
5958     liveness_pass_0(s);
5959     liveness_pass_1(s);
5960 
5961     if (s->nb_indirects > 0) {
5962         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5963                      && qemu_log_in_addr_range(pc_start))) {
5964             FILE *logfile = qemu_log_trylock();
5965             if (logfile) {
5966                 fprintf(logfile, "OP before indirect lowering:\n");
5967                 tcg_dump_ops(s, logfile, false);
5968                 fprintf(logfile, "\n");
5969                 qemu_log_unlock(logfile);
5970             }
5971         }
5972 
5973         /* Replace indirect temps with direct temps.  */
5974         if (liveness_pass_2(s)) {
5975             /* If changes were made, re-run liveness.  */
5976             liveness_pass_1(s);
5977         }
5978     }
5979 
5980     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5981                  && qemu_log_in_addr_range(pc_start))) {
5982         FILE *logfile = qemu_log_trylock();
5983         if (logfile) {
5984             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5985             tcg_dump_ops(s, logfile, true);
5986             fprintf(logfile, "\n");
5987             qemu_log_unlock(logfile);
5988         }
5989     }
5990 
5991     /* Initialize goto_tb jump offsets. */
5992     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
5993     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
5994     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
5995     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
5996 
5997     tcg_reg_alloc_start(s);
5998 
5999     /*
6000      * Reset the buffer pointers when restarting after overflow.
6001      * TODO: Move this into translate-all.c with the rest of the
6002      * buffer management.  Having only this done here is confusing.
6003      */
6004     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6005     s->code_ptr = s->code_buf;
6006 
6007 #ifdef TCG_TARGET_NEED_LDST_LABELS
6008     QSIMPLEQ_INIT(&s->ldst_labels);
6009 #endif
6010 #ifdef TCG_TARGET_NEED_POOL_LABELS
6011     s->pool_labels = NULL;
6012 #endif
6013 
6014     start_words = s->insn_start_words;
6015     s->gen_insn_data =
6016         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6017 
6018     tcg_out_tb_start(s);
6019 
6020     num_insns = -1;
6021     QTAILQ_FOREACH(op, &s->ops, link) {
6022         TCGOpcode opc = op->opc;
6023 
6024         switch (opc) {
6025         case INDEX_op_mov_i32:
6026         case INDEX_op_mov_i64:
6027         case INDEX_op_mov_vec:
6028             tcg_reg_alloc_mov(s, op);
6029             break;
6030         case INDEX_op_dup_vec:
6031             tcg_reg_alloc_dup(s, op);
6032             break;
6033         case INDEX_op_insn_start:
6034             if (num_insns >= 0) {
6035                 size_t off = tcg_current_code_size(s);
6036                 s->gen_insn_end_off[num_insns] = off;
6037                 /* Assert that we do not overflow our stored offset.  */
6038                 assert(s->gen_insn_end_off[num_insns] == off);
6039             }
6040             num_insns++;
6041             for (i = 0; i < start_words; ++i) {
6042                 s->gen_insn_data[num_insns * start_words + i] =
6043                     tcg_get_insn_start_param(op, i);
6044             }
6045             break;
6046         case INDEX_op_discard:
6047             temp_dead(s, arg_temp(op->args[0]));
6048             break;
6049         case INDEX_op_set_label:
6050             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6051             tcg_out_label(s, arg_label(op->args[0]));
6052             break;
6053         case INDEX_op_call:
6054             tcg_reg_alloc_call(s, op);
6055             break;
6056         case INDEX_op_exit_tb:
6057             tcg_out_exit_tb(s, op->args[0]);
6058             break;
6059         case INDEX_op_goto_tb:
6060             tcg_out_goto_tb(s, op->args[0]);
6061             break;
6062         case INDEX_op_dup2_vec:
6063             if (tcg_reg_alloc_dup2(s, op)) {
6064                 break;
6065             }
6066             /* fall through */
6067         default:
6068             /* Sanity check that we've not introduced any unhandled opcodes. */
6069             tcg_debug_assert(tcg_op_supported(opc));
6070             /* Note: in order to speed up the code, it would be much
6071                faster to have specialized register allocator functions for
6072                some common argument patterns */
6073             tcg_reg_alloc_op(s, op);
6074             break;
6075         }
6076         /* Test for (pending) buffer overflow.  The assumption is that any
6077            one operation beginning below the high water mark cannot overrun
6078            the buffer completely.  Thus we can test for overflow after
6079            generating code without having to check during generation.  */
6080         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6081             return -1;
6082         }
6083         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6084         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6085             return -2;
6086         }
6087     }
6088     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6089     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6090 
6091     /* Generate TB finalization at the end of block */
6092 #ifdef TCG_TARGET_NEED_LDST_LABELS
6093     i = tcg_out_ldst_finalize(s);
6094     if (i < 0) {
6095         return i;
6096     }
6097 #endif
6098 #ifdef TCG_TARGET_NEED_POOL_LABELS
6099     i = tcg_out_pool_finalize(s);
6100     if (i < 0) {
6101         return i;
6102     }
6103 #endif
6104     if (!tcg_resolve_relocs(s)) {
6105         return -2;
6106     }
6107 
6108 #ifndef CONFIG_TCG_INTERPRETER
6109     /* flush instruction cache */
6110     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6111                         (uintptr_t)s->code_buf,
6112                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6113 #endif
6114 
6115     return tcg_current_code_size(s);
6116 }
6117 
6118 void tcg_dump_info(GString *buf)
6119 {
6120     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6121 }
6122 
6123 #ifdef ELF_HOST_MACHINE
6124 /* In order to use this feature, the backend needs to do three things:
6125 
6126    (1) Define ELF_HOST_MACHINE to indicate both what value to
6127        put into the ELF image and to indicate support for the feature.
6128 
6129    (2) Define tcg_register_jit.  This should create a buffer containing
6130        the contents of a .debug_frame section that describes the post-
6131        prologue unwind info for the tcg machine.
6132 
6133    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6134 */
6135 
6136 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes of the GDB interface.  tcg_register_jit_int stores
 * JIT_REGISTER_FN into jit_descriptor.action_flag; the numeric values
 * are spelled out because they must match the GDB documentation.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
6142 
/*
 * One registered symbol file, as seen through the GDB interface.
 * The layout must match the GDB documentation; do not reorder fields.
 *
 * This file uses a single statically allocated entry whose
 * symfile_addr/symfile_size are set to the in-memory ELF image and whose
 * link pointers are left at their zero-initialized values.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;  /* presumably forward list link */
    struct jit_code_entry *prev_entry;  /* presumably backward list link */
    const void *symfile_addr;           /* start of the symbol file image */
    uint64_t symfile_size;              /* size of that image in bytes */
};
6149 
/*
 * Descriptor type of the GDB interface; the one instance in this file is
 * __jit_debug_descriptor.  The layout must match the GDB documentation.
 */
struct jit_descriptor {
    uint32_t version;                       /* statically initialized to 1 */
    uint32_t action_flag;                   /* holds a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* first registered entry */
};
6156 
/*
 * Notification hook of the GDB interface.  tcg_register_jit_int calls it
 * after filling in __jit_debug_descriptor.
 *
 * The function does no work itself.  It is declared noinline and contains
 * an empty asm statement.
 * NOTE(review): presumably both exist so that the function and calls to it
 * survive optimization and the debugger can intercept them; confirm
 * against the GDB JIT interface documentation.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6162 
6163 /* Must statically initialize the version, because GDB may check
6164    the version before we can set it.  */
6165 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6166 
6167 /* End GDB interface.  */
6168 
/*
 * Look up @str in the ELF-style string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings.  Offset 0 holds the
 * empty string and is skipped.  The first empty string found after offset
 * 0 is treated as the end of the table, so the table must be followed by
 * at least one extra NUL byte.
 *
 * Returns the byte offset of the first matching string, or 0 (the offset
 * of the empty name) when @str is not present.  Previously a missing
 * string made the scan run past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Hit the terminating empty string without finding a match. */
            return 0;
        }
        /* Step over the current string and its NUL terminator. */
        p += strlen(p) + 1;
    }
}
6180 
6181 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6182                                  const void *debug_frame,
6183                                  size_t debug_frame_size)
6184 {
6185     struct __attribute__((packed)) DebugInfo {
6186         uint32_t  len;
6187         uint16_t  version;
6188         uint32_t  abbrev;
6189         uint8_t   ptr_size;
6190         uint8_t   cu_die;
6191         uint16_t  cu_lang;
6192         uintptr_t cu_low_pc;
6193         uintptr_t cu_high_pc;
6194         uint8_t   fn_die;
6195         char      fn_name[16];
6196         uintptr_t fn_low_pc;
6197         uintptr_t fn_high_pc;
6198         uint8_t   cu_eoc;
6199     };
6200 
6201     struct ElfImage {
6202         ElfW(Ehdr) ehdr;
6203         ElfW(Phdr) phdr;
6204         ElfW(Shdr) shdr[7];
6205         ElfW(Sym)  sym[2];
6206         struct DebugInfo di;
6207         uint8_t    da[24];
6208         char       str[80];
6209     };
6210 
6211     struct ElfImage *img;
6212 
6213     static const struct ElfImage img_template = {
6214         .ehdr = {
6215             .e_ident[EI_MAG0] = ELFMAG0,
6216             .e_ident[EI_MAG1] = ELFMAG1,
6217             .e_ident[EI_MAG2] = ELFMAG2,
6218             .e_ident[EI_MAG3] = ELFMAG3,
6219             .e_ident[EI_CLASS] = ELF_CLASS,
6220             .e_ident[EI_DATA] = ELF_DATA,
6221             .e_ident[EI_VERSION] = EV_CURRENT,
6222             .e_type = ET_EXEC,
6223             .e_machine = ELF_HOST_MACHINE,
6224             .e_version = EV_CURRENT,
6225             .e_phoff = offsetof(struct ElfImage, phdr),
6226             .e_shoff = offsetof(struct ElfImage, shdr),
6227             .e_ehsize = sizeof(ElfW(Shdr)),
6228             .e_phentsize = sizeof(ElfW(Phdr)),
6229             .e_phnum = 1,
6230             .e_shentsize = sizeof(ElfW(Shdr)),
6231             .e_shnum = ARRAY_SIZE(img->shdr),
6232             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6233 #ifdef ELF_HOST_FLAGS
6234             .e_flags = ELF_HOST_FLAGS,
6235 #endif
6236 #ifdef ELF_OSABI
6237             .e_ident[EI_OSABI] = ELF_OSABI,
6238 #endif
6239         },
6240         .phdr = {
6241             .p_type = PT_LOAD,
6242             .p_flags = PF_X,
6243         },
6244         .shdr = {
6245             [0] = { .sh_type = SHT_NULL },
6246             /* Trick: The contents of code_gen_buffer are not present in
6247                this fake ELF file; that got allocated elsewhere.  Therefore
6248                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6249                will not look for contents.  We can record any address.  */
6250             [1] = { /* .text */
6251                 .sh_type = SHT_NOBITS,
6252                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6253             },
6254             [2] = { /* .debug_info */
6255                 .sh_type = SHT_PROGBITS,
6256                 .sh_offset = offsetof(struct ElfImage, di),
6257                 .sh_size = sizeof(struct DebugInfo),
6258             },
6259             [3] = { /* .debug_abbrev */
6260                 .sh_type = SHT_PROGBITS,
6261                 .sh_offset = offsetof(struct ElfImage, da),
6262                 .sh_size = sizeof(img->da),
6263             },
6264             [4] = { /* .debug_frame */
6265                 .sh_type = SHT_PROGBITS,
6266                 .sh_offset = sizeof(struct ElfImage),
6267             },
6268             [5] = { /* .symtab */
6269                 .sh_type = SHT_SYMTAB,
6270                 .sh_offset = offsetof(struct ElfImage, sym),
6271                 .sh_size = sizeof(img->sym),
6272                 .sh_info = 1,
6273                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6274                 .sh_entsize = sizeof(ElfW(Sym)),
6275             },
6276             [6] = { /* .strtab */
6277                 .sh_type = SHT_STRTAB,
6278                 .sh_offset = offsetof(struct ElfImage, str),
6279                 .sh_size = sizeof(img->str),
6280             }
6281         },
6282         .sym = {
6283             [1] = { /* code_gen_buffer */
6284                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6285                 .st_shndx = 1,
6286             }
6287         },
6288         .di = {
6289             .len = sizeof(struct DebugInfo) - 4,
6290             .version = 2,
6291             .ptr_size = sizeof(void *),
6292             .cu_die = 1,
6293             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6294             .fn_die = 2,
6295             .fn_name = "code_gen_buffer"
6296         },
6297         .da = {
6298             1,          /* abbrev number (the cu) */
6299             0x11, 1,    /* DW_TAG_compile_unit, has children */
6300             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6301             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6302             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6303             0, 0,       /* end of abbrev */
6304             2,          /* abbrev number (the fn) */
6305             0x2e, 0,    /* DW_TAG_subprogram, no children */
6306             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6307             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6308             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6309             0, 0,       /* end of abbrev */
6310             0           /* no more abbrev */
6311         },
6312         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6313                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6314     };
6315 
6316     /* We only need a single jit entry; statically allocate it.  */
6317     static struct jit_code_entry one_entry;
6318 
6319     uintptr_t buf = (uintptr_t)buf_ptr;
6320     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6321     DebugFrameHeader *dfh;
6322 
6323     img = g_malloc(img_size);
6324     *img = img_template;
6325 
6326     img->phdr.p_vaddr = buf;
6327     img->phdr.p_paddr = buf;
6328     img->phdr.p_memsz = buf_size;
6329 
6330     img->shdr[1].sh_name = find_string(img->str, ".text");
6331     img->shdr[1].sh_addr = buf;
6332     img->shdr[1].sh_size = buf_size;
6333 
6334     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6335     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6336 
6337     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6338     img->shdr[4].sh_size = debug_frame_size;
6339 
6340     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6341     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6342 
6343     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6344     img->sym[1].st_value = buf;
6345     img->sym[1].st_size = buf_size;
6346 
6347     img->di.cu_low_pc = buf;
6348     img->di.cu_high_pc = buf + buf_size;
6349     img->di.fn_low_pc = buf;
6350     img->di.fn_high_pc = buf + buf_size;
6351 
6352     dfh = (DebugFrameHeader *)(img + 1);
6353     memcpy(dfh, debug_frame, debug_frame_size);
6354     dfh->fde.func_start = buf;
6355     dfh->fde.func_len = buf_size;
6356 
6357 #ifdef DEBUG_JIT
6358     /* Enable this block to be able to debug the ELF image file creation.
6359        One can use readelf, objdump, or other inspection utilities.  */
6360     {
6361         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6362         FILE *f = fopen(jit, "w+b");
6363         if (f) {
6364             if (fwrite(img, img_size, 1, f) != img_size) {
6365                 /* Avoid stupid unused return value warning for fwrite.  */
6366             }
6367             fclose(f);
6368         }
6369     }
6370 #endif
6371 
6372     one_entry.symfile_addr = img;
6373     one_entry.symfile_size = img_size;
6374 
6375     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6376     __jit_debug_descriptor.relevant_entry = &one_entry;
6377     __jit_debug_descriptor.first_entry = &one_entry;
6378     __jit_debug_register_code();
6379 }
6380 #else
6381 /* No support for the feature.  Provide the entry point expected by exec.c,
6382    and implement the internal function we declared earlier.  */
6383 
/*
 * Variant compiled when ELF_HOST_MACHINE is not defined.  It takes the
 * same arguments as the real implementation and ignores all of them.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty. */
}
6389 
/*
 * Public registration entry point for builds where ELF_HOST_MACHINE is
 * not defined.  @buf and @buf_size are accepted and ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty. */
}
6393 #endif /* ELF_HOST_MACHINE */
6394 
6395 #if !TCG_TARGET_MAYBE_vec
/*
 * Fallback definition compiled only when TCG_TARGET_MAYBE_vec is zero.
 * It ignores its arguments and asserts that it is never reached.
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6400 #endif
6401