xref: /openbmc/qemu/tcg/tcg.c (revision fa3673e4)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "accel/tcg/perf.h"
59 #ifdef CONFIG_USER_ONLY
60 #include "exec/user/guest-base.h"
61 #endif
62 
/*
 * Forward declarations for functions defined in tcg-target.c.inc
 * (which is #included further down in this file) and used here.
 */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
/* Returns false on failure; see tcg_resolve_relocs for the caller. */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
69 
/*
 * The CIE and FDE header definitions will be common to all hosts.
 *
 * DebugFrameCIE: header of the CIE record.  The first member is aligned
 * to the size of a host pointer.
 * NOTE(review): field meanings presumably follow the DWARF .debug_frame
 * CIE layout -- confirm against the DWARF specification.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];       /* single-byte augmentation string */
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
80 
/*
 * DebugFrameFDEHeader: packed header of the FDE record.  As with the CIE,
 * the leading length word is aligned to the size of a host pointer.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;       /* host-pointer-sized start value */
    uintptr_t func_len;         /* host-pointer-sized length value */
} DebugFrameFDEHeader;
87 
/* A CIE immediately followed by an FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
92 
/*
 * Bookkeeping for one qemu_ld/qemu_st operation.  Records are chained
 * together through @next.
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* address following the qemu_ld/st op */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;   /* queue linkage */
} TCGLabelQemuLdst;
105 
/*
 * Forward declaration.  Marked unused so that a configuration which never
 * calls it does not trigger an unused-function warning.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
110 
/*
 * Forward declarations for functions declared and used in tcg-target.c.inc.
 * They are also called by the generic helpers in this file (for example
 * tcg_out_movext uses the ext, mov and extrl emitters declared here).
 */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
/* Sign/zero extension emitters; the signed 8/16-bit forms also take a type. */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
/* Returns false when no exchange was emitted (callers then use a scratch). */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector emitters.  When TCG_TARGET_MAYBE_vec is set they are only declared
 * here; otherwise inline stubs are supplied whose bodies assert that they
 * are never reached.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/* Stub: must never be called when the backend has no vector support. */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub: must never be called when the backend has no vector support. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
/* Further forward declarations: store, call and constraint helpers. */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece);
/* Only declared when the backend requests ldst labels. */
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif
180 
/*
 * Outside user-only builds, guest_base expands to a statement expression
 * that invokes qemu_build_not_reached() and yields (uintptr_t)0.
 * NOTE(review): presumably this turns any reference that survives dead-code
 * elimination into a build failure -- confirm against the definition of
 * qemu_build_not_reached().
 */
#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
184 
/*
 * Parameter bundle passed to tcg_out_ld_helper_args, tcg_out_ld_helper_ret
 * and tcg_out_st_helper_args.
 */
typedef struct TCGLdstHelperParam {
    /* Callback taking the context, the ldst label and a register number. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;      /* presumably the number of valid entries in tmp[] */
    int tmp[3];         /* presumably temporary register numbers -- confirm */
} TCGLdstHelperParam;
190 
/*
 * Forward declarations of the helper-call argument/return emitters.
 * Each is marked unused so that it may go unreferenced without a warning.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
200 
/*
 * Load helper entry points, indexed by the size+sign part of a MemOp.
 * The sign-extending 32-bit and the 128-bit entries are only populated
 * when TCG_TARGET_REG_BITS is 64; unlisted slots are NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
213 
/*
 * Store helper entry points, indexed by the size part of a MemOp.
 * The 128-bit entry is only populated when TCG_TARGET_REG_BITS is 64.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
223 
/* Result pair returned by atom_and_align_for_opc. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
228 
/* Forward declaration; marked unused so it may go unreferenced. */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));
232 
/* Only defined for user-only builds. */
#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

/*
 * The initial context.  tcg_register_thread() either points tcg_ctx at it
 * (user-only builds) or copies it into a freshly allocated context.
 */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context in use. */
__thread TCGContext *tcg_ctx;

/*
 * Registered contexts: tcg_cur_ctxs counts the entries claimed so far and
 * is asserted to stay below tcg_max_ctxs when a thread registers.
 */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

/* Not defined when the TCG interpreter is configured. */
#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Register sets private to this file. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
253 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the output stream and advance the output pointer. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *slot = s->code_ptr;

    s->code_ptr = slot + 1;
    *slot = v;
}

/* Overwrite the insn unit at @p with @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
266 
267 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
268 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
269 {
270     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
271         *s->code_ptr++ = v;
272     } else {
273         tcg_insn_unit *p = s->code_ptr;
274         memcpy(p, &v, sizeof(v));
275         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
276     }
277 }
278 
279 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
280                                                        uint16_t v)
281 {
282     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
283         *p = v;
284     } else {
285         memcpy(p, &v, sizeof(v));
286     }
287 }
288 #endif
289 
290 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
291 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
292 {
293     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
294         *s->code_ptr++ = v;
295     } else {
296         tcg_insn_unit *p = s->code_ptr;
297         memcpy(p, &v, sizeof(v));
298         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
299     }
300 }
301 
302 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
303                                                        uint32_t v)
304 {
305     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
306         *p = v;
307     } else {
308         memcpy(p, &v, sizeof(v));
309     }
310 }
311 #endif
312 
313 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
314 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
315 {
316     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
317         *s->code_ptr++ = v;
318     } else {
319         tcg_insn_unit *p = s->code_ptr;
320         memcpy(p, &v, sizeof(v));
321         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
322     }
323 }
324 
325 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
326                                                        uint64_t v)
327 {
328     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
329         *p = v;
330     } else {
331         memcpy(p, &v, sizeof(v));
332     }
333 }
334 #endif
335 
336 /* label relocation processing */
337 
338 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
339                           TCGLabel *l, intptr_t addend)
340 {
341     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
342 
343     r->type = type;
344     r->ptr = code_ptr;
345     r->addend = addend;
346     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
347 }
348 
349 static void tcg_out_label(TCGContext *s, TCGLabel *l)
350 {
351     tcg_debug_assert(!l->has_value);
352     l->has_value = 1;
353     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
354 }
355 
356 TCGLabel *gen_new_label(void)
357 {
358     TCGContext *s = tcg_ctx;
359     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
360 
361     memset(l, 0, sizeof(TCGLabel));
362     l->id = s->nb_labels++;
363     QSIMPLEQ_INIT(&l->branches);
364     QSIMPLEQ_INIT(&l->relocs);
365 
366     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
367 
368     return l;
369 }
370 
371 static bool tcg_resolve_relocs(TCGContext *s)
372 {
373     TCGLabel *l;
374 
375     QSIMPLEQ_FOREACH(l, &s->labels, next) {
376         TCGRelocation *r;
377         uintptr_t value = l->u.value;
378 
379         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
380             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
381                 return false;
382             }
383         }
384     }
385     return true;
386 }
387 
/*
 * Record the current code size as the jump-reset offset for exit @which
 * of the translation block being generated.
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
396 
/*
 * Record the current code size as the jump-insn offset for exit @which
 * of the translation block being generated.  Marked unused because not
 * every configuration calls it.
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
405 
406 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
407 {
408     /*
409      * Return the read-execute version of the pointer, for the benefit
410      * of any pc-relative addressing mode.
411      */
412     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
413 }
414 
415 static int __attribute__((unused))
416 tlb_mask_table_ofs(TCGContext *s, int which)
417 {
418     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
419             sizeof(CPUNegativeOffsetState));
420 }
421 
/*
 * Signal overflow, starting over with fewer guest insns.
 *
 * Does not return: control transfers via siglongjmp() to the jump buffer
 * s->jmp_trans with the value -2.
 */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}
428 
429 /*
430  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
431  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
432  *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
437  */
/* One "move and extend" request, as consumed by tcg_out_movext1. */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register, or an argument slot number */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type for the destination */
    TCGType src_type;   /* integral type for the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
445 
446 /**
447  * tcg_out_movext -- move and extend
448  * @s: tcg context
449  * @dst_type: integral type for destination
450  * @dst: destination register
451  * @src_type: integral type for source
452  * @src_ext: extension to apply to source
453  * @src: source register
454  *
455  * Move or extend @src into @dst, depending on @src_ext and the types.
456  */
457 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
458                            TCGType src_type, MemOp src_ext, TCGReg src)
459 {
460     switch (src_ext) {
461     case MO_UB:
462         tcg_out_ext8u(s, dst, src);
463         break;
464     case MO_SB:
465         tcg_out_ext8s(s, dst_type, dst, src);
466         break;
467     case MO_UW:
468         tcg_out_ext16u(s, dst, src);
469         break;
470     case MO_SW:
471         tcg_out_ext16s(s, dst_type, dst, src);
472         break;
473     case MO_UL:
474     case MO_SL:
475         if (dst_type == TCG_TYPE_I32) {
476             if (src_type == TCG_TYPE_I32) {
477                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
478             } else {
479                 tcg_out_extrl_i64_i32(s, dst, src);
480             }
481         } else if (src_type == TCG_TYPE_I32) {
482             if (src_ext & MO_SIGN) {
483                 tcg_out_exts_i32_i64(s, dst, src);
484             } else {
485                 tcg_out_extu_i32_i64(s, dst, src);
486             }
487         } else {
488             if (src_ext & MO_SIGN) {
489                 tcg_out_ext32s(s, dst, src);
490             } else {
491                 tcg_out_ext32u(s, dst, src);
492             }
493         }
494         break;
495     case MO_UQ:
496         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
497         if (dst_type == TCG_TYPE_I32) {
498             tcg_out_extrl_i64_i32(s, dst, src);
499         } else {
500             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
501         }
502         break;
503     default:
504         g_assert_not_reached();
505     }
506 }
507 
508 /* Minor variations on a theme, using a structure. */
509 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
510                                     TCGReg src)
511 {
512     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
513 }
514 
515 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
516 {
517     tcg_out_movext1_new_src(s, i, i->src);
518 }
519 
520 /**
521  * tcg_out_movext2 -- move and extend two pair
522  * @s: tcg context
523  * @i1: first move description
524  * @i2: second move description
525  * @scratch: temporary register, or -1 for none
526  *
527  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
528  * between the sources and destinations.
529  */
530 
531 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
532                             const TCGMovExtend *i2, int scratch)
533 {
534     TCGReg src1 = i1->src;
535     TCGReg src2 = i2->src;
536 
537     if (i1->dst != src2) {
538         tcg_out_movext1(s, i1);
539         tcg_out_movext1(s, i2);
540         return;
541     }
542     if (i2->dst == src1) {
543         TCGType src1_type = i1->src_type;
544         TCGType src2_type = i2->src_type;
545 
546         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
547             /* The data is now in the correct registers, now extend. */
548             src1 = i2->src;
549             src2 = i1->src;
550         } else {
551             tcg_debug_assert(scratch >= 0);
552             tcg_out_mov(s, src1_type, scratch, src1);
553             src1 = scratch;
554         }
555     }
556     tcg_out_movext1_new_src(s, i2, src2);
557     tcg_out_movext1_new_src(s, i1, src1);
558 }
559 
560 /**
561  * tcg_out_movext3 -- move and extend three pair
562  * @s: tcg context
563  * @i1: first move description
564  * @i2: second move description
565  * @i3: third move description
566  * @scratch: temporary register, or -1 for none
567  *
568  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
569  * between the sources and destinations.
570  */
571 
572 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
573                             const TCGMovExtend *i2, const TCGMovExtend *i3,
574                             int scratch)
575 {
576     TCGReg src1 = i1->src;
577     TCGReg src2 = i2->src;
578     TCGReg src3 = i3->src;
579 
580     if (i1->dst != src2 && i1->dst != src3) {
581         tcg_out_movext1(s, i1);
582         tcg_out_movext2(s, i2, i3, scratch);
583         return;
584     }
585     if (i2->dst != src1 && i2->dst != src3) {
586         tcg_out_movext1(s, i2);
587         tcg_out_movext2(s, i1, i3, scratch);
588         return;
589     }
590     if (i3->dst != src1 && i3->dst != src2) {
591         tcg_out_movext1(s, i3);
592         tcg_out_movext2(s, i1, i2, scratch);
593         return;
594     }
595 
596     /*
597      * There is a cycle.  Since there are only 3 nodes, the cycle is
598      * either "clockwise" or "anti-clockwise", and can be solved with
599      * a single scratch or two xchg.
600      */
601     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
602         /* "Clockwise" */
603         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
604             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
605             /* The data is now in the correct registers, now extend. */
606             tcg_out_movext1_new_src(s, i1, i1->dst);
607             tcg_out_movext1_new_src(s, i2, i2->dst);
608             tcg_out_movext1_new_src(s, i3, i3->dst);
609         } else {
610             tcg_debug_assert(scratch >= 0);
611             tcg_out_mov(s, i1->src_type, scratch, src1);
612             tcg_out_movext1(s, i3);
613             tcg_out_movext1(s, i2);
614             tcg_out_movext1_new_src(s, i1, scratch);
615         }
616     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
617         /* "Anti-clockwise" */
618         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
619             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
620             /* The data is now in the correct registers, now extend. */
621             tcg_out_movext1_new_src(s, i1, i1->dst);
622             tcg_out_movext1_new_src(s, i2, i2->dst);
623             tcg_out_movext1_new_src(s, i3, i3->dst);
624         } else {
625             tcg_debug_assert(scratch >= 0);
626             tcg_out_mov(s, i1->src_type, scratch, src1);
627             tcg_out_movext1(s, i2);
628             tcg_out_movext1(s, i3);
629             tcg_out_movext1_new_src(s, i1, scratch);
630         }
631     } else {
632         g_assert_not_reached();
633     }
634 }
635 
/*
 * Token-pasting helpers: form a single identifier from the prefix P
 * followed by the arguments A..F joined with underscores.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
642 
/*
 * Define an enumeration for the various combinations.
 *
 * First expansion of tcg-target-con-set.h: each C_* entry becomes one
 * enumerator (note the trailing comma in every macro body).
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

/* Provided by the backend; maps an opcode to one of the enumerators. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

/* Drop the enumerator-list definitions before the next expansion. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
683 
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * Second expansion of tcg-target-con-set.h: each C_* entry becomes one
 * initializer whose args_ct_str holds the stringified constraint letters.
 * The C_N1_* forms prefix the first output string with "&".
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


/* Drop the initializer definitions before the next expansion. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
723 
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * Third definition of the C_* macros: the same enumerator names as in the
 * enum, but without a trailing comma, so they can be used as expressions
 * inside the backend source included below.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/* The host backend implementation is textually included here. */
#include "tcg-target.c.inc"
745 
#ifndef CONFIG_TCG_INTERPRETER
/*
 * Validate CPUTLBDescFast placement.
 *
 * Build-time check: the offset of tlb.f[0] relative to the end of
 * CPUNegativeOffsetState must not be below MIN_TLB_MASK_TABLE_OFS.
 */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif
752 
753 static void alloc_tcg_plugin_context(TCGContext *s)
754 {
755 #ifdef CONFIG_PLUGIN
756     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
757     s->plugin_tb->insns =
758         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
759 #endif
760 }
761 
762 /*
763  * All TCG threads except the parent (i.e. the one that called tcg_context_init
764  * and registered the target's TCG globals) must register with this function
765  * before initiating translation.
766  *
767  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
768  * of tcg_region_init() for the reasoning behind this.
769  *
770  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
771  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
772  * is not used anymore for translation once this function is called.
773  *
774  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
775  * iterates over the array (e.g. tcg_code_size() the same for both system/user
776  * modes.
777  */
778 #ifdef CONFIG_USER_ONLY
779 void tcg_register_thread(void)
780 {
781     tcg_ctx = &tcg_init_ctx;
782 }
783 #else
784 void tcg_register_thread(void)
785 {
786     TCGContext *s = g_malloc(sizeof(*s));
787     unsigned int i, n;
788 
789     *s = tcg_init_ctx;
790 
791     /* Relink mem_base.  */
792     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
793         if (tcg_init_ctx.temps[i].mem_base) {
794             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
795             tcg_debug_assert(b >= 0 && b < n);
796             s->temps[i].mem_base = &s->temps[b];
797         }
798     }
799 
800     /* Claim an entry in tcg_ctxs */
801     n = qatomic_fetch_inc(&tcg_cur_ctxs);
802     g_assert(n < tcg_max_ctxs);
803     qatomic_set(&tcg_ctxs[n], s);
804 
805     if (n > 0) {
806         alloc_tcg_plugin_context(s);
807         tcg_region_initial_alloc(s);
808     }
809 
810     tcg_ctx = s;
811 }
812 #endif /* !CONFIG_USER_ONLY */
813 
814 /* pool based memory allocation */
815 void *tcg_malloc_internal(TCGContext *s, int size)
816 {
817     TCGPool *p;
818     int pool_size;
819 
820     if (size > TCG_POOL_CHUNK_SIZE) {
821         /* big malloc: insert a new pool (XXX: could optimize) */
822         p = g_malloc(sizeof(TCGPool) + size);
823         p->size = size;
824         p->next = s->pool_first_large;
825         s->pool_first_large = p;
826         return p->data;
827     } else {
828         p = s->pool_current;
829         if (!p) {
830             p = s->pool_first;
831             if (!p)
832                 goto new_pool;
833         } else {
834             if (!p->next) {
835             new_pool:
836                 pool_size = TCG_POOL_CHUNK_SIZE;
837                 p = g_malloc(sizeof(TCGPool) + pool_size);
838                 p->size = pool_size;
839                 p->next = NULL;
840                 if (s->pool_current) {
841                     s->pool_current->next = p;
842                 } else {
843                     s->pool_first = p;
844                 }
845             } else {
846                 p = p->next;
847             }
848         }
849     }
850     s->pool_current = p;
851     s->pool_cur = p->data + size;
852     s->pool_end = p->data + p->size;
853     return p->data;
854 }
855 
856 void tcg_pool_reset(TCGContext *s)
857 {
858     TCGPool *p, *t;
859     for (p = s->pool_first_large; p; p = t) {
860         t = p->next;
861         g_free(p);
862     }
863     s->pool_first_large = NULL;
864     s->pool_cur = s->pool_end = NULL;
865     s->pool_current = NULL;
866 }
867 
868 /*
869  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
870  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
871  * We only use these for layout in tcg_out_ld_helper_ret and
872  * tcg_out_st_helper_args, and share them between several of
873  * the helpers, with the end result that it's easier to build manually.
874  */
875 
/*
 * "ttl" is the typecode used below for a tcg_target_ulong return value:
 * it aliases the i32 typecode when TCG_TARGET_REG_BITS is 32 and the
 * i64 typecode otherwise.
 */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif
881 
/*
 * Load helpers.  In each typemask, field 0 describes the return value and
 * fields 1..4 describe the arguments (env, addr, oi, ra); the three
 * variants differ only in the return type.  The remaining layout fields
 * are filled in by init_call_layout(), called from tcg_context_init().
 */

/* Load returning a tcg_target_ulong. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Load returning a uint64_t. */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

/* Load returning an Int128. */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
908 
/*
 * Store helpers.  All return void (field 0) and take the arguments
 * (env, addr, data, oi, ra) in fields 1..5; the three variants differ
 * only in the type of the data argument.  The remaining layout fields
 * are filled in by init_call_layout(), called from tcg_context_init().
 */

/* Store of a uint32_t value. */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Store of a uint64_t value. */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

/* Store of an Int128 value. */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
938 
939 #ifdef CONFIG_TCG_INTERPRETER
940 static ffi_type *typecode_to_ffi(int argmask)
941 {
942     /*
943      * libffi does not support __int128_t, so we have forced Int128
944      * to use the structure definition instead of the builtin type.
945      */
946     static ffi_type *ffi_type_i128_elements[3] = {
947         &ffi_type_uint64,
948         &ffi_type_uint64,
949         NULL
950     };
951     static ffi_type ffi_type_i128 = {
952         .size = 16,
953         .alignment = __alignof__(Int128),
954         .type = FFI_TYPE_STRUCT,
955         .elements = ffi_type_i128_elements,
956     };
957 
958     switch (argmask) {
959     case dh_typecode_void:
960         return &ffi_type_void;
961     case dh_typecode_i32:
962         return &ffi_type_uint32;
963     case dh_typecode_s32:
964         return &ffi_type_sint32;
965     case dh_typecode_i64:
966         return &ffi_type_uint64;
967     case dh_typecode_s64:
968         return &ffi_type_sint64;
969     case dh_typecode_ptr:
970         return &ffi_type_pointer;
971     case dh_typecode_i128:
972         return &ffi_type_i128;
973     }
974     g_assert_not_reached();
975 }
976 
977 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
978 {
979     unsigned typemask = info->typemask;
980     struct {
981         ffi_cif cif;
982         ffi_type *args[];
983     } *ca;
984     ffi_status status;
985     int nargs;
986 
987     /* Ignoring the return type, find the last non-zero field. */
988     nargs = 32 - clz32(typemask >> 3);
989     nargs = DIV_ROUND_UP(nargs, 3);
990     assert(nargs <= MAX_CALL_IARGS);
991 
992     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
993     ca->cif.rtype = typecode_to_ffi(typemask & 7);
994     ca->cif.nargs = nargs;
995 
996     if (nargs != 0) {
997         ca->cif.arg_types = ca->args;
998         for (int j = 0; j < nargs; ++j) {
999             int typecode = extract32(typemask, (j + 1) * 3, 3);
1000             ca->args[j] = typecode_to_ffi(typecode);
1001         }
1002     }
1003 
1004     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1005                           ca->cif.rtype, ca->cif.arg_types);
1006     assert(status == FFI_OK);
1007 
1008     return &ca->cif;
1009 }
1010 
/*
 * HELPER_INFO_INIT(I) names the TCGHelperInfo field that differs between
 * the two builds: the "cif" field when building the TCG interpreter, the
 * "init" field otherwise.  HELPER_INFO_INIT_VAL(I) is the matching value:
 * the ffi_cif built by init_ffi_layout(), or plain 1.
 * NOTE(review): presumably the value is stored into that field once the
 * helper's layout has been computed -- the users are outside this view.
 */
#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */
1017 
1018 static inline bool arg_slot_reg_p(unsigned arg_slot)
1019 {
1020     /*
1021      * Split the sizeof away from the comparison to avoid Werror from
1022      * "unsigned < 0 is always false", when iarg_regs is empty.
1023      */
1024     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1025     return arg_slot < nreg;
1026 }
1027 
1028 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1029 {
1030     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1031     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1032 
1033     tcg_debug_assert(stk_slot < max);
1034     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1035 }
1036 
/*
 * Running cursors used while init_call_layout() walks a helper's
 * arguments.  All four start at zero (arg_slot may be pre-set to 1 when
 * the return value is passed by reference) and are advanced by the
 * layout_arg_*() helpers.
 */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1043 
1044 static void layout_arg_even(TCGCumulativeArgs *cum)
1045 {
1046     cum->arg_slot += cum->arg_slot & 1;
1047 }
1048 
1049 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1050                          TCGCallArgumentKind kind)
1051 {
1052     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1053 
1054     *loc = (TCGCallArgumentLoc){
1055         .kind = kind,
1056         .arg_idx = cum->arg_idx,
1057         .arg_slot = cum->arg_slot,
1058     };
1059     cum->info_in_idx++;
1060     cum->arg_slot++;
1061 }
1062 
1063 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1064                                 TCGHelperInfo *info, int n)
1065 {
1066     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1067 
1068     for (int i = 0; i < n; ++i) {
1069         /* Layout all using the same arg_idx, adjusting the subindex. */
1070         loc[i] = (TCGCallArgumentLoc){
1071             .kind = TCG_CALL_ARG_NORMAL,
1072             .arg_idx = cum->arg_idx,
1073             .tmp_subindex = i,
1074             .arg_slot = cum->arg_slot + i,
1075         };
1076     }
1077     cum->info_in_idx += n;
1078     cum->arg_slot += n;
1079 }
1080 
1081 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1082 {
1083     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1084     int n = 128 / TCG_TARGET_REG_BITS;
1085 
1086     /* The first subindex carries the pointer. */
1087     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1088 
1089     /*
1090      * The callee is allowed to clobber memory associated with
1091      * structure pass by-reference.  Therefore we must make copies.
1092      * Allocate space from "ref_slot", which will be adjusted to
1093      * follow the parameters on the stack.
1094      */
1095     loc[0].ref_slot = cum->ref_slot;
1096 
1097     /*
1098      * Subsequent words also go into the reference slot, but
1099      * do not accumulate into the regular arguments.
1100      */
1101     for (int i = 1; i < n; ++i) {
1102         loc[i] = (TCGCallArgumentLoc){
1103             .kind = TCG_CALL_ARG_BY_REF_N,
1104             .arg_idx = cum->arg_idx,
1105             .tmp_subindex = i,
1106             .ref_slot = cum->ref_slot + i,
1107         };
1108     }
1109     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1110     cum->ref_slot += n;
1111 }
1112 
/*
 * Compute the call layout of a helper from @info->typemask and store it
 * back into @info.
 *
 * The typemask is a sequence of 3-bit typecodes: the lowest field is the
 * return type, each following field is one argument, and the argument
 * list ends where the remaining mask becomes zero.
 *
 * On return the function has filled in:
 *   - info->nr_out and, for non-void returns, info->out_kind;
 *   - info->in[0 .. nr_in-1], one TCGCallArgumentLoc per argument piece;
 *   - info->nr_in.
 *
 * The placement policy for each type is selected by the backend macros
 * TCG_TARGET_CALL_ARG_I32/I64/I128 and TCG_TARGET_CALL_RET_I128.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* One output per host word needed to hold 64 bits. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        /* One output per host word needed to hold 128 bits. */
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     * Each iteration consumes one 3-bit field and one tcg_gen_callN
     * argument index; the loop stops once no set bits remain.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* Step 1: reduce the typecode to the TCG type being passed. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Step 2: place it according to the backend's policy for the type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Bit 0 of the typecode is added to pick the EXTEND kind. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    /* Two host words, sharing one arg_idx. */
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        /* Only arguments that spilled to the stack push the area back. */
        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        /* Express the base in the combined regs+stack slot numbering. */
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1293 
/*
 * Variant of tcg_target_reg_alloc_order that is filled in by
 * tcg_context_init(): the leading run of registers not in
 * tcg_target_call_clobber_regs is stored in reverse order, the rest is
 * copied unchanged.
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations for functions used by tcg_context_init() below. */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
1298 
1299 static void tcg_context_init(unsigned max_cpus)
1300 {
1301     TCGContext *s = &tcg_init_ctx;
1302     int op, total_args, n, i;
1303     TCGOpDef *def;
1304     TCGArgConstraint *args_ct;
1305     TCGTemp *ts;
1306 
1307     memset(s, 0, sizeof(*s));
1308     s->nb_globals = 0;
1309 
1310     /* Count total number of arguments and allocate the corresponding
1311        space */
1312     total_args = 0;
1313     for(op = 0; op < NB_OPS; op++) {
1314         def = &tcg_op_defs[op];
1315         n = def->nb_iargs + def->nb_oargs;
1316         total_args += n;
1317     }
1318 
1319     args_ct = g_new0(TCGArgConstraint, total_args);
1320 
1321     for(op = 0; op < NB_OPS; op++) {
1322         def = &tcg_op_defs[op];
1323         def->args_ct = args_ct;
1324         n = def->nb_iargs + def->nb_oargs;
1325         args_ct += n;
1326     }
1327 
1328     init_call_layout(&info_helper_ld32_mmu);
1329     init_call_layout(&info_helper_ld64_mmu);
1330     init_call_layout(&info_helper_ld128_mmu);
1331     init_call_layout(&info_helper_st32_mmu);
1332     init_call_layout(&info_helper_st64_mmu);
1333     init_call_layout(&info_helper_st128_mmu);
1334 
1335     tcg_target_init(s);
1336     process_op_defs(s);
1337 
1338     /* Reverse the order of the saved registers, assuming they're all at
1339        the start of tcg_target_reg_alloc_order.  */
1340     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1341         int r = tcg_target_reg_alloc_order[n];
1342         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1343             break;
1344         }
1345     }
1346     for (i = 0; i < n; ++i) {
1347         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1348     }
1349     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1350         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1351     }
1352 
1353     alloc_tcg_plugin_context(s);
1354 
1355     tcg_ctx = s;
1356     /*
1357      * In user-mode we simply share the init context among threads, since we
1358      * use a single region. See the documentation tcg_region_init() for the
1359      * reasoning behind this.
1360      * In system-mode we will have at most max_cpus TCG threads.
1361      */
1362 #ifdef CONFIG_USER_ONLY
1363     tcg_ctxs = &tcg_ctx;
1364     tcg_cur_ctxs = 1;
1365     tcg_max_ctxs = 1;
1366 #else
1367     tcg_max_ctxs = max_cpus;
1368     tcg_ctxs = g_new0(TCGContext *, max_cpus);
1369 #endif
1370 
1371     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1372     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1373     tcg_env = temp_tcgv_ptr(ts);
1374 }
1375 
/*
 * Initialize TCG: first the initial context (tcg_context_init), then the
 * code regions (tcg_region_init).  @tb_size and @splitwx are passed
 * through to tcg_region_init() untouched; @max_cpus is given to both.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1381 
1382 /*
1383  * Allocate TBs right before their corresponding translated code, making
1384  * sure that TBs and code are on different cache lines.
1385  */
1386 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1387 {
1388     uintptr_t align = qemu_icache_linesize;
1389     TranslationBlock *tb;
1390     void *next;
1391 
1392  retry:
1393     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1394     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1395 
1396     if (unlikely(next > s->code_gen_highwater)) {
1397         if (tcg_region_alloc(s)) {
1398             return NULL;
1399         }
1400         goto retry;
1401     }
1402     qatomic_set(&s->code_gen_ptr, next);
1403     s->data_gen_ptr = NULL;
1404     return tb;
1405 }
1406 
/*
 * Generate the backend prologue at the current code_gen_ptr of tcg_ctx.
 *
 * Besides emitting the code via tcg_target_qemu_prologue(), this:
 *   - points tcg_qemu_tb_exec at the executable view of the prologue
 *     (not for the TCG interpreter);
 *   - finalizes any constant-pool entries the prologue created (only
 *     with TCG_TARGET_NEED_POOL_LABELS);
 *   - reports the prologue to perf and flushes the caches over it;
 *   - dumps the prologue to the log when CPU_LOG_TB_OUT_ASM is enabled;
 *   - hands the context to tcg_region_prologue_set().
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    /* The prologue is emitted starting at the current output position. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Flush using both the executable and the writable view of the code. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /*
                 * data_gen_ptr splits the output: disassemble what lies
                 * before it, dump the rest as raw words.
                 */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1487 
1488 void tcg_func_start(TCGContext *s)
1489 {
1490     tcg_pool_reset(s);
1491     s->nb_temps = s->nb_globals;
1492 
1493     /* No temps have been previously allocated for size or locality.  */
1494     memset(s->free_temps, 0, sizeof(s->free_temps));
1495 
1496     /* No constant temps have been previously allocated. */
1497     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1498         if (s->const_table[i]) {
1499             g_hash_table_remove_all(s->const_table[i]);
1500         }
1501     }
1502 
1503     s->nb_ops = 0;
1504     s->nb_labels = 0;
1505     s->current_frame_offset = s->frame_start;
1506 
1507 #ifdef CONFIG_DEBUG_TCG
1508     s->goto_tb_issue_mask = 0;
1509 #endif
1510 
1511     QTAILQ_INIT(&s->ops);
1512     QTAILQ_INIT(&s->free_ops);
1513     QSIMPLEQ_INIT(&s->labels);
1514 
1515     tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
1516                      s->addr_type == TCG_TYPE_I64);
1517 
1518     tcg_debug_assert(s->insn_start_words > 0);
1519 }
1520 
1521 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1522 {
1523     int n = s->nb_temps++;
1524 
1525     if (n >= TCG_MAX_TEMPS) {
1526         tcg_raise_tb_overflow(s);
1527     }
1528     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1529 }
1530 
1531 static TCGTemp *tcg_global_alloc(TCGContext *s)
1532 {
1533     TCGTemp *ts;
1534 
1535     tcg_debug_assert(s->nb_globals == s->nb_temps);
1536     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1537     s->nb_globals++;
1538     ts = tcg_temp_alloc(s);
1539     ts->kind = TEMP_GLOBAL;
1540 
1541     return ts;
1542 }
1543 
1544 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1545                                             TCGReg reg, const char *name)
1546 {
1547     TCGTemp *ts;
1548 
1549     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1550 
1551     ts = tcg_global_alloc(s);
1552     ts->base_type = type;
1553     ts->type = type;
1554     ts->kind = TEMP_FIXED;
1555     ts->reg = reg;
1556     ts->name = name;
1557     tcg_regset_set_reg(s->reserved_regs, reg);
1558 
1559     return ts;
1560 }
1561 
1562 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1563 {
1564     s->frame_start = start;
1565     s->frame_end = start + size;
1566     s->frame_temp
1567         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1568 }
1569 
1570 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1571                                      intptr_t offset, const char *name)
1572 {
1573     TCGContext *s = tcg_ctx;
1574     TCGTemp *base_ts = tcgv_ptr_temp(base);
1575     TCGTemp *ts = tcg_global_alloc(s);
1576     int indirect_reg = 0;
1577 
1578     switch (base_ts->kind) {
1579     case TEMP_FIXED:
1580         break;
1581     case TEMP_GLOBAL:
1582         /* We do not support double-indirect registers.  */
1583         tcg_debug_assert(!base_ts->indirect_reg);
1584         base_ts->indirect_base = 1;
1585         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1586                             ? 2 : 1);
1587         indirect_reg = 1;
1588         break;
1589     default:
1590         g_assert_not_reached();
1591     }
1592 
1593     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1594         TCGTemp *ts2 = tcg_global_alloc(s);
1595         char buf[64];
1596 
1597         ts->base_type = TCG_TYPE_I64;
1598         ts->type = TCG_TYPE_I32;
1599         ts->indirect_reg = indirect_reg;
1600         ts->mem_allocated = 1;
1601         ts->mem_base = base_ts;
1602         ts->mem_offset = offset;
1603         pstrcpy(buf, sizeof(buf), name);
1604         pstrcat(buf, sizeof(buf), "_0");
1605         ts->name = strdup(buf);
1606 
1607         tcg_debug_assert(ts2 == ts + 1);
1608         ts2->base_type = TCG_TYPE_I64;
1609         ts2->type = TCG_TYPE_I32;
1610         ts2->indirect_reg = indirect_reg;
1611         ts2->mem_allocated = 1;
1612         ts2->mem_base = base_ts;
1613         ts2->mem_offset = offset + 4;
1614         ts2->temp_subindex = 1;
1615         pstrcpy(buf, sizeof(buf), name);
1616         pstrcat(buf, sizeof(buf), "_1");
1617         ts2->name = strdup(buf);
1618     } else {
1619         ts->base_type = type;
1620         ts->type = type;
1621         ts->indirect_reg = indirect_reg;
1622         ts->mem_allocated = 1;
1623         ts->mem_base = base_ts;
1624         ts->mem_offset = offset;
1625         ts->name = name;
1626     }
1627     return ts;
1628 }
1629 
1630 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1631 {
1632     TCGContext *s = tcg_ctx;
1633     TCGTemp *ts;
1634     int n;
1635 
1636     if (kind == TEMP_EBB) {
1637         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1638 
1639         if (idx < TCG_MAX_TEMPS) {
1640             /* There is already an available temp with the right type.  */
1641             clear_bit(idx, s->free_temps[type].l);
1642 
1643             ts = &s->temps[idx];
1644             ts->temp_allocated = 1;
1645             tcg_debug_assert(ts->base_type == type);
1646             tcg_debug_assert(ts->kind == kind);
1647             return ts;
1648         }
1649     } else {
1650         tcg_debug_assert(kind == TEMP_TB);
1651     }
1652 
1653     switch (type) {
1654     case TCG_TYPE_I32:
1655     case TCG_TYPE_V64:
1656     case TCG_TYPE_V128:
1657     case TCG_TYPE_V256:
1658         n = 1;
1659         break;
1660     case TCG_TYPE_I64:
1661         n = 64 / TCG_TARGET_REG_BITS;
1662         break;
1663     case TCG_TYPE_I128:
1664         n = 128 / TCG_TARGET_REG_BITS;
1665         break;
1666     default:
1667         g_assert_not_reached();
1668     }
1669 
1670     ts = tcg_temp_alloc(s);
1671     ts->base_type = type;
1672     ts->temp_allocated = 1;
1673     ts->kind = kind;
1674 
1675     if (n == 1) {
1676         ts->type = type;
1677     } else {
1678         ts->type = TCG_TYPE_REG;
1679 
1680         for (int i = 1; i < n; ++i) {
1681             TCGTemp *ts2 = tcg_temp_alloc(s);
1682 
1683             tcg_debug_assert(ts2 == ts + i);
1684             ts2->base_type = type;
1685             ts2->type = TCG_TYPE_REG;
1686             ts2->temp_allocated = 1;
1687             ts2->temp_subindex = i;
1688             ts2->kind = kind;
1689         }
1690     }
1691     return ts;
1692 }
1693 
1694 TCGv_vec tcg_temp_new_vec(TCGType type)
1695 {
1696     TCGTemp *t;
1697 
1698 #ifdef CONFIG_DEBUG_TCG
1699     switch (type) {
1700     case TCG_TYPE_V64:
1701         assert(TCG_TARGET_HAS_v64);
1702         break;
1703     case TCG_TYPE_V128:
1704         assert(TCG_TARGET_HAS_v128);
1705         break;
1706     case TCG_TYPE_V256:
1707         assert(TCG_TARGET_HAS_v256);
1708         break;
1709     default:
1710         g_assert_not_reached();
1711     }
1712 #endif
1713 
1714     t = tcg_temp_new_internal(type, TEMP_EBB);
1715     return temp_tcgv_vec(t);
1716 }
1717 
1718 /* Create a new temp of the same type as an existing temp.  */
1719 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1720 {
1721     TCGTemp *t = tcgv_vec_temp(match);
1722 
1723     tcg_debug_assert(t->temp_allocated != 0);
1724 
1725     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1726     return temp_tcgv_vec(t);
1727 }
1728 
1729 void tcg_temp_free_internal(TCGTemp *ts)
1730 {
1731     TCGContext *s = tcg_ctx;
1732 
1733     switch (ts->kind) {
1734     case TEMP_CONST:
1735     case TEMP_TB:
1736         /* Silently ignore free. */
1737         break;
1738     case TEMP_EBB:
1739         tcg_debug_assert(ts->temp_allocated != 0);
1740         ts->temp_allocated = 0;
1741         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1742         break;
1743     default:
1744         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1745         g_assert_not_reached();
1746     }
1747 }
1748 
1749 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1750 {
1751     TCGContext *s = tcg_ctx;
1752     GHashTable *h = s->const_table[type];
1753     TCGTemp *ts;
1754 
1755     if (h == NULL) {
1756         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1757         s->const_table[type] = h;
1758     }
1759 
1760     ts = g_hash_table_lookup(h, &val);
1761     if (ts == NULL) {
1762         int64_t *val_ptr;
1763 
1764         ts = tcg_temp_alloc(s);
1765 
1766         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1767             TCGTemp *ts2 = tcg_temp_alloc(s);
1768 
1769             tcg_debug_assert(ts2 == ts + 1);
1770 
1771             ts->base_type = TCG_TYPE_I64;
1772             ts->type = TCG_TYPE_I32;
1773             ts->kind = TEMP_CONST;
1774             ts->temp_allocated = 1;
1775 
1776             ts2->base_type = TCG_TYPE_I64;
1777             ts2->type = TCG_TYPE_I32;
1778             ts2->kind = TEMP_CONST;
1779             ts2->temp_allocated = 1;
1780             ts2->temp_subindex = 1;
1781 
1782             /*
1783              * Retain the full value of the 64-bit constant in the low
1784              * part, so that the hash table works.  Actual uses will
1785              * truncate the value to the low part.
1786              */
1787             ts[HOST_BIG_ENDIAN].val = val;
1788             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1789             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1790         } else {
1791             ts->base_type = type;
1792             ts->type = type;
1793             ts->kind = TEMP_CONST;
1794             ts->temp_allocated = 1;
1795             ts->val = val;
1796             val_ptr = &ts->val;
1797         }
1798         g_hash_table_insert(h, val_ptr, ts);
1799     }
1800 
1801     return ts;
1802 }
1803 
1804 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1805 {
1806     val = dup_const(vece, val);
1807     return temp_tcgv_vec(tcg_constant_internal(type, val));
1808 }
1809 
1810 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1811 {
1812     TCGTemp *t = tcgv_vec_temp(match);
1813 
1814     tcg_debug_assert(t->temp_allocated != 0);
1815     return tcg_constant_vec(t->base_type, vece, val);
1816 }
1817 
1818 #ifdef CONFIG_DEBUG_TCG
1819 size_t temp_idx(TCGTemp *ts)
1820 {
1821     ptrdiff_t n = ts - tcg_ctx->temps;
1822     assert(n >= 0 && n < tcg_ctx->nb_temps);
1823     return n;
1824 }
1825 
1826 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1827 {
1828     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1829 
1830     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1831     assert(o % sizeof(TCGTemp) == 0);
1832 
1833     return (void *)tcg_ctx + (uintptr_t)v;
1834 }
1835 #endif /* CONFIG_DEBUG_TCG */
1836 
1837 /* Return true if OP may appear in the opcode stream.
1838    Test the runtime variable that controls each opcode.  */
1839 bool tcg_op_supported(TCGOpcode op)
1840 {
1841     const bool have_vec
1842         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1843 
1844     switch (op) {
1845     case INDEX_op_discard:
1846     case INDEX_op_set_label:
1847     case INDEX_op_call:
1848     case INDEX_op_br:
1849     case INDEX_op_mb:
1850     case INDEX_op_insn_start:
1851     case INDEX_op_exit_tb:
1852     case INDEX_op_goto_tb:
1853     case INDEX_op_goto_ptr:
1854     case INDEX_op_qemu_ld_a32_i32:
1855     case INDEX_op_qemu_ld_a64_i32:
1856     case INDEX_op_qemu_st_a32_i32:
1857     case INDEX_op_qemu_st_a64_i32:
1858     case INDEX_op_qemu_ld_a32_i64:
1859     case INDEX_op_qemu_ld_a64_i64:
1860     case INDEX_op_qemu_st_a32_i64:
1861     case INDEX_op_qemu_st_a64_i64:
1862         return true;
1863 
1864     case INDEX_op_qemu_st8_a32_i32:
1865     case INDEX_op_qemu_st8_a64_i32:
1866         return TCG_TARGET_HAS_qemu_st8_i32;
1867 
1868     case INDEX_op_qemu_ld_a32_i128:
1869     case INDEX_op_qemu_ld_a64_i128:
1870     case INDEX_op_qemu_st_a32_i128:
1871     case INDEX_op_qemu_st_a64_i128:
1872         return TCG_TARGET_HAS_qemu_ldst_i128;
1873 
1874     case INDEX_op_mov_i32:
1875     case INDEX_op_setcond_i32:
1876     case INDEX_op_brcond_i32:
1877     case INDEX_op_ld8u_i32:
1878     case INDEX_op_ld8s_i32:
1879     case INDEX_op_ld16u_i32:
1880     case INDEX_op_ld16s_i32:
1881     case INDEX_op_ld_i32:
1882     case INDEX_op_st8_i32:
1883     case INDEX_op_st16_i32:
1884     case INDEX_op_st_i32:
1885     case INDEX_op_add_i32:
1886     case INDEX_op_sub_i32:
1887     case INDEX_op_mul_i32:
1888     case INDEX_op_and_i32:
1889     case INDEX_op_or_i32:
1890     case INDEX_op_xor_i32:
1891     case INDEX_op_shl_i32:
1892     case INDEX_op_shr_i32:
1893     case INDEX_op_sar_i32:
1894         return true;
1895 
1896     case INDEX_op_negsetcond_i32:
1897         return TCG_TARGET_HAS_negsetcond_i32;
1898     case INDEX_op_movcond_i32:
1899         return TCG_TARGET_HAS_movcond_i32;
1900     case INDEX_op_div_i32:
1901     case INDEX_op_divu_i32:
1902         return TCG_TARGET_HAS_div_i32;
1903     case INDEX_op_rem_i32:
1904     case INDEX_op_remu_i32:
1905         return TCG_TARGET_HAS_rem_i32;
1906     case INDEX_op_div2_i32:
1907     case INDEX_op_divu2_i32:
1908         return TCG_TARGET_HAS_div2_i32;
1909     case INDEX_op_rotl_i32:
1910     case INDEX_op_rotr_i32:
1911         return TCG_TARGET_HAS_rot_i32;
1912     case INDEX_op_deposit_i32:
1913         return TCG_TARGET_HAS_deposit_i32;
1914     case INDEX_op_extract_i32:
1915         return TCG_TARGET_HAS_extract_i32;
1916     case INDEX_op_sextract_i32:
1917         return TCG_TARGET_HAS_sextract_i32;
1918     case INDEX_op_extract2_i32:
1919         return TCG_TARGET_HAS_extract2_i32;
1920     case INDEX_op_add2_i32:
1921         return TCG_TARGET_HAS_add2_i32;
1922     case INDEX_op_sub2_i32:
1923         return TCG_TARGET_HAS_sub2_i32;
1924     case INDEX_op_mulu2_i32:
1925         return TCG_TARGET_HAS_mulu2_i32;
1926     case INDEX_op_muls2_i32:
1927         return TCG_TARGET_HAS_muls2_i32;
1928     case INDEX_op_muluh_i32:
1929         return TCG_TARGET_HAS_muluh_i32;
1930     case INDEX_op_mulsh_i32:
1931         return TCG_TARGET_HAS_mulsh_i32;
1932     case INDEX_op_ext8s_i32:
1933         return TCG_TARGET_HAS_ext8s_i32;
1934     case INDEX_op_ext16s_i32:
1935         return TCG_TARGET_HAS_ext16s_i32;
1936     case INDEX_op_ext8u_i32:
1937         return TCG_TARGET_HAS_ext8u_i32;
1938     case INDEX_op_ext16u_i32:
1939         return TCG_TARGET_HAS_ext16u_i32;
1940     case INDEX_op_bswap16_i32:
1941         return TCG_TARGET_HAS_bswap16_i32;
1942     case INDEX_op_bswap32_i32:
1943         return TCG_TARGET_HAS_bswap32_i32;
1944     case INDEX_op_not_i32:
1945         return TCG_TARGET_HAS_not_i32;
1946     case INDEX_op_neg_i32:
1947         return TCG_TARGET_HAS_neg_i32;
1948     case INDEX_op_andc_i32:
1949         return TCG_TARGET_HAS_andc_i32;
1950     case INDEX_op_orc_i32:
1951         return TCG_TARGET_HAS_orc_i32;
1952     case INDEX_op_eqv_i32:
1953         return TCG_TARGET_HAS_eqv_i32;
1954     case INDEX_op_nand_i32:
1955         return TCG_TARGET_HAS_nand_i32;
1956     case INDEX_op_nor_i32:
1957         return TCG_TARGET_HAS_nor_i32;
1958     case INDEX_op_clz_i32:
1959         return TCG_TARGET_HAS_clz_i32;
1960     case INDEX_op_ctz_i32:
1961         return TCG_TARGET_HAS_ctz_i32;
1962     case INDEX_op_ctpop_i32:
1963         return TCG_TARGET_HAS_ctpop_i32;
1964 
1965     case INDEX_op_brcond2_i32:
1966     case INDEX_op_setcond2_i32:
1967         return TCG_TARGET_REG_BITS == 32;
1968 
1969     case INDEX_op_mov_i64:
1970     case INDEX_op_setcond_i64:
1971     case INDEX_op_brcond_i64:
1972     case INDEX_op_ld8u_i64:
1973     case INDEX_op_ld8s_i64:
1974     case INDEX_op_ld16u_i64:
1975     case INDEX_op_ld16s_i64:
1976     case INDEX_op_ld32u_i64:
1977     case INDEX_op_ld32s_i64:
1978     case INDEX_op_ld_i64:
1979     case INDEX_op_st8_i64:
1980     case INDEX_op_st16_i64:
1981     case INDEX_op_st32_i64:
1982     case INDEX_op_st_i64:
1983     case INDEX_op_add_i64:
1984     case INDEX_op_sub_i64:
1985     case INDEX_op_mul_i64:
1986     case INDEX_op_and_i64:
1987     case INDEX_op_or_i64:
1988     case INDEX_op_xor_i64:
1989     case INDEX_op_shl_i64:
1990     case INDEX_op_shr_i64:
1991     case INDEX_op_sar_i64:
1992     case INDEX_op_ext_i32_i64:
1993     case INDEX_op_extu_i32_i64:
1994         return TCG_TARGET_REG_BITS == 64;
1995 
1996     case INDEX_op_negsetcond_i64:
1997         return TCG_TARGET_HAS_negsetcond_i64;
1998     case INDEX_op_movcond_i64:
1999         return TCG_TARGET_HAS_movcond_i64;
2000     case INDEX_op_div_i64:
2001     case INDEX_op_divu_i64:
2002         return TCG_TARGET_HAS_div_i64;
2003     case INDEX_op_rem_i64:
2004     case INDEX_op_remu_i64:
2005         return TCG_TARGET_HAS_rem_i64;
2006     case INDEX_op_div2_i64:
2007     case INDEX_op_divu2_i64:
2008         return TCG_TARGET_HAS_div2_i64;
2009     case INDEX_op_rotl_i64:
2010     case INDEX_op_rotr_i64:
2011         return TCG_TARGET_HAS_rot_i64;
2012     case INDEX_op_deposit_i64:
2013         return TCG_TARGET_HAS_deposit_i64;
2014     case INDEX_op_extract_i64:
2015         return TCG_TARGET_HAS_extract_i64;
2016     case INDEX_op_sextract_i64:
2017         return TCG_TARGET_HAS_sextract_i64;
2018     case INDEX_op_extract2_i64:
2019         return TCG_TARGET_HAS_extract2_i64;
2020     case INDEX_op_extrl_i64_i32:
2021     case INDEX_op_extrh_i64_i32:
2022         return TCG_TARGET_HAS_extr_i64_i32;
2023     case INDEX_op_ext8s_i64:
2024         return TCG_TARGET_HAS_ext8s_i64;
2025     case INDEX_op_ext16s_i64:
2026         return TCG_TARGET_HAS_ext16s_i64;
2027     case INDEX_op_ext32s_i64:
2028         return TCG_TARGET_HAS_ext32s_i64;
2029     case INDEX_op_ext8u_i64:
2030         return TCG_TARGET_HAS_ext8u_i64;
2031     case INDEX_op_ext16u_i64:
2032         return TCG_TARGET_HAS_ext16u_i64;
2033     case INDEX_op_ext32u_i64:
2034         return TCG_TARGET_HAS_ext32u_i64;
2035     case INDEX_op_bswap16_i64:
2036         return TCG_TARGET_HAS_bswap16_i64;
2037     case INDEX_op_bswap32_i64:
2038         return TCG_TARGET_HAS_bswap32_i64;
2039     case INDEX_op_bswap64_i64:
2040         return TCG_TARGET_HAS_bswap64_i64;
2041     case INDEX_op_not_i64:
2042         return TCG_TARGET_HAS_not_i64;
2043     case INDEX_op_neg_i64:
2044         return TCG_TARGET_HAS_neg_i64;
2045     case INDEX_op_andc_i64:
2046         return TCG_TARGET_HAS_andc_i64;
2047     case INDEX_op_orc_i64:
2048         return TCG_TARGET_HAS_orc_i64;
2049     case INDEX_op_eqv_i64:
2050         return TCG_TARGET_HAS_eqv_i64;
2051     case INDEX_op_nand_i64:
2052         return TCG_TARGET_HAS_nand_i64;
2053     case INDEX_op_nor_i64:
2054         return TCG_TARGET_HAS_nor_i64;
2055     case INDEX_op_clz_i64:
2056         return TCG_TARGET_HAS_clz_i64;
2057     case INDEX_op_ctz_i64:
2058         return TCG_TARGET_HAS_ctz_i64;
2059     case INDEX_op_ctpop_i64:
2060         return TCG_TARGET_HAS_ctpop_i64;
2061     case INDEX_op_add2_i64:
2062         return TCG_TARGET_HAS_add2_i64;
2063     case INDEX_op_sub2_i64:
2064         return TCG_TARGET_HAS_sub2_i64;
2065     case INDEX_op_mulu2_i64:
2066         return TCG_TARGET_HAS_mulu2_i64;
2067     case INDEX_op_muls2_i64:
2068         return TCG_TARGET_HAS_muls2_i64;
2069     case INDEX_op_muluh_i64:
2070         return TCG_TARGET_HAS_muluh_i64;
2071     case INDEX_op_mulsh_i64:
2072         return TCG_TARGET_HAS_mulsh_i64;
2073 
2074     case INDEX_op_mov_vec:
2075     case INDEX_op_dup_vec:
2076     case INDEX_op_dupm_vec:
2077     case INDEX_op_ld_vec:
2078     case INDEX_op_st_vec:
2079     case INDEX_op_add_vec:
2080     case INDEX_op_sub_vec:
2081     case INDEX_op_and_vec:
2082     case INDEX_op_or_vec:
2083     case INDEX_op_xor_vec:
2084     case INDEX_op_cmp_vec:
2085         return have_vec;
2086     case INDEX_op_dup2_vec:
2087         return have_vec && TCG_TARGET_REG_BITS == 32;
2088     case INDEX_op_not_vec:
2089         return have_vec && TCG_TARGET_HAS_not_vec;
2090     case INDEX_op_neg_vec:
2091         return have_vec && TCG_TARGET_HAS_neg_vec;
2092     case INDEX_op_abs_vec:
2093         return have_vec && TCG_TARGET_HAS_abs_vec;
2094     case INDEX_op_andc_vec:
2095         return have_vec && TCG_TARGET_HAS_andc_vec;
2096     case INDEX_op_orc_vec:
2097         return have_vec && TCG_TARGET_HAS_orc_vec;
2098     case INDEX_op_nand_vec:
2099         return have_vec && TCG_TARGET_HAS_nand_vec;
2100     case INDEX_op_nor_vec:
2101         return have_vec && TCG_TARGET_HAS_nor_vec;
2102     case INDEX_op_eqv_vec:
2103         return have_vec && TCG_TARGET_HAS_eqv_vec;
2104     case INDEX_op_mul_vec:
2105         return have_vec && TCG_TARGET_HAS_mul_vec;
2106     case INDEX_op_shli_vec:
2107     case INDEX_op_shri_vec:
2108     case INDEX_op_sari_vec:
2109         return have_vec && TCG_TARGET_HAS_shi_vec;
2110     case INDEX_op_shls_vec:
2111     case INDEX_op_shrs_vec:
2112     case INDEX_op_sars_vec:
2113         return have_vec && TCG_TARGET_HAS_shs_vec;
2114     case INDEX_op_shlv_vec:
2115     case INDEX_op_shrv_vec:
2116     case INDEX_op_sarv_vec:
2117         return have_vec && TCG_TARGET_HAS_shv_vec;
2118     case INDEX_op_rotli_vec:
2119         return have_vec && TCG_TARGET_HAS_roti_vec;
2120     case INDEX_op_rotls_vec:
2121         return have_vec && TCG_TARGET_HAS_rots_vec;
2122     case INDEX_op_rotlv_vec:
2123     case INDEX_op_rotrv_vec:
2124         return have_vec && TCG_TARGET_HAS_rotv_vec;
2125     case INDEX_op_ssadd_vec:
2126     case INDEX_op_usadd_vec:
2127     case INDEX_op_sssub_vec:
2128     case INDEX_op_ussub_vec:
2129         return have_vec && TCG_TARGET_HAS_sat_vec;
2130     case INDEX_op_smin_vec:
2131     case INDEX_op_umin_vec:
2132     case INDEX_op_smax_vec:
2133     case INDEX_op_umax_vec:
2134         return have_vec && TCG_TARGET_HAS_minmax_vec;
2135     case INDEX_op_bitsel_vec:
2136         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2137     case INDEX_op_cmpsel_vec:
2138         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2139 
2140     default:
2141         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2142         return true;
2143     }
2144 }
2145 
2146 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2147 
2148 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2149 {
2150     TCGv_i64 extend_free[MAX_CALL_IARGS];
2151     int n_extend = 0;
2152     TCGOp *op;
2153     int i, n, pi = 0, total_args;
2154 
2155     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2156         init_call_layout(info);
2157         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2158     }
2159 
2160     total_args = info->nr_out + info->nr_in + 2;
2161     op = tcg_op_alloc(INDEX_op_call, total_args);
2162 
2163 #ifdef CONFIG_PLUGIN
2164     /* Flag helpers that may affect guest state */
2165     if (tcg_ctx->plugin_insn &&
2166         !(info->flags & TCG_CALL_PLUGIN) &&
2167         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2168         tcg_ctx->plugin_insn->calls_helpers = true;
2169     }
2170 #endif
2171 
2172     TCGOP_CALLO(op) = n = info->nr_out;
2173     switch (n) {
2174     case 0:
2175         tcg_debug_assert(ret == NULL);
2176         break;
2177     case 1:
2178         tcg_debug_assert(ret != NULL);
2179         op->args[pi++] = temp_arg(ret);
2180         break;
2181     case 2:
2182     case 4:
2183         tcg_debug_assert(ret != NULL);
2184         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2185         tcg_debug_assert(ret->temp_subindex == 0);
2186         for (i = 0; i < n; ++i) {
2187             op->args[pi++] = temp_arg(ret + i);
2188         }
2189         break;
2190     default:
2191         g_assert_not_reached();
2192     }
2193 
2194     TCGOP_CALLI(op) = n = info->nr_in;
2195     for (i = 0; i < n; i++) {
2196         const TCGCallArgumentLoc *loc = &info->in[i];
2197         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2198 
2199         switch (loc->kind) {
2200         case TCG_CALL_ARG_NORMAL:
2201         case TCG_CALL_ARG_BY_REF:
2202         case TCG_CALL_ARG_BY_REF_N:
2203             op->args[pi++] = temp_arg(ts);
2204             break;
2205 
2206         case TCG_CALL_ARG_EXTEND_U:
2207         case TCG_CALL_ARG_EXTEND_S:
2208             {
2209                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2210                 TCGv_i32 orig = temp_tcgv_i32(ts);
2211 
2212                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2213                     tcg_gen_ext_i32_i64(temp, orig);
2214                 } else {
2215                     tcg_gen_extu_i32_i64(temp, orig);
2216                 }
2217                 op->args[pi++] = tcgv_i64_arg(temp);
2218                 extend_free[n_extend++] = temp;
2219             }
2220             break;
2221 
2222         default:
2223             g_assert_not_reached();
2224         }
2225     }
2226     op->args[pi++] = (uintptr_t)info->func;
2227     op->args[pi++] = (uintptr_t)info;
2228     tcg_debug_assert(pi == total_args);
2229 
2230     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2231 
2232     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2233     for (i = 0; i < n_extend; ++i) {
2234         tcg_temp_free_i64(extend_free[i]);
2235     }
2236 }
2237 
2238 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2239 {
2240     tcg_gen_callN(info, ret, NULL);
2241 }
2242 
2243 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2244 {
2245     tcg_gen_callN(info, ret, &t1);
2246 }
2247 
2248 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2249 {
2250     TCGTemp *args[2] = { t1, t2 };
2251     tcg_gen_callN(info, ret, args);
2252 }
2253 
2254 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2255                    TCGTemp *t2, TCGTemp *t3)
2256 {
2257     TCGTemp *args[3] = { t1, t2, t3 };
2258     tcg_gen_callN(info, ret, args);
2259 }
2260 
2261 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2262                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2263 {
2264     TCGTemp *args[4] = { t1, t2, t3, t4 };
2265     tcg_gen_callN(info, ret, args);
2266 }
2267 
2268 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2269                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2270 {
2271     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2272     tcg_gen_callN(info, ret, args);
2273 }
2274 
2275 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2276                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2277 {
2278     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2279     tcg_gen_callN(info, ret, args);
2280 }
2281 
2282 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2283                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2284                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2285 {
2286     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2287     tcg_gen_callN(info, ret, args);
2288 }
2289 
2290 static void tcg_reg_alloc_start(TCGContext *s)
2291 {
2292     int i, n;
2293 
2294     for (i = 0, n = s->nb_temps; i < n; i++) {
2295         TCGTemp *ts = &s->temps[i];
2296         TCGTempVal val = TEMP_VAL_MEM;
2297 
2298         switch (ts->kind) {
2299         case TEMP_CONST:
2300             val = TEMP_VAL_CONST;
2301             break;
2302         case TEMP_FIXED:
2303             val = TEMP_VAL_REG;
2304             break;
2305         case TEMP_GLOBAL:
2306             break;
2307         case TEMP_EBB:
2308             val = TEMP_VAL_DEAD;
2309             /* fall through */
2310         case TEMP_TB:
2311             ts->mem_allocated = 0;
2312             break;
2313         default:
2314             g_assert_not_reached();
2315         }
2316         ts->val_type = val;
2317     }
2318 
2319     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2320 }
2321 
2322 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2323                                  TCGTemp *ts)
2324 {
2325     int idx = temp_idx(ts);
2326 
2327     switch (ts->kind) {
2328     case TEMP_FIXED:
2329     case TEMP_GLOBAL:
2330         pstrcpy(buf, buf_size, ts->name);
2331         break;
2332     case TEMP_TB:
2333         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2334         break;
2335     case TEMP_EBB:
2336         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2337         break;
2338     case TEMP_CONST:
2339         switch (ts->type) {
2340         case TCG_TYPE_I32:
2341             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2342             break;
2343 #if TCG_TARGET_REG_BITS > 32
2344         case TCG_TYPE_I64:
2345             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2346             break;
2347 #endif
2348         case TCG_TYPE_V64:
2349         case TCG_TYPE_V128:
2350         case TCG_TYPE_V256:
2351             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2352                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2353             break;
2354         default:
2355             g_assert_not_reached();
2356         }
2357         break;
2358     }
2359     return buf;
2360 }
2361 
2362 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2363                              int buf_size, TCGArg arg)
2364 {
2365     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2366 }
2367 
2368 static const char * const cond_name[] =
2369 {
2370     [TCG_COND_NEVER] = "never",
2371     [TCG_COND_ALWAYS] = "always",
2372     [TCG_COND_EQ] = "eq",
2373     [TCG_COND_NE] = "ne",
2374     [TCG_COND_LT] = "lt",
2375     [TCG_COND_GE] = "ge",
2376     [TCG_COND_LE] = "le",
2377     [TCG_COND_GT] = "gt",
2378     [TCG_COND_LTU] = "ltu",
2379     [TCG_COND_GEU] = "geu",
2380     [TCG_COND_LEU] = "leu",
2381     [TCG_COND_GTU] = "gtu"
2382 };
2383 
2384 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2385 {
2386     [MO_UB]   = "ub",
2387     [MO_SB]   = "sb",
2388     [MO_LEUW] = "leuw",
2389     [MO_LESW] = "lesw",
2390     [MO_LEUL] = "leul",
2391     [MO_LESL] = "lesl",
2392     [MO_LEUQ] = "leq",
2393     [MO_BEUW] = "beuw",
2394     [MO_BESW] = "besw",
2395     [MO_BEUL] = "beul",
2396     [MO_BESL] = "besl",
2397     [MO_BEUQ] = "beq",
2398     [MO_128 + MO_BE] = "beo",
2399     [MO_128 + MO_LE] = "leo",
2400 };
2401 
2402 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2403     [MO_UNALN >> MO_ASHIFT]    = "un+",
2404     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2405     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2406     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2407     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2408     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2409     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2410     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2411 };
2412 
2413 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2414     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2415     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2416     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2417     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2418     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2419     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2420 };
2421 
2422 static const char bswap_flag_name[][6] = {
2423     [TCG_BSWAP_IZ] = "iz",
2424     [TCG_BSWAP_OZ] = "oz",
2425     [TCG_BSWAP_OS] = "os",
2426     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2427     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2428 };
2429 
2430 static inline bool tcg_regset_single(TCGRegSet d)
2431 {
2432     return (d & (d - 1)) == 0;
2433 }
2434 
2435 static inline TCGReg tcg_regset_first(TCGRegSet d)
2436 {
2437     if (TCG_TARGET_NB_REGS <= 32) {
2438         return ctz32(d);
2439     } else {
2440         return ctz64(d);
2441     }
2442 }
2443 
/*
 * "No error" fprintf: evaluates to the number of characters written,
 * or to 0 when fprintf reports a failure.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ < 0 ? 0 : ret_; })
2447 
2448 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2449 {
2450     char buf[128];
2451     TCGOp *op;
2452 
2453     QTAILQ_FOREACH(op, &s->ops, link) {
2454         int i, k, nb_oargs, nb_iargs, nb_cargs;
2455         const TCGOpDef *def;
2456         TCGOpcode c;
2457         int col = 0;
2458 
2459         c = op->opc;
2460         def = &tcg_op_defs[c];
2461 
2462         if (c == INDEX_op_insn_start) {
2463             nb_oargs = 0;
2464             col += ne_fprintf(f, "\n ----");
2465 
2466             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2467                 col += ne_fprintf(f, " %016" PRIx64,
2468                                   tcg_get_insn_start_param(op, i));
2469             }
2470         } else if (c == INDEX_op_call) {
2471             const TCGHelperInfo *info = tcg_call_info(op);
2472             void *func = tcg_call_func(op);
2473 
2474             /* variable number of arguments */
2475             nb_oargs = TCGOP_CALLO(op);
2476             nb_iargs = TCGOP_CALLI(op);
2477             nb_cargs = def->nb_cargs;
2478 
2479             col += ne_fprintf(f, " %s ", def->name);
2480 
2481             /*
2482              * Print the function name from TCGHelperInfo, if available.
2483              * Note that plugins have a template function for the info,
2484              * but the actual function pointer comes from the plugin.
2485              */
2486             if (func == info->func) {
2487                 col += ne_fprintf(f, "%s", info->name);
2488             } else {
2489                 col += ne_fprintf(f, "plugin(%p)", func);
2490             }
2491 
2492             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2493             for (i = 0; i < nb_oargs; i++) {
2494                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2495                                                             op->args[i]));
2496             }
2497             for (i = 0; i < nb_iargs; i++) {
2498                 TCGArg arg = op->args[nb_oargs + i];
2499                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2500                 col += ne_fprintf(f, ",%s", t);
2501             }
2502         } else {
2503             col += ne_fprintf(f, " %s ", def->name);
2504 
2505             nb_oargs = def->nb_oargs;
2506             nb_iargs = def->nb_iargs;
2507             nb_cargs = def->nb_cargs;
2508 
2509             if (def->flags & TCG_OPF_VECTOR) {
2510                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2511                                   8 << TCGOP_VECE(op));
2512             }
2513 
2514             k = 0;
2515             for (i = 0; i < nb_oargs; i++) {
2516                 const char *sep =  k ? "," : "";
2517                 col += ne_fprintf(f, "%s%s", sep,
2518                                   tcg_get_arg_str(s, buf, sizeof(buf),
2519                                                   op->args[k++]));
2520             }
2521             for (i = 0; i < nb_iargs; i++) {
2522                 const char *sep =  k ? "," : "";
2523                 col += ne_fprintf(f, "%s%s", sep,
2524                                   tcg_get_arg_str(s, buf, sizeof(buf),
2525                                                   op->args[k++]));
2526             }
2527             switch (c) {
2528             case INDEX_op_brcond_i32:
2529             case INDEX_op_setcond_i32:
2530             case INDEX_op_negsetcond_i32:
2531             case INDEX_op_movcond_i32:
2532             case INDEX_op_brcond2_i32:
2533             case INDEX_op_setcond2_i32:
2534             case INDEX_op_brcond_i64:
2535             case INDEX_op_setcond_i64:
2536             case INDEX_op_negsetcond_i64:
2537             case INDEX_op_movcond_i64:
2538             case INDEX_op_cmp_vec:
2539             case INDEX_op_cmpsel_vec:
2540                 if (op->args[k] < ARRAY_SIZE(cond_name)
2541                     && cond_name[op->args[k]]) {
2542                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2543                 } else {
2544                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2545                 }
2546                 i = 1;
2547                 break;
2548             case INDEX_op_qemu_ld_a32_i32:
2549             case INDEX_op_qemu_ld_a64_i32:
2550             case INDEX_op_qemu_st_a32_i32:
2551             case INDEX_op_qemu_st_a64_i32:
2552             case INDEX_op_qemu_st8_a32_i32:
2553             case INDEX_op_qemu_st8_a64_i32:
2554             case INDEX_op_qemu_ld_a32_i64:
2555             case INDEX_op_qemu_ld_a64_i64:
2556             case INDEX_op_qemu_st_a32_i64:
2557             case INDEX_op_qemu_st_a64_i64:
2558             case INDEX_op_qemu_ld_a32_i128:
2559             case INDEX_op_qemu_ld_a64_i128:
2560             case INDEX_op_qemu_st_a32_i128:
2561             case INDEX_op_qemu_st_a64_i128:
2562                 {
2563                     const char *s_al, *s_op, *s_at;
2564                     MemOpIdx oi = op->args[k++];
2565                     MemOp mop = get_memop(oi);
2566                     unsigned ix = get_mmuidx(oi);
2567 
2568                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2569                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2570                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2571                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2572 
2573                     /* If all fields are accounted for, print symbolically. */
2574                     if (!mop && s_al && s_op && s_at) {
2575                         col += ne_fprintf(f, ",%s%s%s,%u",
2576                                           s_at, s_al, s_op, ix);
2577                     } else {
2578                         mop = get_memop(oi);
2579                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2580                     }
2581                     i = 1;
2582                 }
2583                 break;
2584             case INDEX_op_bswap16_i32:
2585             case INDEX_op_bswap16_i64:
2586             case INDEX_op_bswap32_i32:
2587             case INDEX_op_bswap32_i64:
2588             case INDEX_op_bswap64_i64:
2589                 {
2590                     TCGArg flags = op->args[k];
2591                     const char *name = NULL;
2592 
2593                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2594                         name = bswap_flag_name[flags];
2595                     }
2596                     if (name) {
2597                         col += ne_fprintf(f, ",%s", name);
2598                     } else {
2599                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2600                     }
2601                     i = k = 1;
2602                 }
2603                 break;
2604             default:
2605                 i = 0;
2606                 break;
2607             }
2608             switch (c) {
2609             case INDEX_op_set_label:
2610             case INDEX_op_br:
2611             case INDEX_op_brcond_i32:
2612             case INDEX_op_brcond_i64:
2613             case INDEX_op_brcond2_i32:
2614                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2615                                   arg_label(op->args[k])->id);
2616                 i++, k++;
2617                 break;
2618             case INDEX_op_mb:
2619                 {
2620                     TCGBar membar = op->args[k];
2621                     const char *b_op, *m_op;
2622 
2623                     switch (membar & TCG_BAR_SC) {
2624                     case 0:
2625                         b_op = "none";
2626                         break;
2627                     case TCG_BAR_LDAQ:
2628                         b_op = "acq";
2629                         break;
2630                     case TCG_BAR_STRL:
2631                         b_op = "rel";
2632                         break;
2633                     case TCG_BAR_SC:
2634                         b_op = "seq";
2635                         break;
2636                     default:
2637                         g_assert_not_reached();
2638                     }
2639 
2640                     switch (membar & TCG_MO_ALL) {
2641                     case 0:
2642                         m_op = "none";
2643                         break;
2644                     case TCG_MO_LD_LD:
2645                         m_op = "rr";
2646                         break;
2647                     case TCG_MO_LD_ST:
2648                         m_op = "rw";
2649                         break;
2650                     case TCG_MO_ST_LD:
2651                         m_op = "wr";
2652                         break;
2653                     case TCG_MO_ST_ST:
2654                         m_op = "ww";
2655                         break;
2656                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2657                         m_op = "rr+rw";
2658                         break;
2659                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2660                         m_op = "rr+wr";
2661                         break;
2662                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2663                         m_op = "rr+ww";
2664                         break;
2665                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2666                         m_op = "rw+wr";
2667                         break;
2668                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2669                         m_op = "rw+ww";
2670                         break;
2671                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2672                         m_op = "wr+ww";
2673                         break;
2674                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2675                         m_op = "rr+rw+wr";
2676                         break;
2677                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2678                         m_op = "rr+rw+ww";
2679                         break;
2680                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2681                         m_op = "rr+wr+ww";
2682                         break;
2683                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2684                         m_op = "rw+wr+ww";
2685                         break;
2686                     case TCG_MO_ALL:
2687                         m_op = "all";
2688                         break;
2689                     default:
2690                         g_assert_not_reached();
2691                     }
2692 
2693                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2694                     i++, k++;
2695                 }
2696                 break;
2697             default:
2698                 break;
2699             }
2700             for (; i < nb_cargs; i++, k++) {
2701                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2702                                   op->args[k]);
2703             }
2704         }
2705 
2706         if (have_prefs || op->life) {
2707             for (; col < 40; ++col) {
2708                 putc(' ', f);
2709             }
2710         }
2711 
2712         if (op->life) {
2713             unsigned life = op->life;
2714 
2715             if (life & (SYNC_ARG * 3)) {
2716                 ne_fprintf(f, "  sync:");
2717                 for (i = 0; i < 2; ++i) {
2718                     if (life & (SYNC_ARG << i)) {
2719                         ne_fprintf(f, " %d", i);
2720                     }
2721                 }
2722             }
2723             life /= DEAD_ARG;
2724             if (life) {
2725                 ne_fprintf(f, "  dead:");
2726                 for (i = 0; life; ++i, life >>= 1) {
2727                     if (life & 1) {
2728                         ne_fprintf(f, " %d", i);
2729                     }
2730                 }
2731             }
2732         }
2733 
2734         if (have_prefs) {
2735             for (i = 0; i < nb_oargs; ++i) {
2736                 TCGRegSet set = output_pref(op, i);
2737 
2738                 if (i == 0) {
2739                     ne_fprintf(f, "  pref=");
2740                 } else {
2741                     ne_fprintf(f, ",");
2742                 }
2743                 if (set == 0) {
2744                     ne_fprintf(f, "none");
2745                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2746                     ne_fprintf(f, "all");
2747 #ifdef CONFIG_DEBUG_TCG
2748                 } else if (tcg_regset_single(set)) {
2749                     TCGReg reg = tcg_regset_first(set);
2750                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2751 #endif
2752                 } else if (TCG_TARGET_NB_REGS <= 32) {
2753                     ne_fprintf(f, "0x%x", (uint32_t)set);
2754                 } else {
2755                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2756                 }
2757             }
2758         }
2759 
2760         putc('\n', f);
2761     }
2762 }
2763 
2764 /* we give more priority to constraints with less registers */
2765 static int get_constraint_priority(const TCGOpDef *def, int k)
2766 {
2767     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2768     int n = ctpop64(arg_ct->regs);
2769 
2770     /*
2771      * Sort constraints of a single register first, which includes output
2772      * aliases (which must exactly match the input already allocated).
2773      */
2774     if (n == 1 || arg_ct->oalias) {
2775         return INT_MAX;
2776     }
2777 
2778     /*
2779      * Sort register pairs next, first then second immediately after.
2780      * Arbitrarily sort multiple pairs by the index of the first reg;
2781      * there shouldn't be many pairs.
2782      */
2783     switch (arg_ct->pair) {
2784     case 1:
2785     case 3:
2786         return (k + 1) * 2;
2787     case 2:
2788         return (arg_ct->pair_index + 1) * 2 - 1;
2789     }
2790 
2791     /* Finally, sort by decreasing register count. */
2792     assert(n > 1);
2793     return -n;
2794 }
2795 
2796 /* sort from highest priority to lowest */
2797 static void sort_constraints(TCGOpDef *def, int start, int n)
2798 {
2799     int i, j;
2800     TCGArgConstraint *a = def->args_ct;
2801 
2802     for (i = 0; i < n; i++) {
2803         a[start + i].sort_index = start + i;
2804     }
2805     if (n <= 1) {
2806         return;
2807     }
2808     for (i = 0; i < n - 1; i++) {
2809         for (j = i + 1; j < n; j++) {
2810             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2811             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2812             if (p1 < p2) {
2813                 int tmp = a[start + i].sort_index;
2814                 a[start + i].sort_index = a[start + j].sort_index;
2815                 a[start + j].sort_index = tmp;
2816             }
2817         }
2818     }
2819 }
2820 
/*
 * Translate the backend's constraint strings into the args_ct[] entries
 * of every opcode in tcg_op_defs[], then compute the order in which the
 * outputs and the inputs of each opcode are sorted (sort_index).
 *
 * Opcodes flagged TCG_OPF_NOT_PRESENT, and opcodes with no input or
 * output arguments, are left untouched.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* Arguments are indexed with the outputs first, then the inputs. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /*
             * The leading character may be an alias digit, the new-register
             * marker, or a pair marker.  Digits and pair markers must be
             * the whole string and finish this argument.
             */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: start from the output's copy. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Only valid on outputs; the rest of the string follows. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /*
             * Each remaining character ORs a constant class into .ct or
             * a register class into .regs.
             */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* Input I aliases the first output of a pair (O, O2). */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* Input I aliases the second output of a pair (O2, O). */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        /* Outputs and inputs are sorted independently of each other. */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
3025 
3026 static void remove_label_use(TCGOp *op, int idx)
3027 {
3028     TCGLabel *label = arg_label(op->args[idx]);
3029     TCGLabelUse *use;
3030 
3031     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3032         if (use->op == op) {
3033             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3034             return;
3035         }
3036     }
3037     g_assert_not_reached();
3038 }
3039 
3040 void tcg_op_remove(TCGContext *s, TCGOp *op)
3041 {
3042     switch (op->opc) {
3043     case INDEX_op_br:
3044         remove_label_use(op, 0);
3045         break;
3046     case INDEX_op_brcond_i32:
3047     case INDEX_op_brcond_i64:
3048         remove_label_use(op, 3);
3049         break;
3050     case INDEX_op_brcond2_i32:
3051         remove_label_use(op, 5);
3052         break;
3053     default:
3054         break;
3055     }
3056 
3057     QTAILQ_REMOVE(&s->ops, op, link);
3058     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3059     s->nb_ops--;
3060 }
3061 
3062 void tcg_remove_ops_after(TCGOp *op)
3063 {
3064     TCGContext *s = tcg_ctx;
3065 
3066     while (true) {
3067         TCGOp *last = tcg_last_op();
3068         if (last == op) {
3069             return;
3070         }
3071         tcg_op_remove(s, last);
3072     }
3073 }
3074 
3075 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3076 {
3077     TCGContext *s = tcg_ctx;
3078     TCGOp *op = NULL;
3079 
3080     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3081         QTAILQ_FOREACH(op, &s->free_ops, link) {
3082             if (nargs <= op->nargs) {
3083                 QTAILQ_REMOVE(&s->free_ops, op, link);
3084                 nargs = op->nargs;
3085                 goto found;
3086             }
3087         }
3088     }
3089 
3090     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3091     nargs = MAX(4, nargs);
3092     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3093 
3094  found:
3095     memset(op, 0, offsetof(TCGOp, link));
3096     op->opc = opc;
3097     op->nargs = nargs;
3098 
3099     /* Check for bitfield overflow. */
3100     tcg_debug_assert(op->nargs == nargs);
3101 
3102     s->nb_ops++;
3103     return op;
3104 }
3105 
3106 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3107 {
3108     TCGOp *op = tcg_op_alloc(opc, nargs);
3109     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3110     return op;
3111 }
3112 
3113 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3114                             TCGOpcode opc, unsigned nargs)
3115 {
3116     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3117     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3118     return new_op;
3119 }
3120 
3121 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3122                            TCGOpcode opc, unsigned nargs)
3123 {
3124     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3125     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3126     return new_op;
3127 }
3128 
3129 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3130 {
3131     TCGLabelUse *u;
3132 
3133     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3134         TCGOp *op = u->op;
3135         switch (op->opc) {
3136         case INDEX_op_br:
3137             op->args[0] = label_arg(to);
3138             break;
3139         case INDEX_op_brcond_i32:
3140         case INDEX_op_brcond_i64:
3141             op->args[3] = label_arg(to);
3142             break;
3143         case INDEX_op_brcond2_i32:
3144             op->args[5] = label_arg(to);
3145             break;
3146         default:
3147             g_assert_not_reached();
3148         }
3149     }
3150 
3151     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3152 }
3153 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the ops in order, tracking in DEAD whether the current position
 * follows an unconditional control transfer (br, exit_tb, goto_ptr, or a
 * call flagged TCG_CALL_NO_RETURN) with no referenced label in between.
 * Ops seen while DEAD are removed, except insn_start.  Along the way,
 * adjacent labels are merged, a branch to the immediately following
 * label is dropped, and labels with no remaining uses are removed.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* Default decision for this op; individual cases may override. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                /* Re-fetch: the op now preceding this label has changed. */
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3245 
/*
 * Bits kept in the state field of a temp by the liveness helpers below:
 * TS_DEAD marks the temp as dead, TS_MEM marks it as needing to be in
 * (synced to) memory.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit of argument N.  Both expand to a
 * reference to a variable named arg_life, which must be in scope at the
 * point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3251 
3252 /* For liveness_pass_1, the register preferences for a given temp.  */
3253 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3254 {
3255     return ts->state_ptr;
3256 }
3257 
3258 /* For liveness_pass_1, reset the preferences for a given temp to the
3259  * maximal regset for its type.
3260  */
3261 static inline void la_reset_pref(TCGTemp *ts)
3262 {
3263     *la_temp_pref(ts)
3264         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3265 }
3266 
3267 /* liveness analysis: end of function: all temps are dead, and globals
3268    should be in memory. */
3269 static void la_func_end(TCGContext *s, int ng, int nt)
3270 {
3271     int i;
3272 
3273     for (i = 0; i < ng; ++i) {
3274         s->temps[i].state = TS_DEAD | TS_MEM;
3275         la_reset_pref(&s->temps[i]);
3276     }
3277     for (i = ng; i < nt; ++i) {
3278         s->temps[i].state = TS_DEAD;
3279         la_reset_pref(&s->temps[i]);
3280     }
3281 }
3282 
3283 /* liveness analysis: end of basic block: all temps are dead, globals
3284    and local temps should be in memory. */
3285 static void la_bb_end(TCGContext *s, int ng, int nt)
3286 {
3287     int i;
3288 
3289     for (i = 0; i < nt; ++i) {
3290         TCGTemp *ts = &s->temps[i];
3291         int state;
3292 
3293         switch (ts->kind) {
3294         case TEMP_FIXED:
3295         case TEMP_GLOBAL:
3296         case TEMP_TB:
3297             state = TS_DEAD | TS_MEM;
3298             break;
3299         case TEMP_EBB:
3300         case TEMP_CONST:
3301             state = TS_DEAD;
3302             break;
3303         default:
3304             g_assert_not_reached();
3305         }
3306         ts->state = state;
3307         la_reset_pref(ts);
3308     }
3309 }
3310 
3311 /* liveness analysis: sync globals back to memory.  */
3312 static void la_global_sync(TCGContext *s, int ng)
3313 {
3314     int i;
3315 
3316     for (i = 0; i < ng; ++i) {
3317         int state = s->temps[i].state;
3318         s->temps[i].state = state | TS_MEM;
3319         if (state == TS_DEAD) {
3320             /* If the global was previously dead, reset prefs.  */
3321             la_reset_pref(&s->temps[i]);
3322         }
3323     }
3324 }
3325 
3326 /*
3327  * liveness analysis: conditional branch: all temps are dead unless
3328  * explicitly live-across-conditional-branch, globals and local temps
3329  * should be synced.
3330  */
3331 static void la_bb_sync(TCGContext *s, int ng, int nt)
3332 {
3333     la_global_sync(s, ng);
3334 
3335     for (int i = ng; i < nt; ++i) {
3336         TCGTemp *ts = &s->temps[i];
3337         int state;
3338 
3339         switch (ts->kind) {
3340         case TEMP_TB:
3341             state = ts->state;
3342             ts->state = state | TS_MEM;
3343             if (state != TS_DEAD) {
3344                 continue;
3345             }
3346             break;
3347         case TEMP_EBB:
3348         case TEMP_CONST:
3349             continue;
3350         default:
3351             g_assert_not_reached();
3352         }
3353         la_reset_pref(&s->temps[i]);
3354     }
3355 }
3356 
3357 /* liveness analysis: sync globals back to memory and kill.  */
3358 static void la_global_kill(TCGContext *s, int ng)
3359 {
3360     int i;
3361 
3362     for (i = 0; i < ng; i++) {
3363         s->temps[i].state = TS_DEAD | TS_MEM;
3364         la_reset_pref(&s->temps[i]);
3365     }
3366 }
3367 
3368 /* liveness analysis: note live globals crossing calls.  */
3369 static void la_cross_call(TCGContext *s, int nt)
3370 {
3371     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3372     int i;
3373 
3374     for (i = 0; i < nt; i++) {
3375         TCGTemp *ts = &s->temps[i];
3376         if (!(ts->state & TS_DEAD)) {
3377             TCGRegSet *pset = la_temp_pref(ts);
3378             TCGRegSet set = *pset;
3379 
3380             set &= mask;
3381             /* If the combination is not possible, restart.  */
3382             if (set == 0) {
3383                 set = tcg_target_available_regs[ts->type] & mask;
3384             }
3385             *pset = set;
3386         }
3387     }
3388 }
3389 
3390 /*
3391  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3392  * to TEMP_EBB, if possible.
3393  */
3394 static void __attribute__((noinline))
3395 liveness_pass_0(TCGContext *s)
3396 {
3397     void * const multiple_ebb = (void *)(uintptr_t)-1;
3398     int nb_temps = s->nb_temps;
3399     TCGOp *op, *ebb;
3400 
3401     for (int i = s->nb_globals; i < nb_temps; ++i) {
3402         s->temps[i].state_ptr = NULL;
3403     }
3404 
3405     /*
3406      * Represent each EBB by the op at which it begins.  In the case of
3407      * the first EBB, this is the first op, otherwise it is a label.
3408      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3409      * within a single EBB, else MULTIPLE_EBB.
3410      */
3411     ebb = QTAILQ_FIRST(&s->ops);
3412     QTAILQ_FOREACH(op, &s->ops, link) {
3413         const TCGOpDef *def;
3414         int nb_oargs, nb_iargs;
3415 
3416         switch (op->opc) {
3417         case INDEX_op_set_label:
3418             ebb = op;
3419             continue;
3420         case INDEX_op_discard:
3421             continue;
3422         case INDEX_op_call:
3423             nb_oargs = TCGOP_CALLO(op);
3424             nb_iargs = TCGOP_CALLI(op);
3425             break;
3426         default:
3427             def = &tcg_op_defs[op->opc];
3428             nb_oargs = def->nb_oargs;
3429             nb_iargs = def->nb_iargs;
3430             break;
3431         }
3432 
3433         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3434             TCGTemp *ts = arg_temp(op->args[i]);
3435 
3436             if (ts->kind != TEMP_TB) {
3437                 continue;
3438             }
3439             if (ts->state_ptr == NULL) {
3440                 ts->state_ptr = ebb;
3441             } else if (ts->state_ptr != ebb) {
3442                 ts->state_ptr = multiple_ebb;
3443             }
3444         }
3445     }
3446 
3447     /*
3448      * For TEMP_TB that turned out not to be used beyond one EBB,
3449      * reduce the liveness to TEMP_EBB.
3450      */
3451     for (int i = s->nb_globals; i < nb_temps; ++i) {
3452         TCGTemp *ts = &s->temps[i];
3453         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3454             ts->kind = TEMP_EBB;
3455         }
3456     }
3457 }
3458 
3459 /* Liveness analysis : update the opc_arg_life array to tell if a
3460    given input arguments is dead. Instructions updating dead
3461    temporaries are removed. */
3462 static void __attribute__((noinline))
3463 liveness_pass_1(TCGContext *s)
3464 {
3465     int nb_globals = s->nb_globals;
3466     int nb_temps = s->nb_temps;
3467     TCGOp *op, *op_prev;
3468     TCGRegSet *prefs;
3469     int i;
3470 
3471     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3472     for (i = 0; i < nb_temps; ++i) {
3473         s->temps[i].state_ptr = prefs + i;
3474     }
3475 
3476     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3477     la_func_end(s, nb_globals, nb_temps);
3478 
3479     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3480         int nb_iargs, nb_oargs;
3481         TCGOpcode opc_new, opc_new2;
3482         bool have_opc_new2;
3483         TCGLifeData arg_life = 0;
3484         TCGTemp *ts;
3485         TCGOpcode opc = op->opc;
3486         const TCGOpDef *def = &tcg_op_defs[opc];
3487 
3488         switch (opc) {
3489         case INDEX_op_call:
3490             {
3491                 const TCGHelperInfo *info = tcg_call_info(op);
3492                 int call_flags = tcg_call_flags(op);
3493 
3494                 nb_oargs = TCGOP_CALLO(op);
3495                 nb_iargs = TCGOP_CALLI(op);
3496 
3497                 /* pure functions can be removed if their result is unused */
3498                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3499                     for (i = 0; i < nb_oargs; i++) {
3500                         ts = arg_temp(op->args[i]);
3501                         if (ts->state != TS_DEAD) {
3502                             goto do_not_remove_call;
3503                         }
3504                     }
3505                     goto do_remove;
3506                 }
3507             do_not_remove_call:
3508 
3509                 /* Output args are dead.  */
3510                 for (i = 0; i < nb_oargs; i++) {
3511                     ts = arg_temp(op->args[i]);
3512                     if (ts->state & TS_DEAD) {
3513                         arg_life |= DEAD_ARG << i;
3514                     }
3515                     if (ts->state & TS_MEM) {
3516                         arg_life |= SYNC_ARG << i;
3517                     }
3518                     ts->state = TS_DEAD;
3519                     la_reset_pref(ts);
3520                 }
3521 
3522                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3523                 memset(op->output_pref, 0, sizeof(op->output_pref));
3524 
3525                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3526                                     TCG_CALL_NO_READ_GLOBALS))) {
3527                     la_global_kill(s, nb_globals);
3528                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3529                     la_global_sync(s, nb_globals);
3530                 }
3531 
3532                 /* Record arguments that die in this helper.  */
3533                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3534                     ts = arg_temp(op->args[i]);
3535                     if (ts->state & TS_DEAD) {
3536                         arg_life |= DEAD_ARG << i;
3537                     }
3538                 }
3539 
3540                 /* For all live registers, remove call-clobbered prefs.  */
3541                 la_cross_call(s, nb_temps);
3542 
3543                 /*
3544                  * Input arguments are live for preceding opcodes.
3545                  *
3546                  * For those arguments that die, and will be allocated in
3547                  * registers, clear the register set for that arg, to be
3548                  * filled in below.  For args that will be on the stack,
3549                  * reset to any available reg.  Process arguments in reverse
3550                  * order so that if a temp is used more than once, the stack
3551                  * reset to max happens before the register reset to 0.
3552                  */
3553                 for (i = nb_iargs - 1; i >= 0; i--) {
3554                     const TCGCallArgumentLoc *loc = &info->in[i];
3555                     ts = arg_temp(op->args[nb_oargs + i]);
3556 
3557                     if (ts->state & TS_DEAD) {
3558                         switch (loc->kind) {
3559                         case TCG_CALL_ARG_NORMAL:
3560                         case TCG_CALL_ARG_EXTEND_U:
3561                         case TCG_CALL_ARG_EXTEND_S:
3562                             if (arg_slot_reg_p(loc->arg_slot)) {
3563                                 *la_temp_pref(ts) = 0;
3564                                 break;
3565                             }
3566                             /* fall through */
3567                         default:
3568                             *la_temp_pref(ts) =
3569                                 tcg_target_available_regs[ts->type];
3570                             break;
3571                         }
3572                         ts->state &= ~TS_DEAD;
3573                     }
3574                 }
3575 
3576                 /*
3577                  * For each input argument, add its input register to prefs.
3578                  * If a temp is used once, this produces a single set bit;
3579                  * if a temp is used multiple times, this produces a set.
3580                  */
3581                 for (i = 0; i < nb_iargs; i++) {
3582                     const TCGCallArgumentLoc *loc = &info->in[i];
3583                     ts = arg_temp(op->args[nb_oargs + i]);
3584 
3585                     switch (loc->kind) {
3586                     case TCG_CALL_ARG_NORMAL:
3587                     case TCG_CALL_ARG_EXTEND_U:
3588                     case TCG_CALL_ARG_EXTEND_S:
3589                         if (arg_slot_reg_p(loc->arg_slot)) {
3590                             tcg_regset_set_reg(*la_temp_pref(ts),
3591                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3592                         }
3593                         break;
3594                     default:
3595                         break;
3596                     }
3597                 }
3598             }
3599             break;
3600         case INDEX_op_insn_start:
3601             break;
3602         case INDEX_op_discard:
3603             /* mark the temporary as dead */
3604             ts = arg_temp(op->args[0]);
3605             ts->state = TS_DEAD;
3606             la_reset_pref(ts);
3607             break;
3608 
3609         case INDEX_op_add2_i32:
3610             opc_new = INDEX_op_add_i32;
3611             goto do_addsub2;
3612         case INDEX_op_sub2_i32:
3613             opc_new = INDEX_op_sub_i32;
3614             goto do_addsub2;
3615         case INDEX_op_add2_i64:
3616             opc_new = INDEX_op_add_i64;
3617             goto do_addsub2;
3618         case INDEX_op_sub2_i64:
3619             opc_new = INDEX_op_sub_i64;
3620         do_addsub2:
3621             nb_iargs = 4;
3622             nb_oargs = 2;
3623             /* Test if the high part of the operation is dead, but not
3624                the low part.  The result can be optimized to a simple
3625                add or sub.  This happens often for x86_64 guest when the
3626                cpu mode is set to 32 bit.  */
3627             if (arg_temp(op->args[1])->state == TS_DEAD) {
3628                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3629                     goto do_remove;
3630                 }
3631                 /* Replace the opcode and adjust the args in place,
3632                    leaving 3 unused args at the end.  */
3633                 op->opc = opc = opc_new;
3634                 op->args[1] = op->args[2];
3635                 op->args[2] = op->args[4];
3636                 /* Fall through and mark the single-word operation live.  */
3637                 nb_iargs = 2;
3638                 nb_oargs = 1;
3639             }
3640             goto do_not_remove;
3641 
3642         case INDEX_op_mulu2_i32:
3643             opc_new = INDEX_op_mul_i32;
3644             opc_new2 = INDEX_op_muluh_i32;
3645             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3646             goto do_mul2;
3647         case INDEX_op_muls2_i32:
3648             opc_new = INDEX_op_mul_i32;
3649             opc_new2 = INDEX_op_mulsh_i32;
3650             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3651             goto do_mul2;
3652         case INDEX_op_mulu2_i64:
3653             opc_new = INDEX_op_mul_i64;
3654             opc_new2 = INDEX_op_muluh_i64;
3655             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3656             goto do_mul2;
3657         case INDEX_op_muls2_i64:
3658             opc_new = INDEX_op_mul_i64;
3659             opc_new2 = INDEX_op_mulsh_i64;
3660             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3661             goto do_mul2;
3662         do_mul2:
3663             nb_iargs = 2;
3664             nb_oargs = 2;
3665             if (arg_temp(op->args[1])->state == TS_DEAD) {
3666                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3667                     /* Both parts of the operation are dead.  */
3668                     goto do_remove;
3669                 }
3670                 /* The high part of the operation is dead; generate the low. */
3671                 op->opc = opc = opc_new;
3672                 op->args[1] = op->args[2];
3673                 op->args[2] = op->args[3];
3674             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3675                 /* The low part of the operation is dead; generate the high. */
3676                 op->opc = opc = opc_new2;
3677                 op->args[0] = op->args[1];
3678                 op->args[1] = op->args[2];
3679                 op->args[2] = op->args[3];
3680             } else {
3681                 goto do_not_remove;
3682             }
3683             /* Mark the single-word operation live.  */
3684             nb_oargs = 1;
3685             goto do_not_remove;
3686 
3687         default:
3688             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3689             nb_iargs = def->nb_iargs;
3690             nb_oargs = def->nb_oargs;
3691 
3692             /* Test if the operation can be removed because all
3693                its outputs are dead. We assume that nb_oargs == 0
3694                implies side effects */
3695             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3696                 for (i = 0; i < nb_oargs; i++) {
3697                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3698                         goto do_not_remove;
3699                     }
3700                 }
3701                 goto do_remove;
3702             }
3703             goto do_not_remove;
3704 
3705         do_remove:
3706             tcg_op_remove(s, op);
3707             break;
3708 
3709         do_not_remove:
3710             for (i = 0; i < nb_oargs; i++) {
3711                 ts = arg_temp(op->args[i]);
3712 
3713                 /* Remember the preference of the uses that followed.  */
3714                 if (i < ARRAY_SIZE(op->output_pref)) {
3715                     op->output_pref[i] = *la_temp_pref(ts);
3716                 }
3717 
3718                 /* Output args are dead.  */
3719                 if (ts->state & TS_DEAD) {
3720                     arg_life |= DEAD_ARG << i;
3721                 }
3722                 if (ts->state & TS_MEM) {
3723                     arg_life |= SYNC_ARG << i;
3724                 }
3725                 ts->state = TS_DEAD;
3726                 la_reset_pref(ts);
3727             }
3728 
3729             /* If end of basic block, update.  */
3730             if (def->flags & TCG_OPF_BB_EXIT) {
3731                 la_func_end(s, nb_globals, nb_temps);
3732             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3733                 la_bb_sync(s, nb_globals, nb_temps);
3734             } else if (def->flags & TCG_OPF_BB_END) {
3735                 la_bb_end(s, nb_globals, nb_temps);
3736             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3737                 la_global_sync(s, nb_globals);
3738                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3739                     la_cross_call(s, nb_temps);
3740                 }
3741             }
3742 
3743             /* Record arguments that die in this opcode.  */
3744             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3745                 ts = arg_temp(op->args[i]);
3746                 if (ts->state & TS_DEAD) {
3747                     arg_life |= DEAD_ARG << i;
3748                 }
3749             }
3750 
3751             /* Input arguments are live for preceding opcodes.  */
3752             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3753                 ts = arg_temp(op->args[i]);
3754                 if (ts->state & TS_DEAD) {
3755                     /* For operands that were dead, initially allow
3756                        all regs for the type.  */
3757                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3758                     ts->state &= ~TS_DEAD;
3759                 }
3760             }
3761 
3762             /* Incorporate constraints for this operand.  */
3763             switch (opc) {
3764             case INDEX_op_mov_i32:
3765             case INDEX_op_mov_i64:
3766                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3767                    have proper constraints.  That said, special case
3768                    moves to propagate preferences backward.  */
3769                 if (IS_DEAD_ARG(1)) {
3770                     *la_temp_pref(arg_temp(op->args[0]))
3771                         = *la_temp_pref(arg_temp(op->args[1]));
3772                 }
3773                 break;
3774 
3775             default:
3776                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3777                     const TCGArgConstraint *ct = &def->args_ct[i];
3778                     TCGRegSet set, *pset;
3779 
3780                     ts = arg_temp(op->args[i]);
3781                     pset = la_temp_pref(ts);
3782                     set = *pset;
3783 
3784                     set &= ct->regs;
3785                     if (ct->ialias) {
3786                         set &= output_pref(op, ct->alias_index);
3787                     }
3788                     /* If the combination is not possible, restart.  */
3789                     if (set == 0) {
3790                         set = ct->regs;
3791                     }
3792                     *pset = set;
3793                 }
3794                 break;
3795             }
3796             break;
3797         }
3798         op->life = arg_life;
3799     }
3800 }
3801 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * For every global whose indirect_reg flag is set, a TEMP_EBB temporary
 * is allocated and remembered in the global's state_ptr.  The op list is
 * then walked once; every op argument that names such a global is
 * rewritten to name the direct temporary instead, with a load op inserted
 * before an op that reads a global whose state is TS_DEAD, and a store op
 * inserted after an op whose output is flagged by NEED_SYNC_ARG.
 *
 * Returns true if at least one op argument was rewritten.
 *
 * NOTE(review): IS_DEAD_ARG() and NEED_SYNC_ARG() are defined outside this
 * chunk; they appear to test bits of the local variable arg_life, which is
 * why that variable has no other visible use here -- confirm before
 * renaming it.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            /* The direct temp mirrors the type information of the global. */
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /*
     * Initialize the remaining temps, continuing from i == nb_globals.
     * s->nb_temps is read only now, after the allocations above;
     * presumably so that the temps just allocated are covered too
     * (tcg_temp_alloc is not visible here -- confirm).
     */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /*
     * The _SAFE iterator keeps the successor in op_next, because the
     * current op may be removed from the list below.
     */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            /* Calls carry their own argument counts and flags. */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Pick the load opcode matching the width of the global. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                /* ld dir_ts, mem_base, mem_offset */
                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /* Moves are special-cased so a dead destination can be elided. */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /*
                         * The destination is dead after the sync: store
                         * the move's source operand directly and drop the
                         * move itself.
                         */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    /* st out_ts, mem_base, mem_offset */
                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    /* An output that is not synced must not be dead. */
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    /* Not an indirect global: nothing to rewrite. */
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    /* st dir_ts, mem_base, mem_offset */
                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3990 
3991 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3992 {
3993     intptr_t off;
3994     int size, align;
3995 
3996     /* When allocating an object, look at the full type. */
3997     size = tcg_type_size(ts->base_type);
3998     switch (ts->base_type) {
3999     case TCG_TYPE_I32:
4000         align = 4;
4001         break;
4002     case TCG_TYPE_I64:
4003     case TCG_TYPE_V64:
4004         align = 8;
4005         break;
4006     case TCG_TYPE_I128:
4007     case TCG_TYPE_V128:
4008     case TCG_TYPE_V256:
4009         /*
4010          * Note that we do not require aligned storage for V256,
4011          * and that we provide alignment for I128 to match V128,
4012          * even if that's above what the host ABI requires.
4013          */
4014         align = 16;
4015         break;
4016     default:
4017         g_assert_not_reached();
4018     }
4019 
4020     /*
4021      * Assume the stack is sufficiently aligned.
4022      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4023      * and do not require 16 byte vector alignment.  This seems slightly
4024      * easier than fully parameterizing the above switch statement.
4025      */
4026     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4027     off = ROUND_UP(s->current_frame_offset, align);
4028 
4029     /* If we've exhausted the stack frame, restart with a smaller TB. */
4030     if (off + size > s->frame_end) {
4031         tcg_raise_tb_overflow(s);
4032     }
4033     s->current_frame_offset = off + size;
4034 #if defined(__sparc__)
4035     off += TCG_TARGET_STACK_BIAS;
4036 #endif
4037 
4038     /* If the object was subdivided, assign memory to all the parts. */
4039     if (ts->base_type != ts->type) {
4040         int part_size = tcg_type_size(ts->type);
4041         int part_count = size / part_size;
4042 
4043         /*
4044          * Each part is allocated sequentially in tcg_temp_new_internal.
4045          * Jump back to the first part by subtracting the current index.
4046          */
4047         ts -= ts->temp_subindex;
4048         for (int i = 0; i < part_count; ++i) {
4049             ts[i].mem_offset = off + i * part_size;
4050             ts[i].mem_base = s->frame_temp;
4051             ts[i].mem_allocated = 1;
4052         }
4053     } else {
4054         ts->mem_offset = off;
4055         ts->mem_base = s->frame_temp;
4056         ts->mem_allocated = 1;
4057     }
4058 }
4059 
4060 /* Assign @reg to @ts, and update reg_to_temp[]. */
4061 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4062 {
4063     if (ts->val_type == TEMP_VAL_REG) {
4064         TCGReg old = ts->reg;
4065         tcg_debug_assert(s->reg_to_temp[old] == ts);
4066         if (old == reg) {
4067             return;
4068         }
4069         s->reg_to_temp[old] = NULL;
4070     }
4071     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4072     s->reg_to_temp[reg] = ts;
4073     ts->val_type = TEMP_VAL_REG;
4074     ts->reg = reg;
4075 }
4076 
4077 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4078 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4079 {
4080     tcg_debug_assert(type != TEMP_VAL_REG);
4081     if (ts->val_type == TEMP_VAL_REG) {
4082         TCGReg reg = ts->reg;
4083         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4084         s->reg_to_temp[reg] = NULL;
4085     }
4086     ts->val_type = type;
4087 }
4088 
/*
 * Forward declaration: temp_sync() below calls temp_load(), whose
 * definition appears later in this file.
 */
static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4090 
4091 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4092    mark it free; otherwise mark it dead.  */
4093 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4094 {
4095     TCGTempVal new_type;
4096 
4097     switch (ts->kind) {
4098     case TEMP_FIXED:
4099         return;
4100     case TEMP_GLOBAL:
4101     case TEMP_TB:
4102         new_type = TEMP_VAL_MEM;
4103         break;
4104     case TEMP_EBB:
4105         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4106         break;
4107     case TEMP_CONST:
4108         new_type = TEMP_VAL_CONST;
4109         break;
4110     default:
4111         g_assert_not_reached();
4112     }
4113     set_temp_val_nonreg(s, ts, new_type);
4114 }
4115 
4116 /* Mark a temporary as dead.  */
4117 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4118 {
4119     temp_free_or_dead(s, ts, 1);
4120 }
4121 
4122 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4123    registers needs to be allocated to store a constant.  If 'free_or_dead'
4124    is non-zero, subsequently release the temporary; if it is positive, the
4125    temp is dead; if it is negative, the temp is free.  */
4126 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4127                       TCGRegSet preferred_regs, int free_or_dead)
4128 {
4129     if (!temp_readonly(ts) && !ts->mem_coherent) {
4130         if (!ts->mem_allocated) {
4131             temp_allocate_frame(s, ts);
4132         }
4133         switch (ts->val_type) {
4134         case TEMP_VAL_CONST:
4135             /* If we're going to free the temp immediately, then we won't
4136                require it later in a register, so attempt to store the
4137                constant to memory directly.  */
4138             if (free_or_dead
4139                 && tcg_out_sti(s, ts->type, ts->val,
4140                                ts->mem_base->reg, ts->mem_offset)) {
4141                 break;
4142             }
4143             temp_load(s, ts, tcg_target_available_regs[ts->type],
4144                       allocated_regs, preferred_regs);
4145             /* fallthrough */
4146 
4147         case TEMP_VAL_REG:
4148             tcg_out_st(s, ts->type, ts->reg,
4149                        ts->mem_base->reg, ts->mem_offset);
4150             break;
4151 
4152         case TEMP_VAL_MEM:
4153             break;
4154 
4155         case TEMP_VAL_DEAD:
4156         default:
4157             g_assert_not_reached();
4158         }
4159         ts->mem_coherent = 1;
4160     }
4161     if (free_or_dead) {
4162         temp_free_or_dead(s, ts, free_or_dead);
4163     }
4164 }
4165 
4166 /* free register 'reg' by spilling the corresponding temporary if necessary */
4167 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4168 {
4169     TCGTemp *ts = s->reg_to_temp[reg];
4170     if (ts != NULL) {
4171         temp_sync(s, ts, allocated_regs, 0, -1);
4172     }
4173 }
4174 
4175 /**
4176  * tcg_reg_alloc:
4177  * @required_regs: Set of registers in which we must allocate.
4178  * @allocated_regs: Set of registers which must be avoided.
4179  * @preferred_regs: Set of registers we should prefer.
4180  * @rev: True if we search the registers in "indirect" order.
4181  *
4182  * The allocated register must be in @required_regs & ~@allocated_regs,
4183  * but if we can put it in @preferred_regs we may save a move later.
4184  */
4185 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4186                             TCGRegSet allocated_regs,
4187                             TCGRegSet preferred_regs, bool rev)
4188 {
4189     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4190     TCGRegSet reg_ct[2];
4191     const int *order;
4192 
4193     reg_ct[1] = required_regs & ~allocated_regs;
4194     tcg_debug_assert(reg_ct[1] != 0);
4195     reg_ct[0] = reg_ct[1] & preferred_regs;
4196 
4197     /* Skip the preferred_regs option if it cannot be satisfied,
4198        or if the preference made no difference.  */
4199     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4200 
4201     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4202 
4203     /* Try free registers, preferences first.  */
4204     for (j = f; j < 2; j++) {
4205         TCGRegSet set = reg_ct[j];
4206 
4207         if (tcg_regset_single(set)) {
4208             /* One register in the set.  */
4209             TCGReg reg = tcg_regset_first(set);
4210             if (s->reg_to_temp[reg] == NULL) {
4211                 return reg;
4212             }
4213         } else {
4214             for (i = 0; i < n; i++) {
4215                 TCGReg reg = order[i];
4216                 if (s->reg_to_temp[reg] == NULL &&
4217                     tcg_regset_test_reg(set, reg)) {
4218                     return reg;
4219                 }
4220             }
4221         }
4222     }
4223 
4224     /* We must spill something.  */
4225     for (j = f; j < 2; j++) {
4226         TCGRegSet set = reg_ct[j];
4227 
4228         if (tcg_regset_single(set)) {
4229             /* One register in the set.  */
4230             TCGReg reg = tcg_regset_first(set);
4231             tcg_reg_free(s, reg, allocated_regs);
4232             return reg;
4233         } else {
4234             for (i = 0; i < n; i++) {
4235                 TCGReg reg = order[i];
4236                 if (tcg_regset_test_reg(set, reg)) {
4237                     tcg_reg_free(s, reg, allocated_regs);
4238                     return reg;
4239                 }
4240             }
4241         }
4242     }
4243 
4244     g_assert_not_reached();
4245 }
4246 
4247 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4248                                  TCGRegSet allocated_regs,
4249                                  TCGRegSet preferred_regs, bool rev)
4250 {
4251     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4252     TCGRegSet reg_ct[2];
4253     const int *order;
4254 
4255     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4256     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4257     tcg_debug_assert(reg_ct[1] != 0);
4258     reg_ct[0] = reg_ct[1] & preferred_regs;
4259 
4260     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4261 
4262     /*
4263      * Skip the preferred_regs option if it cannot be satisfied,
4264      * or if the preference made no difference.
4265      */
4266     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4267 
4268     /*
4269      * Minimize the number of flushes by looking for 2 free registers first,
4270      * then a single flush, then two flushes.
4271      */
4272     for (fmin = 2; fmin >= 0; fmin--) {
4273         for (j = k; j < 2; j++) {
4274             TCGRegSet set = reg_ct[j];
4275 
4276             for (i = 0; i < n; i++) {
4277                 TCGReg reg = order[i];
4278 
4279                 if (tcg_regset_test_reg(set, reg)) {
4280                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4281                     if (f >= fmin) {
4282                         tcg_reg_free(s, reg, allocated_regs);
4283                         tcg_reg_free(s, reg + 1, allocated_regs);
4284                         return reg;
4285                     }
4286                 }
4287             }
4288         }
4289     }
4290     g_assert_not_reached();
4291 }
4292 
4293 /* Make sure the temporary is in a register.  If needed, allocate the register
4294    from DESIRED while avoiding ALLOCATED.  */
4295 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4296                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4297 {
4298     TCGReg reg;
4299 
4300     switch (ts->val_type) {
4301     case TEMP_VAL_REG:
4302         return;
4303     case TEMP_VAL_CONST:
4304         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4305                             preferred_regs, ts->indirect_base);
4306         if (ts->type <= TCG_TYPE_I64) {
4307             tcg_out_movi(s, ts->type, reg, ts->val);
4308         } else {
4309             uint64_t val = ts->val;
4310             MemOp vece = MO_64;
4311 
4312             /*
4313              * Find the minimal vector element that matches the constant.
4314              * The targets will, in general, have to do this search anyway,
4315              * do this generically.
4316              */
4317             if (val == dup_const(MO_8, val)) {
4318                 vece = MO_8;
4319             } else if (val == dup_const(MO_16, val)) {
4320                 vece = MO_16;
4321             } else if (val == dup_const(MO_32, val)) {
4322                 vece = MO_32;
4323             }
4324 
4325             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4326         }
4327         ts->mem_coherent = 0;
4328         break;
4329     case TEMP_VAL_MEM:
4330         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4331                             preferred_regs, ts->indirect_base);
4332         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4333         ts->mem_coherent = 1;
4334         break;
4335     case TEMP_VAL_DEAD:
4336     default:
4337         g_assert_not_reached();
4338     }
4339     set_temp_val_reg(s, ts, reg);
4340 }
4341 
4342 /* Save a temporary to memory. 'allocated_regs' is used in case a
4343    temporary registers needs to be allocated to store a constant.  */
4344 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4345 {
4346     /* The liveness analysis already ensures that globals are back
4347        in memory. Keep an tcg_debug_assert for safety. */
4348     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4349 }
4350 
4351 /* save globals to their canonical location and assume they can be
4352    modified be the following code. 'allocated_regs' is used in case a
4353    temporary registers needs to be allocated to store a constant. */
4354 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4355 {
4356     int i, n;
4357 
4358     for (i = 0, n = s->nb_globals; i < n; i++) {
4359         temp_save(s, &s->temps[i], allocated_regs);
4360     }
4361 }
4362 
4363 /* sync globals to their canonical location and assume they can be
4364    read by the following code. 'allocated_regs' is used in case a
4365    temporary registers needs to be allocated to store a constant. */
4366 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4367 {
4368     int i, n;
4369 
4370     for (i = 0, n = s->nb_globals; i < n; i++) {
4371         TCGTemp *ts = &s->temps[i];
4372         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4373                          || ts->kind == TEMP_FIXED
4374                          || ts->mem_coherent);
4375     }
4376 }
4377 
4378 /* at the end of a basic block, we assume all temporaries are dead and
4379    all globals are stored at their canonical location. */
4380 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4381 {
4382     int i;
4383 
4384     for (i = s->nb_globals; i < s->nb_temps; i++) {
4385         TCGTemp *ts = &s->temps[i];
4386 
4387         switch (ts->kind) {
4388         case TEMP_TB:
4389             temp_save(s, ts, allocated_regs);
4390             break;
4391         case TEMP_EBB:
4392             /* The liveness analysis already ensures that temps are dead.
4393                Keep an tcg_debug_assert for safety. */
4394             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4395             break;
4396         case TEMP_CONST:
4397             /* Similarly, we should have freed any allocated register. */
4398             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4399             break;
4400         default:
4401             g_assert_not_reached();
4402         }
4403     }
4404 
4405     save_globals(s, allocated_regs);
4406 }
4407 
4408 /*
4409  * At a conditional branch, we assume all temporaries are dead unless
4410  * explicitly live-across-conditional-branch; all globals and local
4411  * temps are synced to their location.
4412  */
4413 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4414 {
4415     sync_globals(s, allocated_regs);
4416 
4417     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4418         TCGTemp *ts = &s->temps[i];
4419         /*
4420          * The liveness analysis already ensures that temps are dead.
4421          * Keep tcg_debug_asserts for safety.
4422          */
4423         switch (ts->kind) {
4424         case TEMP_TB:
4425             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4426             break;
4427         case TEMP_EBB:
4428         case TEMP_CONST:
4429             break;
4430         default:
4431             g_assert_not_reached();
4432         }
4433     }
4434 }
4435 
4436 /*
4437  * Specialized code generation for INDEX_op_mov_* with a constant.
4438  */
4439 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4440                                   tcg_target_ulong val, TCGLifeData arg_life,
4441                                   TCGRegSet preferred_regs)
4442 {
4443     /* ENV should not be modified.  */
4444     tcg_debug_assert(!temp_readonly(ots));
4445 
4446     /* The movi is not explicitly generated here.  */
4447     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4448     ots->val = val;
4449     ots->mem_coherent = 0;
4450     if (NEED_SYNC_ARG(0)) {
4451         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4452     } else if (IS_DEAD_ARG(0)) {
4453         temp_dead(s, ots);
4454     }
4455 }
4456 
4457 /*
4458  * Specialized code generation for INDEX_op_mov_*.
4459  */
4460 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4461 {
4462     const TCGLifeData arg_life = op->life;
4463     TCGRegSet allocated_regs, preferred_regs;
4464     TCGTemp *ts, *ots;
4465     TCGType otype, itype;
4466     TCGReg oreg, ireg;
4467 
4468     allocated_regs = s->reserved_regs;
4469     preferred_regs = output_pref(op, 0);
4470     ots = arg_temp(op->args[0]);
4471     ts = arg_temp(op->args[1]);
4472 
4473     /* ENV should not be modified.  */
4474     tcg_debug_assert(!temp_readonly(ots));
4475 
4476     /* Note that otype != itype for no-op truncation.  */
4477     otype = ots->type;
4478     itype = ts->type;
4479 
4480     if (ts->val_type == TEMP_VAL_CONST) {
4481         /* propagate constant or generate sti */
4482         tcg_target_ulong val = ts->val;
4483         if (IS_DEAD_ARG(1)) {
4484             temp_dead(s, ts);
4485         }
4486         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4487         return;
4488     }
4489 
4490     /* If the source value is in memory we're going to be forced
4491        to have it in a register in order to perform the copy.  Copy
4492        the SOURCE value into its own register first, that way we
4493        don't have to reload SOURCE the next time it is used. */
4494     if (ts->val_type == TEMP_VAL_MEM) {
4495         temp_load(s, ts, tcg_target_available_regs[itype],
4496                   allocated_regs, preferred_regs);
4497     }
4498     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4499     ireg = ts->reg;
4500 
4501     if (IS_DEAD_ARG(0)) {
4502         /* mov to a non-saved dead register makes no sense (even with
4503            liveness analysis disabled). */
4504         tcg_debug_assert(NEED_SYNC_ARG(0));
4505         if (!ots->mem_allocated) {
4506             temp_allocate_frame(s, ots);
4507         }
4508         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4509         if (IS_DEAD_ARG(1)) {
4510             temp_dead(s, ts);
4511         }
4512         temp_dead(s, ots);
4513         return;
4514     }
4515 
4516     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4517         /*
4518          * The mov can be suppressed.  Kill input first, so that it
4519          * is unlinked from reg_to_temp, then set the output to the
4520          * reg that we saved from the input.
4521          */
4522         temp_dead(s, ts);
4523         oreg = ireg;
4524     } else {
4525         if (ots->val_type == TEMP_VAL_REG) {
4526             oreg = ots->reg;
4527         } else {
4528             /* Make sure to not spill the input register during allocation. */
4529             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4530                                  allocated_regs | ((TCGRegSet)1 << ireg),
4531                                  preferred_regs, ots->indirect_base);
4532         }
4533         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4534             /*
4535              * Cross register class move not supported.
4536              * Store the source register into the destination slot
4537              * and leave the destination temp as TEMP_VAL_MEM.
4538              */
4539             assert(!temp_readonly(ots));
4540             if (!ts->mem_allocated) {
4541                 temp_allocate_frame(s, ots);
4542             }
4543             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4544             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4545             ots->mem_coherent = 1;
4546             return;
4547         }
4548     }
4549     set_temp_val_reg(s, ots, oreg);
4550     ots->mem_coherent = 0;
4551 
4552     if (NEED_SYNC_ARG(0)) {
4553         temp_sync(s, ots, allocated_regs, 0, 0);
4554     }
4555 }
4556 
4557 /*
4558  * Specialized code generation for INDEX_op_dup_vec.
4559  */
4560 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4561 {
4562     const TCGLifeData arg_life = op->life;
4563     TCGRegSet dup_out_regs, dup_in_regs;
4564     TCGTemp *its, *ots;
4565     TCGType itype, vtype;
4566     unsigned vece;
4567     int lowpart_ofs;
4568     bool ok;
4569 
4570     ots = arg_temp(op->args[0]);
4571     its = arg_temp(op->args[1]);
4572 
4573     /* ENV should not be modified.  */
4574     tcg_debug_assert(!temp_readonly(ots));
4575 
4576     itype = its->type;
4577     vece = TCGOP_VECE(op);
4578     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4579 
4580     if (its->val_type == TEMP_VAL_CONST) {
4581         /* Propagate constant via movi -> dupi.  */
4582         tcg_target_ulong val = its->val;
4583         if (IS_DEAD_ARG(1)) {
4584             temp_dead(s, its);
4585         }
4586         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4587         return;
4588     }
4589 
4590     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4591     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4592 
4593     /* Allocate the output register now.  */
4594     if (ots->val_type != TEMP_VAL_REG) {
4595         TCGRegSet allocated_regs = s->reserved_regs;
4596         TCGReg oreg;
4597 
4598         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4599             /* Make sure to not spill the input register. */
4600             tcg_regset_set_reg(allocated_regs, its->reg);
4601         }
4602         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4603                              output_pref(op, 0), ots->indirect_base);
4604         set_temp_val_reg(s, ots, oreg);
4605     }
4606 
4607     switch (its->val_type) {
4608     case TEMP_VAL_REG:
4609         /*
4610          * The dup constriaints must be broad, covering all possible VECE.
4611          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4612          * to fail, indicating that extra moves are required for that case.
4613          */
4614         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4615             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4616                 goto done;
4617             }
4618             /* Try again from memory or a vector input register.  */
4619         }
4620         if (!its->mem_coherent) {
4621             /*
4622              * The input register is not synced, and so an extra store
4623              * would be required to use memory.  Attempt an integer-vector
4624              * register move first.  We do not have a TCGRegSet for this.
4625              */
4626             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4627                 break;
4628             }
4629             /* Sync the temp back to its slot and load from there.  */
4630             temp_sync(s, its, s->reserved_regs, 0, 0);
4631         }
4632         /* fall through */
4633 
4634     case TEMP_VAL_MEM:
4635         lowpart_ofs = 0;
4636         if (HOST_BIG_ENDIAN) {
4637             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4638         }
4639         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4640                              its->mem_offset + lowpart_ofs)) {
4641             goto done;
4642         }
4643         /* Load the input into the destination vector register. */
4644         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4645         break;
4646 
4647     default:
4648         g_assert_not_reached();
4649     }
4650 
4651     /* We now have a vector input register, so dup must succeed. */
4652     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4653     tcg_debug_assert(ok);
4654 
4655  done:
4656     ots->mem_coherent = 0;
4657     if (IS_DEAD_ARG(1)) {
4658         temp_dead(s, its);
4659     }
4660     if (NEED_SYNC_ARG(0)) {
4661         temp_sync(s, ots, s->reserved_regs, 0, 0);
4662     }
4663     if (IS_DEAD_ARG(0)) {
4664         temp_dead(s, ots);
4665     }
4666 }
4667 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The work is done in phases:
 *   1. Copy the op's constant arguments into new_args[].
 *   2. Satisfy the input constraints, visiting inputs in the order
 *      given by the constraints' sort_index.  A constant temp accepted
 *      by tcg_target_const_match() is passed as an immediate
 *      (const_args[i] = 1); every other input ends up in a register
 *      recorded in new_args[i], taking aliasing with outputs and
 *      register pairs into account.
 *   3. Mark dead inputs with temp_dead().
 *   4. For a conditional branch or a basic-block end, run the matching
 *      helper.  Otherwise free call-clobbered registers if the op
 *      clobbers them, check global sync state if the op has side
 *      effects, and pick a register for every output.
 *   5. Emit the instruction: a fixed set of extension ops go through
 *      dedicated tcg_out_ext* helpers, vector ops through
 *      tcg_out_vec_op(), everything else through tcg_out_op().
 *   6. Sync and/or kill each output as requested by op->life.
 */
4668 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4669 {
4670     const TCGLifeData arg_life = op->life;
4671     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4672     TCGRegSet i_allocated_regs;
4673     TCGRegSet o_allocated_regs;
4674     int i, k, nb_iargs, nb_oargs;
4675     TCGReg reg;
4676     TCGArg arg;
4677     const TCGArgConstraint *arg_ct;
4678     TCGTemp *ts;
4679     TCGArg new_args[TCG_MAX_OP_ARGS];
4680     int const_args[TCG_MAX_OP_ARGS];
4681 
4682     nb_oargs = def->nb_oargs;
4683     nb_iargs = def->nb_iargs;
4684 
4685     /* copy constants */
4686     memcpy(new_args + nb_oargs + nb_iargs,
4687            op->args + nb_oargs + nb_iargs,
4688            sizeof(TCGArg) * def->nb_cargs);
4689 
4690     i_allocated_regs = s->reserved_regs;
4691     o_allocated_regs = s->reserved_regs;
4692 
4693     /* satisfy input constraints */
4694     for (k = 0; k < nb_iargs; k++) {
4695         TCGRegSet i_preferred_regs, i_required_regs;
4696         bool allocate_new_reg, copyto_new_reg;
4697         TCGTemp *ts2;
4698         int i1, i2;
4699 
4700         i = def->args_ct[nb_oargs + k].sort_index;
4701         arg = op->args[i];
4702         arg_ct = &def->args_ct[i];
4703         ts = arg_temp(arg);
4704 
4705         if (ts->val_type == TEMP_VAL_CONST
4706             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct, TCGOP_VECE(op))) {
4707             /* constant is OK for instruction */
4708             const_args[i] = 1;
4709             new_args[i] = ts->val;
4710             continue;
4711         }
4712 
        /*
         * 'reg' is read here regardless of val_type; the cases below
         * test ts->val_type == TEMP_VAL_REG before relying on it, or
         * overwrite it after loading/allocating.
         */
4713         reg = ts->reg;
4714         i_preferred_regs = 0;
4715         i_required_regs = arg_ct->regs;
4716         allocate_new_reg = false;
4717         copyto_new_reg = false;
4718 
4719         switch (arg_ct->pair) {
4720         case 0: /* not paired */
4721             if (arg_ct->ialias) {
4722                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4723 
4724                 /*
4725                  * If the input is readonly, then it cannot also be an
4726                  * output and aliased to itself.  If the input is not
4727                  * dead after the instruction, we must allocate a new
4728                  * register and move it.
4729                  */
4730                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4731                     || def->args_ct[arg_ct->alias_index].newreg) {
4732                     allocate_new_reg = true;
4733                 } else if (ts->val_type == TEMP_VAL_REG) {
4734                     /*
4735                      * Check if the current register has already been
4736                      * allocated for another input.
4737                      */
4738                     allocate_new_reg =
4739                         tcg_regset_test_reg(i_allocated_regs, reg);
4740                 }
4741             }
4742             if (!allocate_new_reg) {
4743                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4744                           i_preferred_regs);
4745                 reg = ts->reg;
4746                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4747             }
4748             if (allocate_new_reg) {
4749                 /*
4750                  * Allocate a new register matching the constraint
4751                  * and move the temporary register into it.
4752                  */
4753                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4754                           i_allocated_regs, 0);
4755                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4756                                     i_preferred_regs, ts->indirect_base);
4757                 copyto_new_reg = true;
4758             }
4759             break;
4760 
4761         case 1:
4762             /* First of an input pair; if i1 == i2, the second is an output. */
4763             i1 = i;
4764             i2 = arg_ct->pair_index;
4765             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4766 
4767             /*
4768              * It is easier to default to allocating a new pair
4769              * and to identify a few cases where it's not required.
4770              */
4771             if (arg_ct->ialias) {
4772                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4773                 if (IS_DEAD_ARG(i1) &&
4774                     IS_DEAD_ARG(i2) &&
4775                     !temp_readonly(ts) &&
4776                     ts->val_type == TEMP_VAL_REG &&
4777                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4778                     tcg_regset_test_reg(i_required_regs, reg) &&
4779                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4780                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4781                     (ts2
4782                      ? ts2->val_type == TEMP_VAL_REG &&
4783                        ts2->reg == reg + 1 &&
4784                        !temp_readonly(ts2)
4785                      : s->reg_to_temp[reg + 1] == NULL)) {
4786                     break;
4787                 }
4788             } else {
4789                 /* Without aliasing, the pair must also be an input. */
4790                 tcg_debug_assert(ts2);
4791                 if (ts->val_type == TEMP_VAL_REG &&
4792                     ts2->val_type == TEMP_VAL_REG &&
4793                     ts2->reg == reg + 1 &&
4794                     tcg_regset_test_reg(i_required_regs, reg)) {
4795                     break;
4796                 }
4797             }
4798             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4799                                      0, ts->indirect_base);
4800             goto do_pair;
4801 
4802         case 2: /* pair second */
4803             reg = new_args[arg_ct->pair_index] + 1;
4804             goto do_pair;
4805 
4806         case 3: /* ialias with second output, no first input */
4807             tcg_debug_assert(arg_ct->ialias);
4808             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4809 
4810             if (IS_DEAD_ARG(i) &&
4811                 !temp_readonly(ts) &&
4812                 ts->val_type == TEMP_VAL_REG &&
4813                 reg > 0 &&
4814                 s->reg_to_temp[reg - 1] == NULL &&
4815                 tcg_regset_test_reg(i_required_regs, reg) &&
4816                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4817                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4818                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4819                 break;
4820             }
4821             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4822                                      i_allocated_regs, 0,
4823                                      ts->indirect_base);
4824             tcg_regset_set_reg(i_allocated_regs, reg);
4825             reg += 1;
4826             goto do_pair;
4827 
4828         do_pair:
4829             /*
4830              * If an aliased input is not dead after the instruction,
4831              * we must allocate a new register and move it.
4832              */
4833             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4834                 TCGRegSet t_allocated_regs = i_allocated_regs;
4835 
4836                 /*
4837                  * Because of the alias, and the continued life, make sure
4838                  * that the temp is somewhere *other* than the reg pair,
4839                  * and we get a copy in reg.
4840                  */
4841                 tcg_regset_set_reg(t_allocated_regs, reg);
4842                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4843                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4844                     /* If ts was already in reg, copy it somewhere else. */
4845                     TCGReg nr;
4846                     bool ok;
4847 
4848                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4849                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4850                                        t_allocated_regs, 0, ts->indirect_base);
4851                     ok = tcg_out_mov(s, ts->type, nr, reg);
4852                     tcg_debug_assert(ok);
4853 
4854                     set_temp_val_reg(s, ts, nr);
4855                 } else {
4856                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4857                               t_allocated_regs, 0);
4858                     copyto_new_reg = true;
4859                 }
4860             } else {
4861                 /* Preferably allocate to reg, otherwise copy. */
4862                 i_required_regs = (TCGRegSet)1 << reg;
4863                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4864                           i_preferred_regs);
4865                 copyto_new_reg = ts->reg != reg;
4866             }
4867             break;
4868 
4869         default:
4870             g_assert_not_reached();
4871         }
4872 
4873         if (copyto_new_reg) {
4874             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4875                 /*
4876                  * Cross register class move not supported.  Sync the
4877                  * temp back to its slot and load from there.
4878                  */
4879                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4880                 tcg_out_ld(s, ts->type, reg,
4881                            ts->mem_base->reg, ts->mem_offset);
4882             }
4883         }
4884         new_args[i] = reg;
4885         const_args[i] = 0;
4886         tcg_regset_set_reg(i_allocated_regs, reg);
4887     }
4888 
4889     /* mark dead temporaries and free the associated registers */
4890     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4891         if (IS_DEAD_ARG(i)) {
4892             temp_dead(s, arg_temp(op->args[i]));
4893         }
4894     }
4895 
4896     if (def->flags & TCG_OPF_COND_BRANCH) {
4897         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4898     } else if (def->flags & TCG_OPF_BB_END) {
4899         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4900     } else {
4901         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4902             /* XXX: permit generic clobber register list ? */
4903             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4904                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4905                     tcg_reg_free(s, i, i_allocated_regs);
4906                 }
4907             }
4908         }
4909         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4910             /* sync globals if the op has side effects and might trigger
4911                an exception. */
4912             sync_globals(s, i_allocated_regs);
4913         }
4914 
4915         /* satisfy the output constraints */
4916         for(k = 0; k < nb_oargs; k++) {
4917             i = def->args_ct[k].sort_index;
4918             arg = op->args[i];
4919             arg_ct = &def->args_ct[i];
4920             ts = arg_temp(arg);
4921 
4922             /* ENV should not be modified.  */
4923             tcg_debug_assert(!temp_readonly(ts));
4924 
4925             switch (arg_ct->pair) {
4926             case 0: /* not paired */
4927                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Reuse the register of the aliased input. */
4928                     reg = new_args[arg_ct->alias_index];
4929                 } else if (arg_ct->newreg) {
                    /* Must not overlap any input register either. */
4930                     reg = tcg_reg_alloc(s, arg_ct->regs,
4931                                         i_allocated_regs | o_allocated_regs,
4932                                         output_pref(op, k), ts->indirect_base);
4933                 } else {
4934                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4935                                         output_pref(op, k), ts->indirect_base);
4936                 }
4937                 break;
4938 
4939             case 1: /* first of pair */
4940                 tcg_debug_assert(!arg_ct->newreg);
4941                 if (arg_ct->oalias) {
4942                     reg = new_args[arg_ct->alias_index];
4943                     break;
4944                 }
4945                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4946                                          output_pref(op, k), ts->indirect_base);
4947                 break;
4948 
4949             case 2: /* second of pair */
4950                 tcg_debug_assert(!arg_ct->newreg);
4951                 if (arg_ct->oalias) {
4952                     reg = new_args[arg_ct->alias_index];
4953                 } else {
4954                     reg = new_args[arg_ct->pair_index] + 1;
4955                 }
4956                 break;
4957 
4958             case 3: /* first of pair, aliasing with a second input */
4959                 tcg_debug_assert(!arg_ct->newreg);
4960                 reg = new_args[arg_ct->pair_index] - 1;
4961                 break;
4962 
4963             default:
4964                 g_assert_not_reached();
4965             }
4966             tcg_regset_set_reg(o_allocated_regs, reg);
4967             set_temp_val_reg(s, ts, reg);
4968             ts->mem_coherent = 0;
4969             new_args[i] = reg;
4970         }
4971     }
4972 
4973     /* emit instruction */
4974     switch (op->opc) {
4975     case INDEX_op_ext8s_i32:
4976         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4977         break;
4978     case INDEX_op_ext8s_i64:
4979         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4980         break;
4981     case INDEX_op_ext8u_i32:
4982     case INDEX_op_ext8u_i64:
4983         tcg_out_ext8u(s, new_args[0], new_args[1]);
4984         break;
4985     case INDEX_op_ext16s_i32:
4986         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4987         break;
4988     case INDEX_op_ext16s_i64:
4989         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4990         break;
4991     case INDEX_op_ext16u_i32:
4992     case INDEX_op_ext16u_i64:
4993         tcg_out_ext16u(s, new_args[0], new_args[1]);
4994         break;
4995     case INDEX_op_ext32s_i64:
4996         tcg_out_ext32s(s, new_args[0], new_args[1]);
4997         break;
4998     case INDEX_op_ext32u_i64:
4999         tcg_out_ext32u(s, new_args[0], new_args[1]);
5000         break;
5001     case INDEX_op_ext_i32_i64:
5002         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5003         break;
5004     case INDEX_op_extu_i32_i64:
5005         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5006         break;
5007     case INDEX_op_extrl_i64_i32:
5008         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5009         break;
5010     default:
5011         if (def->flags & TCG_OPF_VECTOR) {
5012             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5013                            new_args, const_args);
5014         } else {
5015             tcg_out_op(s, op->opc, new_args, const_args);
5016         }
5017         break;
5018     }
5019 
5020     /* sync outputs to memory and/or mark them dead, as op->life requests */
5021     for(i = 0; i < nb_oargs; i++) {
5022         ts = arg_temp(op->args[i]);
5023 
5024         /* ENV should not be modified.  */
5025         tcg_debug_assert(!temp_readonly(ts));
5026 
5027         if (NEED_SYNC_ARG(i)) {
5028             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5029         } else if (IS_DEAD_ARG(i)) {
5030             temp_dead(s, ts);
5031         }
5032     }
5033 }
5034 
5035 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5036 {
5037     const TCGLifeData arg_life = op->life;
5038     TCGTemp *ots, *itsl, *itsh;
5039     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5040 
5041     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5042     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5043     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5044 
5045     ots = arg_temp(op->args[0]);
5046     itsl = arg_temp(op->args[1]);
5047     itsh = arg_temp(op->args[2]);
5048 
5049     /* ENV should not be modified.  */
5050     tcg_debug_assert(!temp_readonly(ots));
5051 
5052     /* Allocate the output register now.  */
5053     if (ots->val_type != TEMP_VAL_REG) {
5054         TCGRegSet allocated_regs = s->reserved_regs;
5055         TCGRegSet dup_out_regs =
5056             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5057         TCGReg oreg;
5058 
5059         /* Make sure to not spill the input registers. */
5060         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5061             tcg_regset_set_reg(allocated_regs, itsl->reg);
5062         }
5063         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5064             tcg_regset_set_reg(allocated_regs, itsh->reg);
5065         }
5066 
5067         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5068                              output_pref(op, 0), ots->indirect_base);
5069         set_temp_val_reg(s, ots, oreg);
5070     }
5071 
5072     /* Promote dup2 of immediates to dupi_vec. */
5073     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5074         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5075         MemOp vece = MO_64;
5076 
5077         if (val == dup_const(MO_8, val)) {
5078             vece = MO_8;
5079         } else if (val == dup_const(MO_16, val)) {
5080             vece = MO_16;
5081         } else if (val == dup_const(MO_32, val)) {
5082             vece = MO_32;
5083         }
5084 
5085         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5086         goto done;
5087     }
5088 
5089     /* If the two inputs form one 64-bit value, try dupm_vec. */
5090     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5091         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5092         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5093         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5094 
5095         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5096         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5097 
5098         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5099                              its->mem_base->reg, its->mem_offset)) {
5100             goto done;
5101         }
5102     }
5103 
5104     /* Fall back to generic expansion. */
5105     return false;
5106 
5107  done:
5108     ots->mem_coherent = 0;
5109     if (IS_DEAD_ARG(1)) {
5110         temp_dead(s, itsl);
5111     }
5112     if (IS_DEAD_ARG(2)) {
5113         temp_dead(s, itsh);
5114     }
5115     if (NEED_SYNC_ARG(0)) {
5116         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5117     } else if (IS_DEAD_ARG(0)) {
5118         temp_dead(s, ots);
5119     }
5120     return true;
5121 }
5122 
5123 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5124                          TCGRegSet allocated_regs)
5125 {
5126     if (ts->val_type == TEMP_VAL_REG) {
5127         if (ts->reg != reg) {
5128             tcg_reg_free(s, reg, allocated_regs);
5129             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5130                 /*
5131                  * Cross register class move not supported.  Sync the
5132                  * temp back to its slot and load from there.
5133                  */
5134                 temp_sync(s, ts, allocated_regs, 0, 0);
5135                 tcg_out_ld(s, ts->type, reg,
5136                            ts->mem_base->reg, ts->mem_offset);
5137             }
5138         }
5139     } else {
5140         TCGRegSet arg_set = 0;
5141 
5142         tcg_reg_free(s, reg, allocated_regs);
5143         tcg_regset_set_reg(arg_set, reg);
5144         temp_load(s, ts, arg_set, allocated_regs, 0);
5145     }
5146 }
5147 
5148 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5149                          TCGRegSet allocated_regs)
5150 {
5151     /*
5152      * When the destination is on the stack, load up the temp and store.
5153      * If there are many call-saved registers, the temp might live to
5154      * see another use; otherwise it'll be discarded.
5155      */
5156     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5157     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5158                arg_slot_stk_ofs(arg_slot));
5159 }
5160 
5161 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5162                             TCGTemp *ts, TCGRegSet *allocated_regs)
5163 {
5164     if (arg_slot_reg_p(l->arg_slot)) {
5165         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5166         load_arg_reg(s, reg, ts, *allocated_regs);
5167         tcg_regset_set_reg(*allocated_regs, reg);
5168     } else {
5169         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5170     }
5171 }
5172 
5173 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5174                          intptr_t ref_off, TCGRegSet *allocated_regs)
5175 {
5176     TCGReg reg;
5177 
5178     if (arg_slot_reg_p(arg_slot)) {
5179         reg = tcg_target_call_iarg_regs[arg_slot];
5180         tcg_reg_free(s, reg, *allocated_regs);
5181         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5182         tcg_regset_set_reg(*allocated_regs, reg);
5183     } else {
5184         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5185                             *allocated_regs, 0, false);
5186         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5187         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5188                    arg_slot_stk_ofs(arg_slot));
5189     }
5190 }
5191 
5192 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5193 {
5194     const int nb_oargs = TCGOP_CALLO(op);
5195     const int nb_iargs = TCGOP_CALLI(op);
5196     const TCGLifeData arg_life = op->life;
5197     const TCGHelperInfo *info = tcg_call_info(op);
5198     TCGRegSet allocated_regs = s->reserved_regs;
5199     int i;
5200 
5201     /*
5202      * Move inputs into place in reverse order,
5203      * so that we place stacked arguments first.
5204      */
5205     for (i = nb_iargs - 1; i >= 0; --i) {
5206         const TCGCallArgumentLoc *loc = &info->in[i];
5207         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5208 
5209         switch (loc->kind) {
5210         case TCG_CALL_ARG_NORMAL:
5211         case TCG_CALL_ARG_EXTEND_U:
5212         case TCG_CALL_ARG_EXTEND_S:
5213             load_arg_normal(s, loc, ts, &allocated_regs);
5214             break;
5215         case TCG_CALL_ARG_BY_REF:
5216             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5217             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5218                          arg_slot_stk_ofs(loc->ref_slot),
5219                          &allocated_regs);
5220             break;
5221         case TCG_CALL_ARG_BY_REF_N:
5222             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5223             break;
5224         default:
5225             g_assert_not_reached();
5226         }
5227     }
5228 
5229     /* Mark dead temporaries and free the associated registers.  */
5230     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5231         if (IS_DEAD_ARG(i)) {
5232             temp_dead(s, arg_temp(op->args[i]));
5233         }
5234     }
5235 
5236     /* Clobber call registers.  */
5237     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5238         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5239             tcg_reg_free(s, i, allocated_regs);
5240         }
5241     }
5242 
5243     /*
5244      * Save globals if they might be written by the helper,
5245      * sync them if they might be read.
5246      */
5247     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5248         /* Nothing to do */
5249     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5250         sync_globals(s, allocated_regs);
5251     } else {
5252         save_globals(s, allocated_regs);
5253     }
5254 
5255     /*
5256      * If the ABI passes a pointer to the returned struct as the first
5257      * argument, load that now.  Pass a pointer to the output home slot.
5258      */
5259     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5260         TCGTemp *ts = arg_temp(op->args[0]);
5261 
5262         if (!ts->mem_allocated) {
5263             temp_allocate_frame(s, ts);
5264         }
5265         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5266     }
5267 
5268     tcg_out_call(s, tcg_call_func(op), info);
5269 
5270     /* Assign output registers and emit moves if needed.  */
5271     switch (info->out_kind) {
5272     case TCG_CALL_RET_NORMAL:
5273         for (i = 0; i < nb_oargs; i++) {
5274             TCGTemp *ts = arg_temp(op->args[i]);
5275             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5276 
5277             /* ENV should not be modified.  */
5278             tcg_debug_assert(!temp_readonly(ts));
5279 
5280             set_temp_val_reg(s, ts, reg);
5281             ts->mem_coherent = 0;
5282         }
5283         break;
5284 
5285     case TCG_CALL_RET_BY_VEC:
5286         {
5287             TCGTemp *ts = arg_temp(op->args[0]);
5288 
5289             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5290             tcg_debug_assert(ts->temp_subindex == 0);
5291             if (!ts->mem_allocated) {
5292                 temp_allocate_frame(s, ts);
5293             }
5294             tcg_out_st(s, TCG_TYPE_V128,
5295                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5296                        ts->mem_base->reg, ts->mem_offset);
5297         }
5298         /* fall through to mark all parts in memory */
5299 
5300     case TCG_CALL_RET_BY_REF:
5301         /* The callee has performed a write through the reference. */
5302         for (i = 0; i < nb_oargs; i++) {
5303             TCGTemp *ts = arg_temp(op->args[i]);
5304             ts->val_type = TEMP_VAL_MEM;
5305         }
5306         break;
5307 
5308     default:
5309         g_assert_not_reached();
5310     }
5311 
5312     /* Flush or discard output registers as needed. */
5313     for (i = 0; i < nb_oargs; i++) {
5314         TCGTemp *ts = arg_temp(op->args[i]);
5315         if (NEED_SYNC_ARG(i)) {
5316             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5317         } else if (IS_DEAD_ARG(i)) {
5318             temp_dead(s, ts);
5319         }
5320     }
5321 }
5322 
5323 /**
5324  * atom_and_align_for_opc:
5325  * @s: tcg context
5326  * @opc: memory operation code
5327  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5328  * @allow_two_ops: true if we are prepared to issue two operations
5329  *
5330  * Return the alignment and atomicity to use for the inline fast path
5331  * for the given memory operation.  The alignment may be larger than
5332  * that specified in @opc, and the correct alignment will be diagnosed
5333  * by the slow path helper.
5334  *
5335  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5336  * and issue two loads or stores for subalignment.
5337  */
5338 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5339                                            MemOp host_atom, bool allow_two_ops)
5340 {
5341     MemOp align = get_alignment_bits(opc);
5342     MemOp size = opc & MO_SIZE;
5343     MemOp half = size ? size - 1 : 0;
5344     MemOp atmax;
5345     MemOp atom;
5346 
5347     /* When serialized, no further atomicity required.  */
5348     if (s->gen_tb->cflags & CF_PARALLEL) {
5349         atom = opc & MO_ATOM_MASK;
5350     } else {
5351         atom = MO_ATOM_NONE;
5352     }
5353 
5354     switch (atom) {
5355     case MO_ATOM_NONE:
5356         /* The operation requires no specific atomicity. */
5357         atmax = MO_8;
5358         break;
5359 
5360     case MO_ATOM_IFALIGN:
5361         atmax = size;
5362         break;
5363 
5364     case MO_ATOM_IFALIGN_PAIR:
5365         atmax = half;
5366         break;
5367 
5368     case MO_ATOM_WITHIN16:
5369         atmax = size;
5370         if (size == MO_128) {
5371             /* Misalignment implies !within16, and therefore no atomicity. */
5372         } else if (host_atom != MO_ATOM_WITHIN16) {
5373             /* The host does not implement within16, so require alignment. */
5374             align = MAX(align, size);
5375         }
5376         break;
5377 
5378     case MO_ATOM_WITHIN16_PAIR:
5379         atmax = size;
5380         /*
5381          * Misalignment implies !within16, and therefore half atomicity.
5382          * Any host prepared for two operations can implement this with
5383          * half alignment.
5384          */
5385         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5386             align = MAX(align, half);
5387         }
5388         break;
5389 
5390     case MO_ATOM_SUBALIGN:
5391         atmax = size;
5392         if (host_atom != MO_ATOM_SUBALIGN) {
5393             /* If unaligned but not odd, there are subobjects up to half. */
5394             if (allow_two_ops) {
5395                 align = MAX(align, half);
5396             } else {
5397                 align = MAX(align, size);
5398             }
5399         }
5400         break;
5401 
5402     default:
5403         g_assert_not_reached();
5404     }
5405 
5406     return (TCGAtomAlign){ .atom = atmax, .align = align };
5407 }
5408 
5409 /*
5410  * Similarly for qemu_ld/st slow path helpers.
5411  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5412  * using only the provided backend tcg_out_* functions.
5413  */
5414 
5415 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5416 {
5417     int ofs = arg_slot_stk_ofs(slot);
5418 
5419     /*
5420      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5421      * require extension to uint64_t, adjust the address for uint32_t.
5422      */
5423     if (HOST_BIG_ENDIAN &&
5424         TCG_TARGET_REG_BITS == 64 &&
5425         type == TCG_TYPE_I32) {
5426         ofs += 4;
5427     }
5428     return ofs;
5429 }
5430 
5431 static void tcg_out_helper_load_slots(TCGContext *s,
5432                                       unsigned nmov, TCGMovExtend *mov,
5433                                       const TCGLdstHelperParam *parm)
5434 {
5435     unsigned i;
5436     TCGReg dst3;
5437 
5438     /*
5439      * Start from the end, storing to the stack first.
5440      * This frees those registers, so we need not consider overlap.
5441      */
5442     for (i = nmov; i-- > 0; ) {
5443         unsigned slot = mov[i].dst;
5444 
5445         if (arg_slot_reg_p(slot)) {
5446             goto found_reg;
5447         }
5448 
5449         TCGReg src = mov[i].src;
5450         TCGType dst_type = mov[i].dst_type;
5451         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5452 
5453         /* The argument is going onto the stack; extend into scratch. */
5454         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5455             tcg_debug_assert(parm->ntmp != 0);
5456             mov[i].dst = src = parm->tmp[0];
5457             tcg_out_movext1(s, &mov[i]);
5458         }
5459 
5460         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5461                    tcg_out_helper_stk_ofs(dst_type, slot));
5462     }
5463     return;
5464 
5465  found_reg:
5466     /*
5467      * The remaining arguments are in registers.
5468      * Convert slot numbers to argument registers.
5469      */
5470     nmov = i + 1;
5471     for (i = 0; i < nmov; ++i) {
5472         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5473     }
5474 
5475     switch (nmov) {
5476     case 4:
5477         /* The backend must have provided enough temps for the worst case. */
5478         tcg_debug_assert(parm->ntmp >= 2);
5479 
5480         dst3 = mov[3].dst;
5481         for (unsigned j = 0; j < 3; ++j) {
5482             if (dst3 == mov[j].src) {
5483                 /*
5484                  * Conflict. Copy the source to a temporary, perform the
5485                  * remaining moves, then the extension from our scratch
5486                  * on the way out.
5487                  */
5488                 TCGReg scratch = parm->tmp[1];
5489 
5490                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5491                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5492                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5493                 break;
5494             }
5495         }
5496 
5497         /* No conflicts: perform this move and continue. */
5498         tcg_out_movext1(s, &mov[3]);
5499         /* fall through */
5500 
5501     case 3:
5502         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5503                         parm->ntmp ? parm->tmp[0] : -1);
5504         break;
5505     case 2:
5506         tcg_out_movext2(s, mov, mov + 1,
5507                         parm->ntmp ? parm->tmp[0] : -1);
5508         break;
5509     case 1:
5510         tcg_out_movext1(s, mov);
5511         break;
5512     default:
5513         g_assert_not_reached();
5514     }
5515 }
5516 
5517 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5518                                     TCGType type, tcg_target_long imm,
5519                                     const TCGLdstHelperParam *parm)
5520 {
5521     if (arg_slot_reg_p(slot)) {
5522         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5523     } else {
5524         int ofs = tcg_out_helper_stk_ofs(type, slot);
5525         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5526             tcg_debug_assert(parm->ntmp != 0);
5527             tcg_out_movi(s, type, parm->tmp[0], imm);
5528             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5529         }
5530     }
5531 }
5532 
5533 static void tcg_out_helper_load_common_args(TCGContext *s,
5534                                             const TCGLabelQemuLdst *ldst,
5535                                             const TCGLdstHelperParam *parm,
5536                                             const TCGHelperInfo *info,
5537                                             unsigned next_arg)
5538 {
5539     TCGMovExtend ptr_mov = {
5540         .dst_type = TCG_TYPE_PTR,
5541         .src_type = TCG_TYPE_PTR,
5542         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5543     };
5544     const TCGCallArgumentLoc *loc = &info->in[0];
5545     TCGType type;
5546     unsigned slot;
5547     tcg_target_ulong imm;
5548 
5549     /*
5550      * Handle env, which is always first.
5551      */
5552     ptr_mov.dst = loc->arg_slot;
5553     ptr_mov.src = TCG_AREG0;
5554     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5555 
5556     /*
5557      * Handle oi.
5558      */
5559     imm = ldst->oi;
5560     loc = &info->in[next_arg];
5561     type = TCG_TYPE_I32;
5562     switch (loc->kind) {
5563     case TCG_CALL_ARG_NORMAL:
5564         break;
5565     case TCG_CALL_ARG_EXTEND_U:
5566     case TCG_CALL_ARG_EXTEND_S:
5567         /* No extension required for MemOpIdx. */
5568         tcg_debug_assert(imm <= INT32_MAX);
5569         type = TCG_TYPE_REG;
5570         break;
5571     default:
5572         g_assert_not_reached();
5573     }
5574     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5575     next_arg++;
5576 
5577     /*
5578      * Handle ra.
5579      */
5580     loc = &info->in[next_arg];
5581     slot = loc->arg_slot;
5582     if (parm->ra_gen) {
5583         int arg_reg = -1;
5584         TCGReg ra_reg;
5585 
5586         if (arg_slot_reg_p(slot)) {
5587             arg_reg = tcg_target_call_iarg_regs[slot];
5588         }
5589         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5590 
5591         ptr_mov.dst = slot;
5592         ptr_mov.src = ra_reg;
5593         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5594     } else {
5595         imm = (uintptr_t)ldst->raddr;
5596         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5597     }
5598 }
5599 
5600 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5601                                        const TCGCallArgumentLoc *loc,
5602                                        TCGType dst_type, TCGType src_type,
5603                                        TCGReg lo, TCGReg hi)
5604 {
5605     MemOp reg_mo;
5606 
5607     if (dst_type <= TCG_TYPE_REG) {
5608         MemOp src_ext;
5609 
5610         switch (loc->kind) {
5611         case TCG_CALL_ARG_NORMAL:
5612             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5613             break;
5614         case TCG_CALL_ARG_EXTEND_U:
5615             dst_type = TCG_TYPE_REG;
5616             src_ext = MO_UL;
5617             break;
5618         case TCG_CALL_ARG_EXTEND_S:
5619             dst_type = TCG_TYPE_REG;
5620             src_ext = MO_SL;
5621             break;
5622         default:
5623             g_assert_not_reached();
5624         }
5625 
5626         mov[0].dst = loc->arg_slot;
5627         mov[0].dst_type = dst_type;
5628         mov[0].src = lo;
5629         mov[0].src_type = src_type;
5630         mov[0].src_ext = src_ext;
5631         return 1;
5632     }
5633 
5634     if (TCG_TARGET_REG_BITS == 32) {
5635         assert(dst_type == TCG_TYPE_I64);
5636         reg_mo = MO_32;
5637     } else {
5638         assert(dst_type == TCG_TYPE_I128);
5639         reg_mo = MO_64;
5640     }
5641 
5642     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5643     mov[0].src = lo;
5644     mov[0].dst_type = TCG_TYPE_REG;
5645     mov[0].src_type = TCG_TYPE_REG;
5646     mov[0].src_ext = reg_mo;
5647 
5648     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5649     mov[1].src = hi;
5650     mov[1].dst_type = TCG_TYPE_REG;
5651     mov[1].src_type = TCG_TYPE_REG;
5652     mov[1].src_ext = reg_mo;
5653 
5654     return 2;
5655 }
5656 
5657 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5658                                    const TCGLdstHelperParam *parm)
5659 {
5660     const TCGHelperInfo *info;
5661     const TCGCallArgumentLoc *loc;
5662     TCGMovExtend mov[2];
5663     unsigned next_arg, nmov;
5664     MemOp mop = get_memop(ldst->oi);
5665 
5666     switch (mop & MO_SIZE) {
5667     case MO_8:
5668     case MO_16:
5669     case MO_32:
5670         info = &info_helper_ld32_mmu;
5671         break;
5672     case MO_64:
5673         info = &info_helper_ld64_mmu;
5674         break;
5675     case MO_128:
5676         info = &info_helper_ld128_mmu;
5677         break;
5678     default:
5679         g_assert_not_reached();
5680     }
5681 
5682     /* Defer env argument. */
5683     next_arg = 1;
5684 
5685     loc = &info->in[next_arg];
5686     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5687         /*
5688          * 32-bit host with 32-bit guest: zero-extend the guest address
5689          * to 64-bits for the helper by storing the low part, then
5690          * load a zero for the high part.
5691          */
5692         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5693                                TCG_TYPE_I32, TCG_TYPE_I32,
5694                                ldst->addrlo_reg, -1);
5695         tcg_out_helper_load_slots(s, 1, mov, parm);
5696 
5697         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5698                                 TCG_TYPE_I32, 0, parm);
5699         next_arg += 2;
5700     } else {
5701         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5702                                       ldst->addrlo_reg, ldst->addrhi_reg);
5703         tcg_out_helper_load_slots(s, nmov, mov, parm);
5704         next_arg += nmov;
5705     }
5706 
5707     switch (info->out_kind) {
5708     case TCG_CALL_RET_NORMAL:
5709     case TCG_CALL_RET_BY_VEC:
5710         break;
5711     case TCG_CALL_RET_BY_REF:
5712         /*
5713          * The return reference is in the first argument slot.
5714          * We need memory in which to return: re-use the top of stack.
5715          */
5716         {
5717             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5718 
5719             if (arg_slot_reg_p(0)) {
5720                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5721                                  TCG_REG_CALL_STACK, ofs_slot0);
5722             } else {
5723                 tcg_debug_assert(parm->ntmp != 0);
5724                 tcg_out_addi_ptr(s, parm->tmp[0],
5725                                  TCG_REG_CALL_STACK, ofs_slot0);
5726                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5727                            TCG_REG_CALL_STACK, ofs_slot0);
5728             }
5729         }
5730         break;
5731     default:
5732         g_assert_not_reached();
5733     }
5734 
5735     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5736 }
5737 
5738 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5739                                   bool load_sign,
5740                                   const TCGLdstHelperParam *parm)
5741 {
5742     MemOp mop = get_memop(ldst->oi);
5743     TCGMovExtend mov[2];
5744     int ofs_slot0;
5745 
5746     switch (ldst->type) {
5747     case TCG_TYPE_I64:
5748         if (TCG_TARGET_REG_BITS == 32) {
5749             break;
5750         }
5751         /* fall through */
5752 
5753     case TCG_TYPE_I32:
5754         mov[0].dst = ldst->datalo_reg;
5755         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5756         mov[0].dst_type = ldst->type;
5757         mov[0].src_type = TCG_TYPE_REG;
5758 
5759         /*
5760          * If load_sign, then we allowed the helper to perform the
5761          * appropriate sign extension to tcg_target_ulong, and all
5762          * we need now is a plain move.
5763          *
5764          * If they do not, then we expect the relevant extension
5765          * instruction to be no more expensive than a move, and
5766          * we thus save the icache etc by only using one of two
5767          * helper functions.
5768          */
5769         if (load_sign || !(mop & MO_SIGN)) {
5770             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5771                 mov[0].src_ext = MO_32;
5772             } else {
5773                 mov[0].src_ext = MO_64;
5774             }
5775         } else {
5776             mov[0].src_ext = mop & MO_SSIZE;
5777         }
5778         tcg_out_movext1(s, mov);
5779         return;
5780 
5781     case TCG_TYPE_I128:
5782         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5783         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5784         switch (TCG_TARGET_CALL_RET_I128) {
5785         case TCG_CALL_RET_NORMAL:
5786             break;
5787         case TCG_CALL_RET_BY_VEC:
5788             tcg_out_st(s, TCG_TYPE_V128,
5789                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5790                        TCG_REG_CALL_STACK, ofs_slot0);
5791             /* fall through */
5792         case TCG_CALL_RET_BY_REF:
5793             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5794                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5795             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5796                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5797             return;
5798         default:
5799             g_assert_not_reached();
5800         }
5801         break;
5802 
5803     default:
5804         g_assert_not_reached();
5805     }
5806 
5807     mov[0].dst = ldst->datalo_reg;
5808     mov[0].src =
5809         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5810     mov[0].dst_type = TCG_TYPE_REG;
5811     mov[0].src_type = TCG_TYPE_REG;
5812     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5813 
5814     mov[1].dst = ldst->datahi_reg;
5815     mov[1].src =
5816         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5817     mov[1].dst_type = TCG_TYPE_REG;
5818     mov[1].src_type = TCG_TYPE_REG;
5819     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5820 
5821     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5822 }
5823 
5824 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5825                                    const TCGLdstHelperParam *parm)
5826 {
5827     const TCGHelperInfo *info;
5828     const TCGCallArgumentLoc *loc;
5829     TCGMovExtend mov[4];
5830     TCGType data_type;
5831     unsigned next_arg, nmov, n;
5832     MemOp mop = get_memop(ldst->oi);
5833 
5834     switch (mop & MO_SIZE) {
5835     case MO_8:
5836     case MO_16:
5837     case MO_32:
5838         info = &info_helper_st32_mmu;
5839         data_type = TCG_TYPE_I32;
5840         break;
5841     case MO_64:
5842         info = &info_helper_st64_mmu;
5843         data_type = TCG_TYPE_I64;
5844         break;
5845     case MO_128:
5846         info = &info_helper_st128_mmu;
5847         data_type = TCG_TYPE_I128;
5848         break;
5849     default:
5850         g_assert_not_reached();
5851     }
5852 
5853     /* Defer env argument. */
5854     next_arg = 1;
5855     nmov = 0;
5856 
5857     /* Handle addr argument. */
5858     loc = &info->in[next_arg];
5859     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5860         /*
5861          * 32-bit host with 32-bit guest: zero-extend the guest address
5862          * to 64-bits for the helper by storing the low part.  Later,
5863          * after we have processed the register inputs, we will load a
5864          * zero for the high part.
5865          */
5866         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5867                                TCG_TYPE_I32, TCG_TYPE_I32,
5868                                ldst->addrlo_reg, -1);
5869         next_arg += 2;
5870         nmov += 1;
5871     } else {
5872         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5873                                    ldst->addrlo_reg, ldst->addrhi_reg);
5874         next_arg += n;
5875         nmov += n;
5876     }
5877 
5878     /* Handle data argument. */
5879     loc = &info->in[next_arg];
5880     switch (loc->kind) {
5881     case TCG_CALL_ARG_NORMAL:
5882     case TCG_CALL_ARG_EXTEND_U:
5883     case TCG_CALL_ARG_EXTEND_S:
5884         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5885                                    ldst->datalo_reg, ldst->datahi_reg);
5886         next_arg += n;
5887         nmov += n;
5888         tcg_out_helper_load_slots(s, nmov, mov, parm);
5889         break;
5890 
5891     case TCG_CALL_ARG_BY_REF:
5892         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5893         tcg_debug_assert(data_type == TCG_TYPE_I128);
5894         tcg_out_st(s, TCG_TYPE_I64,
5895                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5896                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5897         tcg_out_st(s, TCG_TYPE_I64,
5898                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5899                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5900 
5901         tcg_out_helper_load_slots(s, nmov, mov, parm);
5902 
5903         if (arg_slot_reg_p(loc->arg_slot)) {
5904             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5905                              TCG_REG_CALL_STACK,
5906                              arg_slot_stk_ofs(loc->ref_slot));
5907         } else {
5908             tcg_debug_assert(parm->ntmp != 0);
5909             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5910                              arg_slot_stk_ofs(loc->ref_slot));
5911             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5912                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5913         }
5914         next_arg += 2;
5915         break;
5916 
5917     default:
5918         g_assert_not_reached();
5919     }
5920 
5921     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5922         /* Zero extend the address by loading a zero for the high part. */
5923         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5924         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5925     }
5926 
5927     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5928 }
5929 
5930 void tcg_dump_op_count(GString *buf)
5931 {
5932     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5933 }
5934 
5935 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5936 {
5937     int i, start_words, num_insns;
5938     TCGOp *op;
5939 
5940     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5941                  && qemu_log_in_addr_range(pc_start))) {
5942         FILE *logfile = qemu_log_trylock();
5943         if (logfile) {
5944             fprintf(logfile, "OP:\n");
5945             tcg_dump_ops(s, logfile, false);
5946             fprintf(logfile, "\n");
5947             qemu_log_unlock(logfile);
5948         }
5949     }
5950 
5951 #ifdef CONFIG_DEBUG_TCG
5952     /* Ensure all labels referenced have been emitted.  */
5953     {
5954         TCGLabel *l;
5955         bool error = false;
5956 
5957         QSIMPLEQ_FOREACH(l, &s->labels, next) {
5958             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
5959                 qemu_log_mask(CPU_LOG_TB_OP,
5960                               "$L%d referenced but not present.\n", l->id);
5961                 error = true;
5962             }
5963         }
5964         assert(!error);
5965     }
5966 #endif
5967 
5968     tcg_optimize(s);
5969 
5970     reachable_code_pass(s);
5971     liveness_pass_0(s);
5972     liveness_pass_1(s);
5973 
5974     if (s->nb_indirects > 0) {
5975         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
5976                      && qemu_log_in_addr_range(pc_start))) {
5977             FILE *logfile = qemu_log_trylock();
5978             if (logfile) {
5979                 fprintf(logfile, "OP before indirect lowering:\n");
5980                 tcg_dump_ops(s, logfile, false);
5981                 fprintf(logfile, "\n");
5982                 qemu_log_unlock(logfile);
5983             }
5984         }
5985 
5986         /* Replace indirect temps with direct temps.  */
5987         if (liveness_pass_2(s)) {
5988             /* If changes were made, re-run liveness.  */
5989             liveness_pass_1(s);
5990         }
5991     }
5992 
5993     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
5994                  && qemu_log_in_addr_range(pc_start))) {
5995         FILE *logfile = qemu_log_trylock();
5996         if (logfile) {
5997             fprintf(logfile, "OP after optimization and liveness analysis:\n");
5998             tcg_dump_ops(s, logfile, true);
5999             fprintf(logfile, "\n");
6000             qemu_log_unlock(logfile);
6001         }
6002     }
6003 
6004     /* Initialize goto_tb jump offsets. */
6005     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6006     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6007     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6008     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6009 
6010     tcg_reg_alloc_start(s);
6011 
6012     /*
6013      * Reset the buffer pointers when restarting after overflow.
6014      * TODO: Move this into translate-all.c with the rest of the
6015      * buffer management.  Having only this done here is confusing.
6016      */
6017     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6018     s->code_ptr = s->code_buf;
6019 
6020 #ifdef TCG_TARGET_NEED_LDST_LABELS
6021     QSIMPLEQ_INIT(&s->ldst_labels);
6022 #endif
6023 #ifdef TCG_TARGET_NEED_POOL_LABELS
6024     s->pool_labels = NULL;
6025 #endif
6026 
6027     start_words = s->insn_start_words;
6028     s->gen_insn_data =
6029         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6030 
6031     tcg_out_tb_start(s);
6032 
6033     num_insns = -1;
6034     QTAILQ_FOREACH(op, &s->ops, link) {
6035         TCGOpcode opc = op->opc;
6036 
6037         switch (opc) {
6038         case INDEX_op_mov_i32:
6039         case INDEX_op_mov_i64:
6040         case INDEX_op_mov_vec:
6041             tcg_reg_alloc_mov(s, op);
6042             break;
6043         case INDEX_op_dup_vec:
6044             tcg_reg_alloc_dup(s, op);
6045             break;
6046         case INDEX_op_insn_start:
6047             if (num_insns >= 0) {
6048                 size_t off = tcg_current_code_size(s);
6049                 s->gen_insn_end_off[num_insns] = off;
6050                 /* Assert that we do not overflow our stored offset.  */
6051                 assert(s->gen_insn_end_off[num_insns] == off);
6052             }
6053             num_insns++;
6054             for (i = 0; i < start_words; ++i) {
6055                 s->gen_insn_data[num_insns * start_words + i] =
6056                     tcg_get_insn_start_param(op, i);
6057             }
6058             break;
6059         case INDEX_op_discard:
6060             temp_dead(s, arg_temp(op->args[0]));
6061             break;
6062         case INDEX_op_set_label:
6063             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6064             tcg_out_label(s, arg_label(op->args[0]));
6065             break;
6066         case INDEX_op_call:
6067             tcg_reg_alloc_call(s, op);
6068             break;
6069         case INDEX_op_exit_tb:
6070             tcg_out_exit_tb(s, op->args[0]);
6071             break;
6072         case INDEX_op_goto_tb:
6073             tcg_out_goto_tb(s, op->args[0]);
6074             break;
6075         case INDEX_op_dup2_vec:
6076             if (tcg_reg_alloc_dup2(s, op)) {
6077                 break;
6078             }
6079             /* fall through */
6080         default:
6081             /* Sanity check that we've not introduced any unhandled opcodes. */
6082             tcg_debug_assert(tcg_op_supported(opc));
6083             /* Note: in order to speed up the code, it would be much
6084                faster to have specialized register allocator functions for
6085                some common argument patterns */
6086             tcg_reg_alloc_op(s, op);
6087             break;
6088         }
6089         /* Test for (pending) buffer overflow.  The assumption is that any
6090            one operation beginning below the high water mark cannot overrun
6091            the buffer completely.  Thus we can test for overflow after
6092            generating code without having to check during generation.  */
6093         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6094             return -1;
6095         }
6096         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6097         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6098             return -2;
6099         }
6100     }
6101     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6102     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6103 
6104     /* Generate TB finalization at the end of block */
6105 #ifdef TCG_TARGET_NEED_LDST_LABELS
6106     i = tcg_out_ldst_finalize(s);
6107     if (i < 0) {
6108         return i;
6109     }
6110 #endif
6111 #ifdef TCG_TARGET_NEED_POOL_LABELS
6112     i = tcg_out_pool_finalize(s);
6113     if (i < 0) {
6114         return i;
6115     }
6116 #endif
6117     if (!tcg_resolve_relocs(s)) {
6118         return -2;
6119     }
6120 
6121 #ifndef CONFIG_TCG_INTERPRETER
6122     /* flush instruction cache */
6123     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6124                         (uintptr_t)s->code_buf,
6125                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6126 #endif
6127 
6128     return tcg_current_code_size(s);
6129 }
6130 
6131 void tcg_dump_info(GString *buf)
6132 {
6133     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6134 }
6135 
6136 #ifdef ELF_HOST_MACHINE
6137 /* In order to use this feature, the backend needs to do three things:
6138 
6139    (1) Define ELF_HOST_MACHINE to indicate both what value to
6140        put into the ELF image and to indicate support for the feature.
6141 
6142    (2) Define tcg_register_jit.  This should create a buffer containing
6143        the contents of a .debug_frame section that describes the post-
6144        prologue unwind info for the tcg machine.
6145 
6146    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6147 */
6148 
6149 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Values stored in jit_descriptor.action_flag.  The numeric values are
 * spelled out because this enum is part of the GDB interface.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
6155 
/*
 * One registered symbol file.  Field order and types are part of the
 * GDB interface and must not be changed.
 */
struct jit_code_entry {
    /* Links to the neighbouring entries. */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start address of the in-memory symbol file. */
    const void *symfile_addr;
    /* Size of that symbol file, in bytes. */
    uint64_t symfile_size;
};
6162 
/*
 * Descriptor object shared with GDB.  Field order and types are part of
 * the GDB interface and must not be changed.
 */
struct jit_descriptor {
    /* Interface version. */
    uint32_t version;
    /* Holds a jit_actions_t value. */
    uint32_t action_flag;
    /* Entry that the current action refers to. */
    struct jit_code_entry *relevant_entry;
    /* First entry of the list of registered symbol files. */
    struct jit_code_entry *first_entry;
};
6169 
/*
 * Hook called after __jit_debug_descriptor has been updated.  It does no
 * work of its own: it is kept out of line, and the empty asm statement
 * keeps the body from being completely empty.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);
void __jit_debug_register_code(void)
{
    __asm__("");
}
6175 
6176 /* Must statically initialize the version, because GDB may check
6177    the version before we can set it.  */
6178 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6179 
6180 /* End GDB interface.  */
6181 
/*
 * Return the byte offset of @str inside the string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings; the search starts at
 * offset 1, skipping the first byte of the table.  There is no
 * end-of-table check, so the caller must only ask for strings that are
 * actually present.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cursor;

    /* Hop from one string to the next until an exact match is found. */
    for (cursor = strtab + 1;
         strcmp(cursor, str) != 0;
         cursor += strlen(cursor) + 1) {
        continue;
    }
    return cursor - strtab;
}
6193 
6194 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6195                                  const void *debug_frame,
6196                                  size_t debug_frame_size)
6197 {
6198     struct __attribute__((packed)) DebugInfo {
6199         uint32_t  len;
6200         uint16_t  version;
6201         uint32_t  abbrev;
6202         uint8_t   ptr_size;
6203         uint8_t   cu_die;
6204         uint16_t  cu_lang;
6205         uintptr_t cu_low_pc;
6206         uintptr_t cu_high_pc;
6207         uint8_t   fn_die;
6208         char      fn_name[16];
6209         uintptr_t fn_low_pc;
6210         uintptr_t fn_high_pc;
6211         uint8_t   cu_eoc;
6212     };
6213 
6214     struct ElfImage {
6215         ElfW(Ehdr) ehdr;
6216         ElfW(Phdr) phdr;
6217         ElfW(Shdr) shdr[7];
6218         ElfW(Sym)  sym[2];
6219         struct DebugInfo di;
6220         uint8_t    da[24];
6221         char       str[80];
6222     };
6223 
6224     struct ElfImage *img;
6225 
6226     static const struct ElfImage img_template = {
6227         .ehdr = {
6228             .e_ident[EI_MAG0] = ELFMAG0,
6229             .e_ident[EI_MAG1] = ELFMAG1,
6230             .e_ident[EI_MAG2] = ELFMAG2,
6231             .e_ident[EI_MAG3] = ELFMAG3,
6232             .e_ident[EI_CLASS] = ELF_CLASS,
6233             .e_ident[EI_DATA] = ELF_DATA,
6234             .e_ident[EI_VERSION] = EV_CURRENT,
6235             .e_type = ET_EXEC,
6236             .e_machine = ELF_HOST_MACHINE,
6237             .e_version = EV_CURRENT,
6238             .e_phoff = offsetof(struct ElfImage, phdr),
6239             .e_shoff = offsetof(struct ElfImage, shdr),
6240             .e_ehsize = sizeof(ElfW(Shdr)),
6241             .e_phentsize = sizeof(ElfW(Phdr)),
6242             .e_phnum = 1,
6243             .e_shentsize = sizeof(ElfW(Shdr)),
6244             .e_shnum = ARRAY_SIZE(img->shdr),
6245             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6246 #ifdef ELF_HOST_FLAGS
6247             .e_flags = ELF_HOST_FLAGS,
6248 #endif
6249 #ifdef ELF_OSABI
6250             .e_ident[EI_OSABI] = ELF_OSABI,
6251 #endif
6252         },
6253         .phdr = {
6254             .p_type = PT_LOAD,
6255             .p_flags = PF_X,
6256         },
6257         .shdr = {
6258             [0] = { .sh_type = SHT_NULL },
6259             /* Trick: The contents of code_gen_buffer are not present in
6260                this fake ELF file; that got allocated elsewhere.  Therefore
6261                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6262                will not look for contents.  We can record any address.  */
6263             [1] = { /* .text */
6264                 .sh_type = SHT_NOBITS,
6265                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6266             },
6267             [2] = { /* .debug_info */
6268                 .sh_type = SHT_PROGBITS,
6269                 .sh_offset = offsetof(struct ElfImage, di),
6270                 .sh_size = sizeof(struct DebugInfo),
6271             },
6272             [3] = { /* .debug_abbrev */
6273                 .sh_type = SHT_PROGBITS,
6274                 .sh_offset = offsetof(struct ElfImage, da),
6275                 .sh_size = sizeof(img->da),
6276             },
6277             [4] = { /* .debug_frame */
6278                 .sh_type = SHT_PROGBITS,
6279                 .sh_offset = sizeof(struct ElfImage),
6280             },
6281             [5] = { /* .symtab */
6282                 .sh_type = SHT_SYMTAB,
6283                 .sh_offset = offsetof(struct ElfImage, sym),
6284                 .sh_size = sizeof(img->sym),
6285                 .sh_info = 1,
6286                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6287                 .sh_entsize = sizeof(ElfW(Sym)),
6288             },
6289             [6] = { /* .strtab */
6290                 .sh_type = SHT_STRTAB,
6291                 .sh_offset = offsetof(struct ElfImage, str),
6292                 .sh_size = sizeof(img->str),
6293             }
6294         },
6295         .sym = {
6296             [1] = { /* code_gen_buffer */
6297                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6298                 .st_shndx = 1,
6299             }
6300         },
6301         .di = {
6302             .len = sizeof(struct DebugInfo) - 4,
6303             .version = 2,
6304             .ptr_size = sizeof(void *),
6305             .cu_die = 1,
6306             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6307             .fn_die = 2,
6308             .fn_name = "code_gen_buffer"
6309         },
6310         .da = {
6311             1,          /* abbrev number (the cu) */
6312             0x11, 1,    /* DW_TAG_compile_unit, has children */
6313             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6314             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6315             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6316             0, 0,       /* end of abbrev */
6317             2,          /* abbrev number (the fn) */
6318             0x2e, 0,    /* DW_TAG_subprogram, no children */
6319             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6320             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6321             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6322             0, 0,       /* end of abbrev */
6323             0           /* no more abbrev */
6324         },
6325         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6326                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6327     };
6328 
6329     /* We only need a single jit entry; statically allocate it.  */
6330     static struct jit_code_entry one_entry;
6331 
6332     uintptr_t buf = (uintptr_t)buf_ptr;
6333     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6334     DebugFrameHeader *dfh;
6335 
6336     img = g_malloc(img_size);
6337     *img = img_template;
6338 
6339     img->phdr.p_vaddr = buf;
6340     img->phdr.p_paddr = buf;
6341     img->phdr.p_memsz = buf_size;
6342 
6343     img->shdr[1].sh_name = find_string(img->str, ".text");
6344     img->shdr[1].sh_addr = buf;
6345     img->shdr[1].sh_size = buf_size;
6346 
6347     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6348     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6349 
6350     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6351     img->shdr[4].sh_size = debug_frame_size;
6352 
6353     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6354     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6355 
6356     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6357     img->sym[1].st_value = buf;
6358     img->sym[1].st_size = buf_size;
6359 
6360     img->di.cu_low_pc = buf;
6361     img->di.cu_high_pc = buf + buf_size;
6362     img->di.fn_low_pc = buf;
6363     img->di.fn_high_pc = buf + buf_size;
6364 
6365     dfh = (DebugFrameHeader *)(img + 1);
6366     memcpy(dfh, debug_frame, debug_frame_size);
6367     dfh->fde.func_start = buf;
6368     dfh->fde.func_len = buf_size;
6369 
6370 #ifdef DEBUG_JIT
6371     /* Enable this block to be able to debug the ELF image file creation.
6372        One can use readelf, objdump, or other inspection utilities.  */
6373     {
6374         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6375         FILE *f = fopen(jit, "w+b");
6376         if (f) {
6377             if (fwrite(img, img_size, 1, f) != img_size) {
6378                 /* Avoid stupid unused return value warning for fwrite.  */
6379             }
6380             fclose(f);
6381         }
6382     }
6383 #endif
6384 
6385     one_entry.symfile_addr = img;
6386     one_entry.symfile_size = img_size;
6387 
6388     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6389     __jit_debug_descriptor.relevant_entry = &one_entry;
6390     __jit_debug_descriptor.first_entry = &one_entry;
6391     __jit_debug_register_code();
6392 }
6393 #else
6394 /* No support for the feature.  Provide the entry point expected by exec.c,
6395    and implement the internal function we declared earlier.  */
6396 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: does nothing.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty; all arguments are ignored. */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
6402 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: does nothing.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty; both arguments are ignored. */
    (void)buf;
    (void)buf_size;
}
6406 #endif /* ELF_HOST_MACHINE */
6407 
6408 #if !TCG_TARGET_MAYBE_vec
6409 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6410 {
6411     g_assert_not_reached();
6412 }
6413 #endif
6414