/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;  /* addr of code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
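
/*
 * A note on use: when the fast path of a qemu_ld/st cannot complete
 * inline (e.g. on a softmmu TLB miss), the backend emits an out-of-line
 * slow path and queues one of these labels for it.  label_ptr[] records
 * the branch instruction(s) to patch once the slow path is placed, the
 * slow path calls one of the qemu_ld/st helpers below, and raddr is the
 * fast-path address to which the slow path returns.
 */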

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
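
/*
 * For example, on a host with 1-byte insn units (such as x86), tcg_out32
 * copies the four bytes with memcpy and advances s->code_ptr by four
 * units, while on a host with 4-byte insn units (such as aarch64) the
 * value is a single unit and is stored directly.
 */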

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
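
/*
 * The label lifecycle, in brief: gen_new_label creates an unbound label;
 * a backend branch to an unbound label records a relocation against the
 * branch with tcg_out_reloc; tcg_out_label binds the label to the current
 * output position; and tcg_resolve_relocs walks every label's relocation
 * list, letting the per-target patch_reloc rewrite each recorded
 * instruction with the now-known value.
 */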

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
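
/*
 * A minimal illustrative sketch, not used anywhere: with a 64-bit
 * destination type and a MO_SL source extension, tcg_out_movext reduces
 * to tcg_out_exts_i32_i64, i.e. a sign extension from 32 to 64 bits.
 */
static void G_GNUC_UNUSED tcg_out_movext_example(TCGContext *s,
                                                 TCGReg dst, TCGReg src)
{
    tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
}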

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
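
/*
 * An illustrative sketch, not used anywhere: zero-extend and exchange
 * two 32-bit values into 64-bit registers.  Because each destination is
 * the other's source, tcg_out_movext2 must either use tcg_out_xchg or
 * fall back to the scratch register.
 */
static void G_GNUC_UNUSED tcg_out_movext2_example(TCGContext *s, TCGReg a,
                                                  TCGReg b, TCGReg scratch)
{
    TCGMovExtend i1 = { .dst = a, .src = b, .dst_type = TCG_TYPE_I64,
                        .src_type = TCG_TYPE_I32, .src_ext = MO_UL };
    TCGMovExtend i2 = { .dst = b, .src = a, .dst_type = TCG_TYPE_I64,
                        .src_type = TCG_TYPE_I32, .src_ext = MO_UL };

    tcg_out_movext2(s, &i1, &i2, scratch);
}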

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */
562 
563 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
564                             const TCGMovExtend *i2, const TCGMovExtend *i3,
565                             int scratch)
566 {
567     TCGReg src1 = i1->src;
568     TCGReg src2 = i2->src;
569     TCGReg src3 = i3->src;
570 
571     if (i1->dst != src2 && i1->dst != src3) {
572         tcg_out_movext1(s, i1);
573         tcg_out_movext2(s, i2, i3, scratch);
574         return;
575     }
576     if (i2->dst != src1 && i2->dst != src3) {
577         tcg_out_movext1(s, i2);
578         tcg_out_movext2(s, i1, i3, scratch);
579         return;
580     }
581     if (i3->dst != src1 && i3->dst != src2) {
582         tcg_out_movext1(s, i3);
583         tcg_out_movext2(s, i1, i2, scratch);
584         return;
585     }
586 
587     /*
588      * There is a cycle.  Since there are only 3 nodes, the cycle is
589      * either "clockwise" or "anti-clockwise", and can be solved with
590      * a single scratch or two xchg.
591      */
592     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
593         /* "Clockwise" */
594         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
595             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
596             /* The data is now in the correct registers, now extend. */
597             tcg_out_movext1_new_src(s, i1, i1->dst);
598             tcg_out_movext1_new_src(s, i2, i2->dst);
599             tcg_out_movext1_new_src(s, i3, i3->dst);
600         } else {
601             tcg_debug_assert(scratch >= 0);
602             tcg_out_mov(s, i1->src_type, scratch, src1);
603             tcg_out_movext1(s, i3);
604             tcg_out_movext1(s, i2);
605             tcg_out_movext1_new_src(s, i1, scratch);
606         }
607     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
608         /* "Anti-clockwise" */
609         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
610             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
611             /* The data is now in the correct registers, now extend. */
612             tcg_out_movext1_new_src(s, i1, i1->dst);
613             tcg_out_movext1_new_src(s, i2, i2->dst);
614             tcg_out_movext1_new_src(s, i3, i3->dst);
615         } else {
616             tcg_debug_assert(scratch >= 0);
617             tcg_out_mov(s, i1->src_type, scratch, src1);
618             tcg_out_movext1(s, i2);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1_new_src(s, i1, scratch);
621         }
622     } else {
623         g_assert_not_reached();
624     }
625 }

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

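/*
 * Putting the three passes together: a typical line in the backend's
 * tcg-target-con-set.h, say C_O1_I2(r, r, ri), expands first to the
 * enumerator c_o1_i2_r_r_ri, then to the constraint_sets[] entry
 * { .args_ct_str = { "r", "r", "ri" } }, and finally (with the
 * definitions just above, in effect for tcg-target.c.inc below) to the
 * enumerator value that the backend's tcg_target_op_def() returns.
 */
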
#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
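
/*
 * For example, under MTTCG each vCPU thread calls tcg_register_thread()
 * once, before its first call into the translator; in user-mode every
 * guest thread may call it, but all of them simply share tcg_init_ctx.
 */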

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
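
/*
 * An illustrative sketch, not used anywhere: pool allocations are tied
 * to the current translation and are never freed individually; everything
 * is reclaimed at once by tcg_pool_reset, which is called from
 * tcg_func_start at the beginning of each new translation.
 */
static void G_GNUC_UNUSED tcg_pool_example(TCGContext *s)
{
    /* Scratch storage that lives until the next tcg_pool_reset. */
    int *scratch = tcg_malloc(16 * sizeof(int));

    scratch[0] = 0;
    /* ... use the memory while translating ... */
    tcg_pool_reset(s);  /* scratch is dangling after this point */
}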

#include "exec/helper-proto.h"

static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, so it is easier to build them manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);
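
        /*
         * Worked example: a helper taking (env, i64, i32) has typecodes
         * in bits [3:5], [6:8] and [9:11] of typemask, so the highest
         * set bit of typemask >> 3 lies in the third 3-bit group;
         * 32 - clz32 yields a bit position within that group, and
         * dividing by 3 (rounding up) recovers nargs = 3.
         */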

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    g_hash_table_destroy(ffi_table);
}
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
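
/*
 * For example, on a hypothetical host with six integer argument registers
 * and a TCG_TARGET_CALL_STACK_OFFSET of 0, arg_slots 0-5 name those
 * registers, arg_slot 6 is the first stack slot at offset 0, arg_slot 7
 * is at offset sizeof(tcg_target_long), and so on, bounded by
 * TCG_STATIC_CALL_ARGS_SIZE.
 */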

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
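
/*
 * As a worked example, consider info_helper_st64_mmu above on a 64-bit
 * host using TCG_CALL_ARG_NORMAL throughout: nr_out is 0 (void return),
 * and the five inputs (env, addr, data, oi, ra) land in arg_slots 0-4,
 * i.e. the first five argument registers when that many are available.
 * On a 32-bit host each 64-bit value occupies two slots instead.
 */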

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
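
/*
 * For example, with a 64-byte instruction cache line, the TB is placed
 * at the next 64-byte boundary after code_gen_ptr and its translated
 * code begins at the first 64-byte boundary past the TB itself, so the
 * two never share a cache line.
 */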

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);
}
1530 
1531 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1532 {
1533     int n = s->nb_temps++;
1534 
1535     if (n >= TCG_MAX_TEMPS) {
1536         tcg_raise_tb_overflow(s);
1537     }
1538     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1539 }
1540 
1541 static TCGTemp *tcg_global_alloc(TCGContext *s)
1542 {
1543     TCGTemp *ts;
1544 
1545     tcg_debug_assert(s->nb_globals == s->nb_temps);
1546     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1547     s->nb_globals++;
1548     ts = tcg_temp_alloc(s);
1549     ts->kind = TEMP_GLOBAL;
1550 
1551     return ts;
1552 }
1553 
1554 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1555                                             TCGReg reg, const char *name)
1556 {
1557     TCGTemp *ts;
1558 
1559     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1560 
1561     ts = tcg_global_alloc(s);
1562     ts->base_type = type;
1563     ts->type = type;
1564     ts->kind = TEMP_FIXED;
1565     ts->reg = reg;
1566     ts->name = name;
1567     tcg_regset_set_reg(s->reserved_regs, reg);
1568 
1569     return ts;
1570 }
1571 
1572 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1573 {
1574     s->frame_start = start;
1575     s->frame_end = start + size;
1576     s->frame_temp
1577         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1578 }
1579 
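/*
 * A minimal sketch of the expected caller, a backend's
 * tcg_target_qemu_prologue(), with stack_offset standing in for a
 * target-chosen value:
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, stack_offset,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 *
 * Temps without a memory slot are later spilled into
 * [frame_start, frame_end), starting at current_frame_offset.
 */
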
1580 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1581                                      intptr_t offset, const char *name)
1582 {
1583     TCGContext *s = tcg_ctx;
1584     TCGTemp *base_ts = tcgv_ptr_temp(base);
1585     TCGTemp *ts = tcg_global_alloc(s);
1586     int indirect_reg = 0;
1587 
1588     switch (base_ts->kind) {
1589     case TEMP_FIXED:
1590         break;
1591     case TEMP_GLOBAL:
1592         /* We do not support double-indirect registers.  */
1593         tcg_debug_assert(!base_ts->indirect_reg);
1594         base_ts->indirect_base = 1;
1595         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1596                             ? 2 : 1);
1597         indirect_reg = 1;
1598         break;
1599     default:
1600         g_assert_not_reached();
1601     }
1602 
1603     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1604         TCGTemp *ts2 = tcg_global_alloc(s);
1605         char buf[64];
1606 
1607         ts->base_type = TCG_TYPE_I64;
1608         ts->type = TCG_TYPE_I32;
1609         ts->indirect_reg = indirect_reg;
1610         ts->mem_allocated = 1;
1611         ts->mem_base = base_ts;
1612         ts->mem_offset = offset;
1613         pstrcpy(buf, sizeof(buf), name);
1614         pstrcat(buf, sizeof(buf), "_0");
1615         ts->name = strdup(buf);
1616 
1617         tcg_debug_assert(ts2 == ts + 1);
1618         ts2->base_type = TCG_TYPE_I64;
1619         ts2->type = TCG_TYPE_I32;
1620         ts2->indirect_reg = indirect_reg;
1621         ts2->mem_allocated = 1;
1622         ts2->mem_base = base_ts;
1623         ts2->mem_offset = offset + 4;
1624         ts2->temp_subindex = 1;
1625         pstrcpy(buf, sizeof(buf), name);
1626         pstrcat(buf, sizeof(buf), "_1");
1627         ts2->name = strdup(buf);
1628     } else {
1629         ts->base_type = type;
1630         ts->type = type;
1631         ts->indirect_reg = indirect_reg;
1632         ts->mem_allocated = 1;
1633         ts->mem_base = base_ts;
1634         ts->mem_offset = offset;
1635         ts->name = name;
1636     }
1637     return ts;
1638 }
1639 
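/*
 * Front ends reach this through the typed wrappers; a sketch, with
 * CPUFooState and its "regs" field as hypothetical names:
 *
 *     cpu_regs[i] = tcg_global_mem_new_i32(cpu_env,
 *                       offsetof(CPUFooState, regs[i]), names[i]);
 *
 * On a 32-bit host, a 64-bit global becomes the halves name_0/name_1
 * at offset and offset + 4, as constructed above.
 */
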
1640 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1641 {
1642     TCGContext *s = tcg_ctx;
1643     TCGTemp *ts;
1644     int n;
1645 
1646     if (kind == TEMP_EBB) {
1647         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1648 
1649         if (idx < TCG_MAX_TEMPS) {
1650             /* There is already an available temp with the right type.  */
1651             clear_bit(idx, s->free_temps[type].l);
1652 
1653             ts = &s->temps[idx];
1654             ts->temp_allocated = 1;
1655             tcg_debug_assert(ts->base_type == type);
1656             tcg_debug_assert(ts->kind == kind);
1657             return ts;
1658         }
1659     } else {
1660         tcg_debug_assert(kind == TEMP_TB);
1661     }
1662 
1663     switch (type) {
1664     case TCG_TYPE_I32:
1665     case TCG_TYPE_V64:
1666     case TCG_TYPE_V128:
1667     case TCG_TYPE_V256:
1668         n = 1;
1669         break;
1670     case TCG_TYPE_I64:
1671         n = 64 / TCG_TARGET_REG_BITS;
1672         break;
1673     case TCG_TYPE_I128:
1674         n = 128 / TCG_TARGET_REG_BITS;
1675         break;
1676     default:
1677         g_assert_not_reached();
1678     }
1679 
1680     ts = tcg_temp_alloc(s);
1681     ts->base_type = type;
1682     ts->temp_allocated = 1;
1683     ts->kind = kind;
1684 
1685     if (n == 1) {
1686         ts->type = type;
1687     } else {
1688         ts->type = TCG_TYPE_REG;
1689 
1690         for (int i = 1; i < n; ++i) {
1691             TCGTemp *ts2 = tcg_temp_alloc(s);
1692 
1693             tcg_debug_assert(ts2 == ts + i);
1694             ts2->base_type = type;
1695             ts2->type = TCG_TYPE_REG;
1696             ts2->temp_allocated = 1;
1697             ts2->temp_subindex = i;
1698             ts2->kind = kind;
1699         }
1700     }
1701     return ts;
1702 }
1703 
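/*
 * Worked example of the split above: TCG_TYPE_I64 on a 32-bit host
 * (or TCG_TYPE_I128 on a 64-bit host) gives n = 2, so the value
 * occupies two consecutive TCGTemps of type TCG_TYPE_REG with
 * temp_subindex 0 and 1; only the subindex-0 temp is returned.
 */
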
1704 TCGv_vec tcg_temp_new_vec(TCGType type)
1705 {
1706     TCGTemp *t;
1707 
1708 #ifdef CONFIG_DEBUG_TCG
1709     switch (type) {
1710     case TCG_TYPE_V64:
1711         assert(TCG_TARGET_HAS_v64);
1712         break;
1713     case TCG_TYPE_V128:
1714         assert(TCG_TARGET_HAS_v128);
1715         break;
1716     case TCG_TYPE_V256:
1717         assert(TCG_TARGET_HAS_v256);
1718         break;
1719     default:
1720         g_assert_not_reached();
1721     }
1722 #endif
1723 
1724     t = tcg_temp_new_internal(type, TEMP_EBB);
1725     return temp_tcgv_vec(t);
1726 }
1727 
1728 /* Create a new temp of the same type as an existing temp.  */
1729 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1730 {
1731     TCGTemp *t = tcgv_vec_temp(match);
1732 
1733     tcg_debug_assert(t->temp_allocated != 0);
1734 
1735     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1736     return temp_tcgv_vec(t);
1737 }
1738 
1739 void tcg_temp_free_internal(TCGTemp *ts)
1740 {
1741     TCGContext *s = tcg_ctx;
1742 
1743     switch (ts->kind) {
1744     case TEMP_CONST:
1745     case TEMP_TB:
1746         /* Silently ignore free. */
1747         break;
1748     case TEMP_EBB:
1749         tcg_debug_assert(ts->temp_allocated != 0);
1750         ts->temp_allocated = 0;
1751         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1752         break;
1753     default:
1754         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1755         g_assert_not_reached();
1756     }
1757 }
1758 
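/*
 * Note the asymmetry with allocation: only TEMP_EBB temps are
 * recycled, via the free_temps[base_type] bitmap consumed by the
 * fast path in tcg_temp_new_internal().  Freeing TEMP_TB and
 * TEMP_CONST is a deliberate no-op, since their lifetimes end with
 * the TB and the context respectively.
 */
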
1759 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1760 {
1761     TCGContext *s = tcg_ctx;
1762     GHashTable *h = s->const_table[type];
1763     TCGTemp *ts;
1764 
1765     if (h == NULL) {
1766         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1767         s->const_table[type] = h;
1768     }
1769 
1770     ts = g_hash_table_lookup(h, &val);
1771     if (ts == NULL) {
1772         int64_t *val_ptr;
1773 
1774         ts = tcg_temp_alloc(s);
1775 
1776         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1777             TCGTemp *ts2 = tcg_temp_alloc(s);
1778 
1779             tcg_debug_assert(ts2 == ts + 1);
1780 
1781             ts->base_type = TCG_TYPE_I64;
1782             ts->type = TCG_TYPE_I32;
1783             ts->kind = TEMP_CONST;
1784             ts->temp_allocated = 1;
1785 
1786             ts2->base_type = TCG_TYPE_I64;
1787             ts2->type = TCG_TYPE_I32;
1788             ts2->kind = TEMP_CONST;
1789             ts2->temp_allocated = 1;
1790             ts2->temp_subindex = 1;
1791 
1792             /*
1793              * Retain the full value of the 64-bit constant in the low
1794              * part, so that the hash table works.  Actual uses will
1795              * truncate the value to the low part.
1796              */
1797             ts[HOST_BIG_ENDIAN].val = val;
1798             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1799             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1800         } else {
1801             ts->base_type = type;
1802             ts->type = type;
1803             ts->kind = TEMP_CONST;
1804             ts->temp_allocated = 1;
1805             ts->val = val;
1806             val_ptr = &ts->val;
1807         }
1808         g_hash_table_insert(h, val_ptr, ts);
1809     }
1810 
1811     return ts;
1812 }
1813 
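/*
 * Constants are interned per (type, value); a sketch using the typed
 * wrappers built on this function:
 *
 *     TCGv_i32 a = tcg_constant_i32(0x1234);
 *     TCGv_i32 b = tcg_constant_i32(0x1234);   // same temp as a
 *
 * Both calls hash the same key, so at most one TEMP_CONST (or pair,
 * on 32-bit hosts) exists for each value.
 */
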
1814 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1815 {
1816     val = dup_const(vece, val);
1817     return temp_tcgv_vec(tcg_constant_internal(type, val));
1818 }
1819 
1820 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1821 {
1822     TCGTemp *t = tcgv_vec_temp(match);
1823 
1824     tcg_debug_assert(t->temp_allocated != 0);
1825     return tcg_constant_vec(t->base_type, vece, val);
1826 }
1827 
1828 /* Return true if OP may appear in the opcode stream.
1829    Test the runtime variable that controls each opcode.  */
1830 bool tcg_op_supported(TCGOpcode op)
1831 {
1832     const bool have_vec
1833         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1834 
1835     switch (op) {
1836     case INDEX_op_discard:
1837     case INDEX_op_set_label:
1838     case INDEX_op_call:
1839     case INDEX_op_br:
1840     case INDEX_op_mb:
1841     case INDEX_op_insn_start:
1842     case INDEX_op_exit_tb:
1843     case INDEX_op_goto_tb:
1844     case INDEX_op_goto_ptr:
1845     case INDEX_op_qemu_ld_a32_i32:
1846     case INDEX_op_qemu_ld_a64_i32:
1847     case INDEX_op_qemu_st_a32_i32:
1848     case INDEX_op_qemu_st_a64_i32:
1849     case INDEX_op_qemu_ld_a32_i64:
1850     case INDEX_op_qemu_ld_a64_i64:
1851     case INDEX_op_qemu_st_a32_i64:
1852     case INDEX_op_qemu_st_a64_i64:
1853         return true;
1854 
1855     case INDEX_op_qemu_st8_a32_i32:
1856     case INDEX_op_qemu_st8_a64_i32:
1857         return TCG_TARGET_HAS_qemu_st8_i32;
1858 
1859     case INDEX_op_qemu_ld_a32_i128:
1860     case INDEX_op_qemu_ld_a64_i128:
1861     case INDEX_op_qemu_st_a32_i128:
1862     case INDEX_op_qemu_st_a64_i128:
1863         return TCG_TARGET_HAS_qemu_ldst_i128;
1864 
1865     case INDEX_op_mov_i32:
1866     case INDEX_op_setcond_i32:
1867     case INDEX_op_brcond_i32:
1868     case INDEX_op_ld8u_i32:
1869     case INDEX_op_ld8s_i32:
1870     case INDEX_op_ld16u_i32:
1871     case INDEX_op_ld16s_i32:
1872     case INDEX_op_ld_i32:
1873     case INDEX_op_st8_i32:
1874     case INDEX_op_st16_i32:
1875     case INDEX_op_st_i32:
1876     case INDEX_op_add_i32:
1877     case INDEX_op_sub_i32:
1878     case INDEX_op_mul_i32:
1879     case INDEX_op_and_i32:
1880     case INDEX_op_or_i32:
1881     case INDEX_op_xor_i32:
1882     case INDEX_op_shl_i32:
1883     case INDEX_op_shr_i32:
1884     case INDEX_op_sar_i32:
1885         return true;
1886 
1887     case INDEX_op_movcond_i32:
1888         return TCG_TARGET_HAS_movcond_i32;
1889     case INDEX_op_div_i32:
1890     case INDEX_op_divu_i32:
1891         return TCG_TARGET_HAS_div_i32;
1892     case INDEX_op_rem_i32:
1893     case INDEX_op_remu_i32:
1894         return TCG_TARGET_HAS_rem_i32;
1895     case INDEX_op_div2_i32:
1896     case INDEX_op_divu2_i32:
1897         return TCG_TARGET_HAS_div2_i32;
1898     case INDEX_op_rotl_i32:
1899     case INDEX_op_rotr_i32:
1900         return TCG_TARGET_HAS_rot_i32;
1901     case INDEX_op_deposit_i32:
1902         return TCG_TARGET_HAS_deposit_i32;
1903     case INDEX_op_extract_i32:
1904         return TCG_TARGET_HAS_extract_i32;
1905     case INDEX_op_sextract_i32:
1906         return TCG_TARGET_HAS_sextract_i32;
1907     case INDEX_op_extract2_i32:
1908         return TCG_TARGET_HAS_extract2_i32;
1909     case INDEX_op_add2_i32:
1910         return TCG_TARGET_HAS_add2_i32;
1911     case INDEX_op_sub2_i32:
1912         return TCG_TARGET_HAS_sub2_i32;
1913     case INDEX_op_mulu2_i32:
1914         return TCG_TARGET_HAS_mulu2_i32;
1915     case INDEX_op_muls2_i32:
1916         return TCG_TARGET_HAS_muls2_i32;
1917     case INDEX_op_muluh_i32:
1918         return TCG_TARGET_HAS_muluh_i32;
1919     case INDEX_op_mulsh_i32:
1920         return TCG_TARGET_HAS_mulsh_i32;
1921     case INDEX_op_ext8s_i32:
1922         return TCG_TARGET_HAS_ext8s_i32;
1923     case INDEX_op_ext16s_i32:
1924         return TCG_TARGET_HAS_ext16s_i32;
1925     case INDEX_op_ext8u_i32:
1926         return TCG_TARGET_HAS_ext8u_i32;
1927     case INDEX_op_ext16u_i32:
1928         return TCG_TARGET_HAS_ext16u_i32;
1929     case INDEX_op_bswap16_i32:
1930         return TCG_TARGET_HAS_bswap16_i32;
1931     case INDEX_op_bswap32_i32:
1932         return TCG_TARGET_HAS_bswap32_i32;
1933     case INDEX_op_not_i32:
1934         return TCG_TARGET_HAS_not_i32;
1935     case INDEX_op_neg_i32:
1936         return TCG_TARGET_HAS_neg_i32;
1937     case INDEX_op_andc_i32:
1938         return TCG_TARGET_HAS_andc_i32;
1939     case INDEX_op_orc_i32:
1940         return TCG_TARGET_HAS_orc_i32;
1941     case INDEX_op_eqv_i32:
1942         return TCG_TARGET_HAS_eqv_i32;
1943     case INDEX_op_nand_i32:
1944         return TCG_TARGET_HAS_nand_i32;
1945     case INDEX_op_nor_i32:
1946         return TCG_TARGET_HAS_nor_i32;
1947     case INDEX_op_clz_i32:
1948         return TCG_TARGET_HAS_clz_i32;
1949     case INDEX_op_ctz_i32:
1950         return TCG_TARGET_HAS_ctz_i32;
1951     case INDEX_op_ctpop_i32:
1952         return TCG_TARGET_HAS_ctpop_i32;
1953 
1954     case INDEX_op_brcond2_i32:
1955     case INDEX_op_setcond2_i32:
1956         return TCG_TARGET_REG_BITS == 32;
1957 
1958     case INDEX_op_mov_i64:
1959     case INDEX_op_setcond_i64:
1960     case INDEX_op_brcond_i64:
1961     case INDEX_op_ld8u_i64:
1962     case INDEX_op_ld8s_i64:
1963     case INDEX_op_ld16u_i64:
1964     case INDEX_op_ld16s_i64:
1965     case INDEX_op_ld32u_i64:
1966     case INDEX_op_ld32s_i64:
1967     case INDEX_op_ld_i64:
1968     case INDEX_op_st8_i64:
1969     case INDEX_op_st16_i64:
1970     case INDEX_op_st32_i64:
1971     case INDEX_op_st_i64:
1972     case INDEX_op_add_i64:
1973     case INDEX_op_sub_i64:
1974     case INDEX_op_mul_i64:
1975     case INDEX_op_and_i64:
1976     case INDEX_op_or_i64:
1977     case INDEX_op_xor_i64:
1978     case INDEX_op_shl_i64:
1979     case INDEX_op_shr_i64:
1980     case INDEX_op_sar_i64:
1981     case INDEX_op_ext_i32_i64:
1982     case INDEX_op_extu_i32_i64:
1983         return TCG_TARGET_REG_BITS == 64;
1984 
1985     case INDEX_op_movcond_i64:
1986         return TCG_TARGET_HAS_movcond_i64;
1987     case INDEX_op_div_i64:
1988     case INDEX_op_divu_i64:
1989         return TCG_TARGET_HAS_div_i64;
1990     case INDEX_op_rem_i64:
1991     case INDEX_op_remu_i64:
1992         return TCG_TARGET_HAS_rem_i64;
1993     case INDEX_op_div2_i64:
1994     case INDEX_op_divu2_i64:
1995         return TCG_TARGET_HAS_div2_i64;
1996     case INDEX_op_rotl_i64:
1997     case INDEX_op_rotr_i64:
1998         return TCG_TARGET_HAS_rot_i64;
1999     case INDEX_op_deposit_i64:
2000         return TCG_TARGET_HAS_deposit_i64;
2001     case INDEX_op_extract_i64:
2002         return TCG_TARGET_HAS_extract_i64;
2003     case INDEX_op_sextract_i64:
2004         return TCG_TARGET_HAS_sextract_i64;
2005     case INDEX_op_extract2_i64:
2006         return TCG_TARGET_HAS_extract2_i64;
2007     case INDEX_op_extrl_i64_i32:
2008         return TCG_TARGET_HAS_extrl_i64_i32;
2009     case INDEX_op_extrh_i64_i32:
2010         return TCG_TARGET_HAS_extrh_i64_i32;
2011     case INDEX_op_ext8s_i64:
2012         return TCG_TARGET_HAS_ext8s_i64;
2013     case INDEX_op_ext16s_i64:
2014         return TCG_TARGET_HAS_ext16s_i64;
2015     case INDEX_op_ext32s_i64:
2016         return TCG_TARGET_HAS_ext32s_i64;
2017     case INDEX_op_ext8u_i64:
2018         return TCG_TARGET_HAS_ext8u_i64;
2019     case INDEX_op_ext16u_i64:
2020         return TCG_TARGET_HAS_ext16u_i64;
2021     case INDEX_op_ext32u_i64:
2022         return TCG_TARGET_HAS_ext32u_i64;
2023     case INDEX_op_bswap16_i64:
2024         return TCG_TARGET_HAS_bswap16_i64;
2025     case INDEX_op_bswap32_i64:
2026         return TCG_TARGET_HAS_bswap32_i64;
2027     case INDEX_op_bswap64_i64:
2028         return TCG_TARGET_HAS_bswap64_i64;
2029     case INDEX_op_not_i64:
2030         return TCG_TARGET_HAS_not_i64;
2031     case INDEX_op_neg_i64:
2032         return TCG_TARGET_HAS_neg_i64;
2033     case INDEX_op_andc_i64:
2034         return TCG_TARGET_HAS_andc_i64;
2035     case INDEX_op_orc_i64:
2036         return TCG_TARGET_HAS_orc_i64;
2037     case INDEX_op_eqv_i64:
2038         return TCG_TARGET_HAS_eqv_i64;
2039     case INDEX_op_nand_i64:
2040         return TCG_TARGET_HAS_nand_i64;
2041     case INDEX_op_nor_i64:
2042         return TCG_TARGET_HAS_nor_i64;
2043     case INDEX_op_clz_i64:
2044         return TCG_TARGET_HAS_clz_i64;
2045     case INDEX_op_ctz_i64:
2046         return TCG_TARGET_HAS_ctz_i64;
2047     case INDEX_op_ctpop_i64:
2048         return TCG_TARGET_HAS_ctpop_i64;
2049     case INDEX_op_add2_i64:
2050         return TCG_TARGET_HAS_add2_i64;
2051     case INDEX_op_sub2_i64:
2052         return TCG_TARGET_HAS_sub2_i64;
2053     case INDEX_op_mulu2_i64:
2054         return TCG_TARGET_HAS_mulu2_i64;
2055     case INDEX_op_muls2_i64:
2056         return TCG_TARGET_HAS_muls2_i64;
2057     case INDEX_op_muluh_i64:
2058         return TCG_TARGET_HAS_muluh_i64;
2059     case INDEX_op_mulsh_i64:
2060         return TCG_TARGET_HAS_mulsh_i64;
2061 
2062     case INDEX_op_mov_vec:
2063     case INDEX_op_dup_vec:
2064     case INDEX_op_dupm_vec:
2065     case INDEX_op_ld_vec:
2066     case INDEX_op_st_vec:
2067     case INDEX_op_add_vec:
2068     case INDEX_op_sub_vec:
2069     case INDEX_op_and_vec:
2070     case INDEX_op_or_vec:
2071     case INDEX_op_xor_vec:
2072     case INDEX_op_cmp_vec:
2073         return have_vec;
2074     case INDEX_op_dup2_vec:
2075         return have_vec && TCG_TARGET_REG_BITS == 32;
2076     case INDEX_op_not_vec:
2077         return have_vec && TCG_TARGET_HAS_not_vec;
2078     case INDEX_op_neg_vec:
2079         return have_vec && TCG_TARGET_HAS_neg_vec;
2080     case INDEX_op_abs_vec:
2081         return have_vec && TCG_TARGET_HAS_abs_vec;
2082     case INDEX_op_andc_vec:
2083         return have_vec && TCG_TARGET_HAS_andc_vec;
2084     case INDEX_op_orc_vec:
2085         return have_vec && TCG_TARGET_HAS_orc_vec;
2086     case INDEX_op_nand_vec:
2087         return have_vec && TCG_TARGET_HAS_nand_vec;
2088     case INDEX_op_nor_vec:
2089         return have_vec && TCG_TARGET_HAS_nor_vec;
2090     case INDEX_op_eqv_vec:
2091         return have_vec && TCG_TARGET_HAS_eqv_vec;
2092     case INDEX_op_mul_vec:
2093         return have_vec && TCG_TARGET_HAS_mul_vec;
2094     case INDEX_op_shli_vec:
2095     case INDEX_op_shri_vec:
2096     case INDEX_op_sari_vec:
2097         return have_vec && TCG_TARGET_HAS_shi_vec;
2098     case INDEX_op_shls_vec:
2099     case INDEX_op_shrs_vec:
2100     case INDEX_op_sars_vec:
2101         return have_vec && TCG_TARGET_HAS_shs_vec;
2102     case INDEX_op_shlv_vec:
2103     case INDEX_op_shrv_vec:
2104     case INDEX_op_sarv_vec:
2105         return have_vec && TCG_TARGET_HAS_shv_vec;
2106     case INDEX_op_rotli_vec:
2107         return have_vec && TCG_TARGET_HAS_roti_vec;
2108     case INDEX_op_rotls_vec:
2109         return have_vec && TCG_TARGET_HAS_rots_vec;
2110     case INDEX_op_rotlv_vec:
2111     case INDEX_op_rotrv_vec:
2112         return have_vec && TCG_TARGET_HAS_rotv_vec;
2113     case INDEX_op_ssadd_vec:
2114     case INDEX_op_usadd_vec:
2115     case INDEX_op_sssub_vec:
2116     case INDEX_op_ussub_vec:
2117         return have_vec && TCG_TARGET_HAS_sat_vec;
2118     case INDEX_op_smin_vec:
2119     case INDEX_op_umin_vec:
2120     case INDEX_op_smax_vec:
2121     case INDEX_op_umax_vec:
2122         return have_vec && TCG_TARGET_HAS_minmax_vec;
2123     case INDEX_op_bitsel_vec:
2124         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2125     case INDEX_op_cmpsel_vec:
2126         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2127 
2128     default:
2129         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2130         return true;
2131     }
2132 }
2133 
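/*
 * Illustrative guard, assuming a caller that wants an optional
 * opcode with a manual fallback:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop_i32 directly
 *     } else {
 *         // expand to an equivalent opcode sequence
 *     }
 */
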
2134 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2135 
2136 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
2137 {
2138     const TCGHelperInfo *info;
2139     TCGv_i64 extend_free[MAX_CALL_IARGS];
2140     int n_extend = 0;
2141     TCGOp *op;
2142     int i, n, pi = 0, total_args;
2143 
2144     info = g_hash_table_lookup(helper_table, (gpointer)func);
2145     total_args = info->nr_out + info->nr_in + 2;
2146     op = tcg_op_alloc(INDEX_op_call, total_args);
2147 
2148 #ifdef CONFIG_PLUGIN
2149     /* Flag helpers that may affect guest state */
2150     if (tcg_ctx->plugin_insn &&
2151         !(info->flags & TCG_CALL_PLUGIN) &&
2152         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2153         tcg_ctx->plugin_insn->calls_helpers = true;
2154     }
2155 #endif
2156 
2157     TCGOP_CALLO(op) = n = info->nr_out;
2158     switch (n) {
2159     case 0:
2160         tcg_debug_assert(ret == NULL);
2161         break;
2162     case 1:
2163         tcg_debug_assert(ret != NULL);
2164         op->args[pi++] = temp_arg(ret);
2165         break;
2166     case 2:
2167     case 4:
2168         tcg_debug_assert(ret != NULL);
2169         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2170         tcg_debug_assert(ret->temp_subindex == 0);
2171         for (i = 0; i < n; ++i) {
2172             op->args[pi++] = temp_arg(ret + i);
2173         }
2174         break;
2175     default:
2176         g_assert_not_reached();
2177     }
2178 
2179     TCGOP_CALLI(op) = n = info->nr_in;
2180     for (i = 0; i < n; i++) {
2181         const TCGCallArgumentLoc *loc = &info->in[i];
2182         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2183 
2184         switch (loc->kind) {
2185         case TCG_CALL_ARG_NORMAL:
2186         case TCG_CALL_ARG_BY_REF:
2187         case TCG_CALL_ARG_BY_REF_N:
2188             op->args[pi++] = temp_arg(ts);
2189             break;
2190 
2191         case TCG_CALL_ARG_EXTEND_U:
2192         case TCG_CALL_ARG_EXTEND_S:
2193             {
2194                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2195                 TCGv_i32 orig = temp_tcgv_i32(ts);
2196 
2197                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2198                     tcg_gen_ext_i32_i64(temp, orig);
2199                 } else {
2200                     tcg_gen_extu_i32_i64(temp, orig);
2201                 }
2202                 op->args[pi++] = tcgv_i64_arg(temp);
2203                 extend_free[n_extend++] = temp;
2204             }
2205             break;
2206 
2207         default:
2208             g_assert_not_reached();
2209         }
2210     }
2211     op->args[pi++] = (uintptr_t)func;
2212     op->args[pi++] = (uintptr_t)info;
2213     tcg_debug_assert(pi == total_args);
2214 
2215     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2216 
2217     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2218     for (i = 0; i < n_extend; ++i) {
2219         tcg_temp_free_i64(extend_free[i]);
2220     }
2221 }
2222 
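/*
 * Layout of the INDEX_op_call op built above:
 *
 *     args[0 .. nr_out)               output temps (if any)
 *     args[nr_out .. nr_out + nr_in)  input temps, including any
 *                                     32->64 extension temps made here
 *     args[total_args - 2]            function pointer
 *     args[total_args - 1]            TCGHelperInfo pointer
 *
 * TCGOP_CALLO/TCGOP_CALLI record nr_out/nr_in for later passes.
 */
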
2223 static void tcg_reg_alloc_start(TCGContext *s)
2224 {
2225     int i, n;
2226 
2227     for (i = 0, n = s->nb_temps; i < n; i++) {
2228         TCGTemp *ts = &s->temps[i];
2229         TCGTempVal val = TEMP_VAL_MEM;
2230 
2231         switch (ts->kind) {
2232         case TEMP_CONST:
2233             val = TEMP_VAL_CONST;
2234             break;
2235         case TEMP_FIXED:
2236             val = TEMP_VAL_REG;
2237             break;
2238         case TEMP_GLOBAL:
2239             break;
2240         case TEMP_EBB:
2241             val = TEMP_VAL_DEAD;
2242             /* fall through */
2243         case TEMP_TB:
2244             ts->mem_allocated = 0;
2245             break;
2246         default:
2247             g_assert_not_reached();
2248         }
2249         ts->val_type = val;
2250     }
2251 
2252     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2253 }
2254 
2255 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2256                                  TCGTemp *ts)
2257 {
2258     int idx = temp_idx(ts);
2259 
2260     switch (ts->kind) {
2261     case TEMP_FIXED:
2262     case TEMP_GLOBAL:
2263         pstrcpy(buf, buf_size, ts->name);
2264         break;
2265     case TEMP_TB:
2266         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2267         break;
2268     case TEMP_EBB:
2269         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2270         break;
2271     case TEMP_CONST:
2272         switch (ts->type) {
2273         case TCG_TYPE_I32:
2274             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2275             break;
2276 #if TCG_TARGET_REG_BITS > 32
2277         case TCG_TYPE_I64:
2278             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2279             break;
2280 #endif
2281         case TCG_TYPE_V64:
2282         case TCG_TYPE_V128:
2283         case TCG_TYPE_V256:
2284             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2285                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2286             break;
2287         default:
2288             g_assert_not_reached();
2289         }
2290         break;
2291     }
2292     return buf;
2293 }
2294 
2295 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2296                              int buf_size, TCGArg arg)
2297 {
2298     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2299 }
2300 
2301 static const char * const cond_name[] =
2302 {
2303     [TCG_COND_NEVER] = "never",
2304     [TCG_COND_ALWAYS] = "always",
2305     [TCG_COND_EQ] = "eq",
2306     [TCG_COND_NE] = "ne",
2307     [TCG_COND_LT] = "lt",
2308     [TCG_COND_GE] = "ge",
2309     [TCG_COND_LE] = "le",
2310     [TCG_COND_GT] = "gt",
2311     [TCG_COND_LTU] = "ltu",
2312     [TCG_COND_GEU] = "geu",
2313     [TCG_COND_LEU] = "leu",
2314     [TCG_COND_GTU] = "gtu"
2315 };
2316 
2317 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2318 {
2319     [MO_UB]   = "ub",
2320     [MO_SB]   = "sb",
2321     [MO_LEUW] = "leuw",
2322     [MO_LESW] = "lesw",
2323     [MO_LEUL] = "leul",
2324     [MO_LESL] = "lesl",
2325     [MO_LEUQ] = "leq",
2326     [MO_BEUW] = "beuw",
2327     [MO_BESW] = "besw",
2328     [MO_BEUL] = "beul",
2329     [MO_BESL] = "besl",
2330     [MO_BEUQ] = "beq",
2331     [MO_128 + MO_BE] = "beo",
2332     [MO_128 + MO_LE] = "leo",
2333 };
2334 
2335 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2336     [MO_UNALN >> MO_ASHIFT]    = "un+",
2337     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2338     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2339     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2340     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2341     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2342     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2343     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2344 };
2345 
2346 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2347     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2348     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2349     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2350     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2351     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2352     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2353 };
2354 
2355 static const char bswap_flag_name[][6] = {
2356     [TCG_BSWAP_IZ] = "iz",
2357     [TCG_BSWAP_OZ] = "oz",
2358     [TCG_BSWAP_OS] = "os",
2359     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2360     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2361 };
2362 
2363 static inline bool tcg_regset_single(TCGRegSet d)
2364 {
2365     return (d & (d - 1)) == 0;
2366 }
2367 
2368 static inline TCGReg tcg_regset_first(TCGRegSet d)
2369 {
2370     if (TCG_TARGET_NB_REGS <= 32) {
2371         return ctz32(d);
2372     } else {
2373         return ctz64(d);
2374     }
2375 }
2376 
2377 /* Return only the number of characters output -- no error return. */
2378 #define ne_fprintf(...) \
2379     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2380 
2381 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2382 {
2383     char buf[128];
2384     TCGOp *op;
2385 
2386     QTAILQ_FOREACH(op, &s->ops, link) {
2387         int i, k, nb_oargs, nb_iargs, nb_cargs;
2388         const TCGOpDef *def;
2389         TCGOpcode c;
2390         int col = 0;
2391 
2392         c = op->opc;
2393         def = &tcg_op_defs[c];
2394 
2395         if (c == INDEX_op_insn_start) {
2396             nb_oargs = 0;
2397             col += ne_fprintf(f, "\n ----");
2398 
2399             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2400                 col += ne_fprintf(f, " %016" PRIx64,
2401                                   tcg_get_insn_start_param(op, i));
2402             }
2403         } else if (c == INDEX_op_call) {
2404             const TCGHelperInfo *info = tcg_call_info(op);
2405             void *func = tcg_call_func(op);
2406 
2407             /* variable number of arguments */
2408             nb_oargs = TCGOP_CALLO(op);
2409             nb_iargs = TCGOP_CALLI(op);
2410             nb_cargs = def->nb_cargs;
2411 
2412             col += ne_fprintf(f, " %s ", def->name);
2413 
2414             /*
2415              * Print the function name from TCGHelperInfo, if available.
2416              * Note that plugins have a template function for the info,
2417              * but the actual function pointer comes from the plugin.
2418              */
2419             if (func == info->func) {
2420                 col += ne_fprintf(f, "%s", info->name);
2421             } else {
2422                 col += ne_fprintf(f, "plugin(%p)", func);
2423             }
2424 
2425             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2426             for (i = 0; i < nb_oargs; i++) {
2427                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2428                                                             op->args[i]));
2429             }
2430             for (i = 0; i < nb_iargs; i++) {
2431                 TCGArg arg = op->args[nb_oargs + i];
2432                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2433                 col += ne_fprintf(f, ",%s", t);
2434             }
2435         } else {
2436             col += ne_fprintf(f, " %s ", def->name);
2437 
2438             nb_oargs = def->nb_oargs;
2439             nb_iargs = def->nb_iargs;
2440             nb_cargs = def->nb_cargs;
2441 
2442             if (def->flags & TCG_OPF_VECTOR) {
2443                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2444                                   8 << TCGOP_VECE(op));
2445             }
2446 
2447             k = 0;
2448             for (i = 0; i < nb_oargs; i++) {
2449                 const char *sep =  k ? "," : "";
2450                 col += ne_fprintf(f, "%s%s", sep,
2451                                   tcg_get_arg_str(s, buf, sizeof(buf),
2452                                                   op->args[k++]));
2453             }
2454             for (i = 0; i < nb_iargs; i++) {
2455                 const char *sep =  k ? "," : "";
2456                 col += ne_fprintf(f, "%s%s", sep,
2457                                   tcg_get_arg_str(s, buf, sizeof(buf),
2458                                                   op->args[k++]));
2459             }
2460             switch (c) {
2461             case INDEX_op_brcond_i32:
2462             case INDEX_op_setcond_i32:
2463             case INDEX_op_movcond_i32:
2464             case INDEX_op_brcond2_i32:
2465             case INDEX_op_setcond2_i32:
2466             case INDEX_op_brcond_i64:
2467             case INDEX_op_setcond_i64:
2468             case INDEX_op_movcond_i64:
2469             case INDEX_op_cmp_vec:
2470             case INDEX_op_cmpsel_vec:
2471                 if (op->args[k] < ARRAY_SIZE(cond_name)
2472                     && cond_name[op->args[k]]) {
2473                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2474                 } else {
2475                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2476                 }
2477                 i = 1;
2478                 break;
2479             case INDEX_op_qemu_ld_a32_i32:
2480             case INDEX_op_qemu_ld_a64_i32:
2481             case INDEX_op_qemu_st_a32_i32:
2482             case INDEX_op_qemu_st_a64_i32:
2483             case INDEX_op_qemu_st8_a32_i32:
2484             case INDEX_op_qemu_st8_a64_i32:
2485             case INDEX_op_qemu_ld_a32_i64:
2486             case INDEX_op_qemu_ld_a64_i64:
2487             case INDEX_op_qemu_st_a32_i64:
2488             case INDEX_op_qemu_st_a64_i64:
2489             case INDEX_op_qemu_ld_a32_i128:
2490             case INDEX_op_qemu_ld_a64_i128:
2491             case INDEX_op_qemu_st_a32_i128:
2492             case INDEX_op_qemu_st_a64_i128:
2493                 {
2494                     const char *s_al, *s_op, *s_at;
2495                     MemOpIdx oi = op->args[k++];
2496                     MemOp op = get_memop(oi);
2497                     unsigned ix = get_mmuidx(oi);
2498 
2499                     s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2500                     s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2501                     s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2502                     op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2503 
2504                     /* If all fields are accounted for, print symbolically. */
2505                     if (!op && s_al && s_op && s_at) {
2506                         col += ne_fprintf(f, ",%s%s%s,%u",
2507                                           s_at, s_al, s_op, ix);
2508                     } else {
2509                         op = get_memop(oi);
2510                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2511                     }
2512                     i = 1;
2513                 }
2514                 break;
2515             case INDEX_op_bswap16_i32:
2516             case INDEX_op_bswap16_i64:
2517             case INDEX_op_bswap32_i32:
2518             case INDEX_op_bswap32_i64:
2519             case INDEX_op_bswap64_i64:
2520                 {
2521                     TCGArg flags = op->args[k];
2522                     const char *name = NULL;
2523 
2524                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2525                         name = bswap_flag_name[flags];
2526                     }
2527                     if (name) {
2528                         col += ne_fprintf(f, ",%s", name);
2529                     } else {
2530                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2531                     }
2532                     i = k = 1;
2533                 }
2534                 break;
2535             default:
2536                 i = 0;
2537                 break;
2538             }
2539             switch (c) {
2540             case INDEX_op_set_label:
2541             case INDEX_op_br:
2542             case INDEX_op_brcond_i32:
2543             case INDEX_op_brcond_i64:
2544             case INDEX_op_brcond2_i32:
2545                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2546                                   arg_label(op->args[k])->id);
2547                 i++, k++;
2548                 break;
2549             case INDEX_op_mb:
2550                 {
2551                     TCGBar membar = op->args[k];
2552                     const char *b_op, *m_op;
2553 
2554                     switch (membar & TCG_BAR_SC) {
2555                     case 0:
2556                         b_op = "none";
2557                         break;
2558                     case TCG_BAR_LDAQ:
2559                         b_op = "acq";
2560                         break;
2561                     case TCG_BAR_STRL:
2562                         b_op = "rel";
2563                         break;
2564                     case TCG_BAR_SC:
2565                         b_op = "seq";
2566                         break;
2567                     default:
2568                         g_assert_not_reached();
2569                     }
2570 
2571                     switch (membar & TCG_MO_ALL) {
2572                     case 0:
2573                         m_op = "none";
2574                         break;
2575                     case TCG_MO_LD_LD:
2576                         m_op = "rr";
2577                         break;
2578                     case TCG_MO_LD_ST:
2579                         m_op = "rw";
2580                         break;
2581                     case TCG_MO_ST_LD:
2582                         m_op = "wr";
2583                         break;
2584                     case TCG_MO_ST_ST:
2585                         m_op = "ww";
2586                         break;
2587                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2588                         m_op = "rr+rw";
2589                         break;
2590                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2591                         m_op = "rr+wr";
2592                         break;
2593                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2594                         m_op = "rr+ww";
2595                         break;
2596                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2597                         m_op = "rw+wr";
2598                         break;
2599                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2600                         m_op = "rw+ww";
2601                         break;
2602                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2603                         m_op = "wr+ww";
2604                         break;
2605                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2606                         m_op = "rr+rw+wr";
2607                         break;
2608                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2609                         m_op = "rr+rw+ww";
2610                         break;
2611                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2612                         m_op = "rr+wr+ww";
2613                         break;
2614                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2615                         m_op = "rw+wr+ww";
2616                         break;
2617                     case TCG_MO_ALL:
2618                         m_op = "all";
2619                         break;
2620                     default:
2621                         g_assert_not_reached();
2622                     }
2623 
2624                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2625                     i++, k++;
2626                 }
2627                 break;
2628             default:
2629                 break;
2630             }
2631             for (; i < nb_cargs; i++, k++) {
2632                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2633                                   op->args[k]);
2634             }
2635         }
2636 
2637         if (have_prefs || op->life) {
2638             for (; col < 40; ++col) {
2639                 putc(' ', f);
2640             }
2641         }
2642 
2643         if (op->life) {
2644             unsigned life = op->life;
2645 
2646             if (life & (SYNC_ARG * 3)) {
2647                 ne_fprintf(f, "  sync:");
2648                 for (i = 0; i < 2; ++i) {
2649                     if (life & (SYNC_ARG << i)) {
2650                         ne_fprintf(f, " %d", i);
2651                     }
2652                 }
2653             }
2654             life /= DEAD_ARG;
2655             if (life) {
2656                 ne_fprintf(f, "  dead:");
2657                 for (i = 0; life; ++i, life >>= 1) {
2658                     if (life & 1) {
2659                         ne_fprintf(f, " %d", i);
2660                     }
2661                 }
2662             }
2663         }
2664 
2665         if (have_prefs) {
2666             for (i = 0; i < nb_oargs; ++i) {
2667                 TCGRegSet set = output_pref(op, i);
2668 
2669                 if (i == 0) {
2670                     ne_fprintf(f, "  pref=");
2671                 } else {
2672                     ne_fprintf(f, ",");
2673                 }
2674                 if (set == 0) {
2675                     ne_fprintf(f, "none");
2676                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2677                     ne_fprintf(f, "all");
2678 #ifdef CONFIG_DEBUG_TCG
2679                 } else if (tcg_regset_single(set)) {
2680                     TCGReg reg = tcg_regset_first(set);
2681                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2682 #endif
2683                 } else if (TCG_TARGET_NB_REGS <= 32) {
2684                     ne_fprintf(f, "0x%x", (uint32_t)set);
2685                 } else {
2686                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2687                 }
2688             }
2689         }
2690 
2691         putc('\n', f);
2692     }
2693 }
2694 
2695 /* we give more priority to constraints with fewer registers */
2696 static int get_constraint_priority(const TCGOpDef *def, int k)
2697 {
2698     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2699     int n = ctpop64(arg_ct->regs);
2700 
2701     /*
2702      * Sort constraints of a single register first, which includes output
2703      * aliases (which must exactly match the input already allocated).
2704      */
2705     if (n == 1 || arg_ct->oalias) {
2706         return INT_MAX;
2707     }
2708 
2709     /*
2710      * Sort register pairs next, first then second immediately after.
2711      * Arbitrarily sort multiple pairs by the index of the first reg;
2712      * there shouldn't be many pairs.
2713      */
2714     switch (arg_ct->pair) {
2715     case 1:
2716     case 3:
2717         return (k + 1) * 2;
2718     case 2:
2719         return (arg_ct->pair_index + 1) * 2 - 1;
2720     }
2721 
2722     /* Finally, sort by register count, fewest first. */
2723     assert(n > 1);
2724     return -n;
2725 }
2726 
2727 /* sort from highest priority to lowest */
2728 static void sort_constraints(TCGOpDef *def, int start, int n)
2729 {
2730     int i, j;
2731     TCGArgConstraint *a = def->args_ct;
2732 
2733     for (i = 0; i < n; i++) {
2734         a[start + i].sort_index = start + i;
2735     }
2736     if (n <= 1) {
2737         return;
2738     }
2739     for (i = 0; i < n - 1; i++) {
2740         for (j = i + 1; j < n; j++) {
2741             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2742             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2743             if (p1 < p2) {
2744                 int tmp = a[start + i].sort_index;
2745                 a[start + i].sort_index = a[start + j].sort_index;
2746                 a[start + j].sort_index = tmp;
2747             }
2748         }
2749     }
2750 }
2751 
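/*
 * Worked example of the resulting order: given an output alias "0",
 * a register pair, and plain classes allowing 4 and 16 registers,
 * the sorted order is alias (INT_MAX), pair members, 4-register
 * class, then 16-register class.
 */
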
2752 static void process_op_defs(TCGContext *s)
2753 {
2754     TCGOpcode op;
2755 
2756     for (op = 0; op < NB_OPS; op++) {
2757         TCGOpDef *def = &tcg_op_defs[op];
2758         const TCGTargetOpDef *tdefs;
2759         bool saw_alias_pair = false;
2760         int i, o, i2, o2, nb_args;
2761 
2762         if (def->flags & TCG_OPF_NOT_PRESENT) {
2763             continue;
2764         }
2765 
2766         nb_args = def->nb_iargs + def->nb_oargs;
2767         if (nb_args == 0) {
2768             continue;
2769         }
2770 
2771         /*
2772          * Macro magic should make it impossible, but double-check that
2773          * the array index is in range.  Since the signedness of an enum
2774          * is implementation defined, force the result to unsigned.
2775          */
2776         unsigned con_set = tcg_target_op_def(op);
2777         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2778         tdefs = &constraint_sets[con_set];
2779 
2780         for (i = 0; i < nb_args; i++) {
2781             const char *ct_str = tdefs->args_ct_str[i];
2782             bool input_p = i >= def->nb_oargs;
2783 
2784             /* Incomplete TCGTargetOpDef entry. */
2785             tcg_debug_assert(ct_str != NULL);
2786 
2787             switch (*ct_str) {
2788             case '0' ... '9':
2789                 o = *ct_str - '0';
2790                 tcg_debug_assert(input_p);
2791                 tcg_debug_assert(o < def->nb_oargs);
2792                 tcg_debug_assert(def->args_ct[o].regs != 0);
2793                 tcg_debug_assert(!def->args_ct[o].oalias);
2794                 def->args_ct[i] = def->args_ct[o];
2795                 /* The output sets oalias.  */
2796                 def->args_ct[o].oalias = 1;
2797                 def->args_ct[o].alias_index = i;
2798                 /* The input sets ialias. */
2799                 def->args_ct[i].ialias = 1;
2800                 def->args_ct[i].alias_index = o;
2801                 if (def->args_ct[i].pair) {
2802                     saw_alias_pair = true;
2803                 }
2804                 tcg_debug_assert(ct_str[1] == '\0');
2805                 continue;
2806 
2807             case '&':
2808                 tcg_debug_assert(!input_p);
2809                 def->args_ct[i].newreg = true;
2810                 ct_str++;
2811                 break;
2812 
2813             case 'p': /* plus */
2814                 /* Allocate to the register after the previous. */
2815                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2816                 o = i - 1;
2817                 tcg_debug_assert(!def->args_ct[o].pair);
2818                 tcg_debug_assert(!def->args_ct[o].ct);
2819                 def->args_ct[i] = (TCGArgConstraint){
2820                     .pair = 2,
2821                     .pair_index = o,
2822                     .regs = def->args_ct[o].regs << 1,
2823                 };
2824                 def->args_ct[o].pair = 1;
2825                 def->args_ct[o].pair_index = i;
2826                 tcg_debug_assert(ct_str[1] == '\0');
2827                 continue;
2828 
2829             case 'm': /* minus */
2830                 /* Allocate to the register before the previous. */
2831                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2832                 o = i - 1;
2833                 tcg_debug_assert(!def->args_ct[o].pair);
2834                 tcg_debug_assert(!def->args_ct[o].ct);
2835                 def->args_ct[i] = (TCGArgConstraint){
2836                     .pair = 1,
2837                     .pair_index = o,
2838                     .regs = def->args_ct[o].regs >> 1,
2839                 };
2840                 def->args_ct[o].pair = 2;
2841                 def->args_ct[o].pair_index = i;
2842                 tcg_debug_assert(ct_str[1] == '\0');
2843                 continue;
2844             }
2845 
2846             do {
2847                 switch (*ct_str) {
2848                 case 'i':
2849                     def->args_ct[i].ct |= TCG_CT_CONST;
2850                     break;
2851 
2852                 /* Include all of the target-specific constraints. */
2853 
2854 #undef CONST
2855 #define CONST(CASE, MASK) \
2856     case CASE: def->args_ct[i].ct |= MASK; break;
2857 #define REGS(CASE, MASK) \
2858     case CASE: def->args_ct[i].regs |= MASK; break;
2859 
2860 #include "tcg-target-con-str.h"
2861 
2862 #undef REGS
2863 #undef CONST
2864                 default:
2865                 case '0' ... '9':
2866                 case '&':
2867                 case 'p':
2868                 case 'm':
2869                     /* Typo in TCGTargetOpDef constraint. */
2870                     g_assert_not_reached();
2871                 }
2872             } while (*++ct_str != '\0');
2873         }
2874 
2875         /* TCGTargetOpDef entry with too much information? */
2876         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2877 
2878         /*
2879          * Fix up output pairs that are aliased with inputs.
2880          * When we created the alias, we copied pair from the output.
2881          * There are three cases:
2882          *    (1a) Pairs of inputs alias pairs of outputs.
2883          *    (1b) One input aliases the first of a pair of outputs.
2884          *    (2)  One input aliases the second of a pair of outputs.
2885          *
2886          * Case 1a is handled by making sure that the pair_index'es are
2887          * properly updated so that they appear the same as a pair of inputs.
2888          *
2889          * Case 1b is handled by setting the pair_index of the input to
2890          * itself, simply so it doesn't point to an unrelated argument.
2891          * Since we don't encounter the "second" during the input allocation
2892          * phase, nothing happens with the second half of the input pair.
2893          *
2894          * Case 2 is handled by setting the second input to pair=3, the
2895          * first output to pair=3, and the pair_index'es to match.
2896          */
2897         if (saw_alias_pair) {
2898             for (i = def->nb_oargs; i < nb_args; i++) {
2899                 /*
2900                  * Since [0-9pm] must be alone in the constraint string,
2901                  * the only way they can both be set is if the pair comes
2902                  * from the output alias.
2903                  */
2904                 if (!def->args_ct[i].ialias) {
2905                     continue;
2906                 }
2907                 switch (def->args_ct[i].pair) {
2908                 case 0:
2909                     break;
2910                 case 1:
2911                     o = def->args_ct[i].alias_index;
2912                     o2 = def->args_ct[o].pair_index;
2913                     tcg_debug_assert(def->args_ct[o].pair == 1);
2914                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2915                     if (def->args_ct[o2].oalias) {
2916                         /* Case 1a */
2917                         i2 = def->args_ct[o2].alias_index;
2918                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2919                         def->args_ct[i2].pair_index = i;
2920                         def->args_ct[i].pair_index = i2;
2921                     } else {
2922                         /* Case 1b */
2923                         def->args_ct[i].pair_index = i;
2924                     }
2925                     break;
2926                 case 2:
2927                     o = def->args_ct[i].alias_index;
2928                     o2 = def->args_ct[o].pair_index;
2929                     tcg_debug_assert(def->args_ct[o].pair == 2);
2930                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2931                     if (def->args_ct[o2].oalias) {
2932                         /* Case 1a */
2933                         i2 = def->args_ct[o2].alias_index;
2934                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2935                         def->args_ct[i2].pair_index = i;
2936                         def->args_ct[i].pair_index = i2;
2937                     } else {
2938                         /* Case 2 */
2939                         def->args_ct[i].pair = 3;
2940                         def->args_ct[o2].pair = 3;
2941                         def->args_ct[i].pair_index = o2;
2942                         def->args_ct[o2].pair_index = i;
2943                     }
2944                     break;
2945                 default:
2946                     g_assert_not_reached();
2947                 }
2948             }
2949         }
2950 
2951         /* sort the constraints (XXX: this is just a heuristic) */
2952         sort_constraints(def, 0, def->nb_oargs);
2953         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2954     }
2955 }
2956 
2957 static void remove_label_use(TCGOp *op, int idx)
2958 {
2959     TCGLabel *label = arg_label(op->args[idx]);
2960     TCGLabelUse *use;
2961 
2962     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2963         if (use->op == op) {
2964             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2965             return;
2966         }
2967     }
2968     g_assert_not_reached();
2969 }
2970 
2971 void tcg_op_remove(TCGContext *s, TCGOp *op)
2972 {
2973     switch (op->opc) {
2974     case INDEX_op_br:
2975         remove_label_use(op, 0);
2976         break;
2977     case INDEX_op_brcond_i32:
2978     case INDEX_op_brcond_i64:
2979         remove_label_use(op, 3);
2980         break;
2981     case INDEX_op_brcond2_i32:
2982         remove_label_use(op, 5);
2983         break;
2984     default:
2985         break;
2986     }
2987 
2988     QTAILQ_REMOVE(&s->ops, op, link);
2989     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2990     s->nb_ops--;
2991 
2992 #ifdef CONFIG_PROFILER
2993     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2994 #endif
2995 }
2996 
2997 void tcg_remove_ops_after(TCGOp *op)
2998 {
2999     TCGContext *s = tcg_ctx;
3000 
3001     while (true) {
3002         TCGOp *last = tcg_last_op();
3003         if (last == op) {
3004             return;
3005         }
3006         tcg_op_remove(s, last);
3007     }
3008 }
3009 
3010 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3011 {
3012     TCGContext *s = tcg_ctx;
3013     TCGOp *op = NULL;
3014 
3015     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3016         QTAILQ_FOREACH(op, &s->free_ops, link) {
3017             if (nargs <= op->nargs) {
3018                 QTAILQ_REMOVE(&s->free_ops, op, link);
3019                 nargs = op->nargs;
3020                 goto found;
3021             }
3022         }
3023     }
3024 
3025     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3026     nargs = MAX(4, nargs);
3027     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3028 
3029  found:
3030     memset(op, 0, offsetof(TCGOp, link));
3031     op->opc = opc;
3032     op->nargs = nargs;
3033 
3034     /* Check for bitfield overflow. */
3035     tcg_debug_assert(op->nargs == nargs);
3036 
3037     s->nb_ops++;
3038     return op;
3039 }
3040 
3041 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3042 {
3043     TCGOp *op = tcg_op_alloc(opc, nargs);
3044     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3045     return op;
3046 }
3047 
3048 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3049                             TCGOpcode opc, unsigned nargs)
3050 {
3051     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3052     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3053     return new_op;
3054 }
3055 
3056 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3057                            TCGOpcode opc, unsigned nargs)
3058 {
3059     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3060     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3061     return new_op;
3062 }
3063 
3064 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3065 {
3066     TCGLabelUse *u;
3067 
3068     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3069         TCGOp *op = u->op;
3070         switch (op->opc) {
3071         case INDEX_op_br:
3072             op->args[0] = label_arg(to);
3073             break;
3074         case INDEX_op_brcond_i32:
3075         case INDEX_op_brcond_i64:
3076             op->args[3] = label_arg(to);
3077             break;
3078         case INDEX_op_brcond2_i32:
3079             op->args[5] = label_arg(to);
3080             break;
3081         default:
3082             g_assert_not_reached();
3083         }
3084     }
3085 
3086     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3087 }
3088 
3089 /* Reachability analysis: remove unreachable code.  */
3090 static void __attribute__((noinline))
3091 reachable_code_pass(TCGContext *s)
3092 {
3093     TCGOp *op, *op_next, *op_prev;
3094     bool dead = false;
3095 
3096     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3097         bool remove = dead;
3098         TCGLabel *label;
3099 
3100         switch (op->opc) {
3101         case INDEX_op_set_label:
3102             label = arg_label(op->args[0]);
3103 
3104             /*
3105              * Note that the first op in the TB is always a load,
3106              * so there is always something before a label.
3107              */
3108             op_prev = QTAILQ_PREV(op, link);
3109 
3110             /*
3111              * If we find two sequential labels, move all branches to
3112              * reference the second label and remove the first label.
3113              * Do this before branch to next optimization, so that the
3114              * middle label is out of the way.
3115              */
3116             if (op_prev->opc == INDEX_op_set_label) {
3117                 move_label_uses(label, arg_label(op_prev->args[0]));
3118                 tcg_op_remove(s, op_prev);
3119                 op_prev = QTAILQ_PREV(op, link);
3120             }
3121 
3122             /*
3123              * Optimization can fold conditional branches to unconditional.
3124              * If we find a label which is preceded by an unconditional
3125              * branch to next, remove the branch.  We couldn't do this when
3126              * processing the branch because any dead code between the branch
3127              * and label had not yet been removed.
3128              */
3129             if (op_prev->opc == INDEX_op_br &&
3130                 label == arg_label(op_prev->args[0])) {
3131                 tcg_op_remove(s, op_prev);
3132                 /* Fall through means insns become live again.  */
3133                 dead = false;
3134             }
3135 
3136             if (QSIMPLEQ_EMPTY(&label->branches)) {
3137                 /*
3138                  * While there is an occasional backward branch, virtually
3139                  * all branches generated by the translators are forward.
3140                  * Which means that generally we will have already removed
3141                  * all references to this label by the time we reach it,
3142                  * and there is little to be gained by iterating.
3143                  */
3144                 remove = true;
3145             } else {
3146                 /* Once we see a label, insns become live again.  */
3147                 dead = false;
3148                 remove = false;
3149             }
3150             break;
3151 
3152         case INDEX_op_br:
3153         case INDEX_op_exit_tb:
3154         case INDEX_op_goto_ptr:
3155             /* Unconditional branches; everything following is dead.  */
3156             dead = true;
3157             break;
3158 
3159         case INDEX_op_call:
3160             /* Notice noreturn helper calls, raising exceptions.  */
3161             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3162                 dead = true;
3163             }
3164             break;
3165 
3166         case INDEX_op_insn_start:
3167             /* Never remove -- we need to keep these for unwind.  */
3168             remove = false;
3169             break;
3170 
3171         default:
3172             break;
3173         }
3174 
3175         if (remove) {
3176             tcg_op_remove(s, op);
3177         }
3178     }
3179 }
3180 
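/*
 * Worked example for the pass above (comment only): given
 *     br $L0
 *     mov_i32 t2, t3
 *     set_label $L0
 * the mov is removed because it follows an unconditional branch, then
 * the br is removed because it targets the immediately following label,
 * and finally the label itself is removed once its use list is empty.
 */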
3181 #define TS_DEAD  1   /* temp's value is dead at this point */
3182 #define TS_MEM   2   /* temp's value is synced to its memory slot */
3183 
3184 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3185 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3186 
3187 /* For liveness_pass_1, the register preferences for a given temp.  */
3188 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3189 {
3190     return ts->state_ptr;
3191 }
3192 
3193 /* For liveness_pass_1, reset the preferences for a given temp to the
3194  * maximal regset for its type.
3195  */
3196 static inline void la_reset_pref(TCGTemp *ts)
3197 {
3198     *la_temp_pref(ts)
3199         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3200 }
3201 
3202 /* liveness analysis: end of function: all temps are dead, and globals
3203    should be in memory. */
3204 static void la_func_end(TCGContext *s, int ng, int nt)
3205 {
3206     int i;
3207 
3208     for (i = 0; i < ng; ++i) {
3209         s->temps[i].state = TS_DEAD | TS_MEM;
3210         la_reset_pref(&s->temps[i]);
3211     }
3212     for (i = ng; i < nt; ++i) {
3213         s->temps[i].state = TS_DEAD;
3214         la_reset_pref(&s->temps[i]);
3215     }
3216 }
3217 
3218 /* liveness analysis: end of basic block: all temps are dead, globals
3219    and local temps should be in memory. */
3220 static void la_bb_end(TCGContext *s, int ng, int nt)
3221 {
3222     int i;
3223 
3224     for (i = 0; i < nt; ++i) {
3225         TCGTemp *ts = &s->temps[i];
3226         int state;
3227 
3228         switch (ts->kind) {
3229         case TEMP_FIXED:
3230         case TEMP_GLOBAL:
3231         case TEMP_TB:
3232             state = TS_DEAD | TS_MEM;
3233             break;
3234         case TEMP_EBB:
3235         case TEMP_CONST:
3236             state = TS_DEAD;
3237             break;
3238         default:
3239             g_assert_not_reached();
3240         }
3241         ts->state = state;
3242         la_reset_pref(ts);
3243     }
3244 }
3245 
3246 /* liveness analysis: sync globals back to memory.  */
3247 static void la_global_sync(TCGContext *s, int ng)
3248 {
3249     int i;
3250 
3251     for (i = 0; i < ng; ++i) {
3252         int state = s->temps[i].state;
3253         s->temps[i].state = state | TS_MEM;
3254         if (state == TS_DEAD) {
3255             /* If the global was previously dead, reset prefs.  */
3256             la_reset_pref(&s->temps[i]);
3257         }
3258     }
3259 }
3260 
3261 /*
3262  * liveness analysis: conditional branch: all temps are dead unless
3263  * explicitly live-across-conditional-branch, globals and local temps
3264  * should be synced.
3265  */
3266 static void la_bb_sync(TCGContext *s, int ng, int nt)
3267 {
3268     la_global_sync(s, ng);
3269 
3270     for (int i = ng; i < nt; ++i) {
3271         TCGTemp *ts = &s->temps[i];
3272         int state;
3273 
3274         switch (ts->kind) {
3275         case TEMP_TB:
3276             state = ts->state;
3277             ts->state = state | TS_MEM;
3278             if (state != TS_DEAD) {
3279                 continue;
3280             }
3281             break;
3282         case TEMP_EBB:
3283         case TEMP_CONST:
3284             continue;
3285         default:
3286             g_assert_not_reached();
3287         }
3288         la_reset_pref(&s->temps[i]);
3289     }
3290 }
3291 
3292 /* liveness analysis: sync globals back to memory and kill.  */
3293 static void la_global_kill(TCGContext *s, int ng)
3294 {
3295     int i;
3296 
3297     for (i = 0; i < ng; i++) {
3298         s->temps[i].state = TS_DEAD | TS_MEM;
3299         la_reset_pref(&s->temps[i]);
3300     }
3301 }
3302 
3303 /* liveness analysis: drop call-clobbered prefs for temps live across calls.  */
3304 static void la_cross_call(TCGContext *s, int nt)
3305 {
3306     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3307     int i;
3308 
3309     for (i = 0; i < nt; i++) {
3310         TCGTemp *ts = &s->temps[i];
3311         if (!(ts->state & TS_DEAD)) {
3312             TCGRegSet *pset = la_temp_pref(ts);
3313             TCGRegSet set = *pset;
3314 
3315             set &= mask;
3316             /* If the combination is not possible, restart from the full set.  */
3317             if (set == 0) {
3318                 set = tcg_target_available_regs[ts->type] & mask;
3319             }
3320             *pset = set;
3321         }
3322     }
3323 }
3324 
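/*
 * Sketch of the effect of la_cross_call (comment only): assuming a host
 * where R0 is call-clobbered and R5 is call-saved, a temp that is live
 * across a call with preference {R0, R5} is narrowed to {R5}; a temp
 * whose preference was {R0} alone restarts from all available
 * non-clobbered registers of its type rather than being left empty.
 */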
3325 /*
3326  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3327  * to TEMP_EBB, if possible.
3328  */
3329 static void __attribute__((noinline))
3330 liveness_pass_0(TCGContext *s)
3331 {
3332     void * const multiple_ebb = (void *)(uintptr_t)-1;
3333     int nb_temps = s->nb_temps;
3334     TCGOp *op, *ebb;
3335 
3336     for (int i = s->nb_globals; i < nb_temps; ++i) {
3337         s->temps[i].state_ptr = NULL;
3338     }
3339 
3340     /*
3341      * Represent each EBB by the op at which it begins.  In the case of
3342      * the first EBB, this is the first op, otherwise it is a label.
3343      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3344      * within a single EBB, else MULTIPLE_EBB.
3345      */
3346     ebb = QTAILQ_FIRST(&s->ops);
3347     QTAILQ_FOREACH(op, &s->ops, link) {
3348         const TCGOpDef *def;
3349         int nb_oargs, nb_iargs;
3350 
3351         switch (op->opc) {
3352         case INDEX_op_set_label:
3353             ebb = op;
3354             continue;
3355         case INDEX_op_discard:
3356             continue;
3357         case INDEX_op_call:
3358             nb_oargs = TCGOP_CALLO(op);
3359             nb_iargs = TCGOP_CALLI(op);
3360             break;
3361         default:
3362             def = &tcg_op_defs[op->opc];
3363             nb_oargs = def->nb_oargs;
3364             nb_iargs = def->nb_iargs;
3365             break;
3366         }
3367 
3368         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3369             TCGTemp *ts = arg_temp(op->args[i]);
3370 
3371             if (ts->kind != TEMP_TB) {
3372                 continue;
3373             }
3374             if (ts->state_ptr == NULL) {
3375                 ts->state_ptr = ebb;
3376             } else if (ts->state_ptr != ebb) {
3377                 ts->state_ptr = multiple_ebb;
3378             }
3379         }
3380     }
3381 
3382     /*
3383      * For TEMP_TB that turned out not to be used beyond one EBB,
3384      * reduce the liveness to TEMP_EBB.
3385      */
3386     for (int i = s->nb_globals; i < nb_temps; ++i) {
3387         TCGTemp *ts = &s->temps[i];
3388         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3389             ts->kind = TEMP_EBB;
3390         }
3391     }
3392 }
3393 
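/*
 * Example (comment only): a TEMP_TB that is written and read only
 * between one pair of labels -- i.e. within a single extended basic
 * block -- is demoted to TEMP_EBB here, which lets the later passes
 * treat it as dead at the EBB boundary instead of forcing a sync to
 * its stack slot.
 */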
3394 /* Liveness analysis: update op->life to tell whether a given
3395    input argument is dead.  Instructions updating dead
3396    temporaries are removed. */
3397 static void __attribute__((noinline))
3398 liveness_pass_1(TCGContext *s)
3399 {
3400     int nb_globals = s->nb_globals;
3401     int nb_temps = s->nb_temps;
3402     TCGOp *op, *op_prev;
3403     TCGRegSet *prefs;
3404     int i;
3405 
3406     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3407     for (i = 0; i < nb_temps; ++i) {
3408         s->temps[i].state_ptr = prefs + i;
3409     }
3410 
3411     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3412     la_func_end(s, nb_globals, nb_temps);
3413 
3414     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3415         int nb_iargs, nb_oargs;
3416         TCGOpcode opc_new, opc_new2;
3417         bool have_opc_new2;
3418         TCGLifeData arg_life = 0;
3419         TCGTemp *ts;
3420         TCGOpcode opc = op->opc;
3421         const TCGOpDef *def = &tcg_op_defs[opc];
3422 
3423         switch (opc) {
3424         case INDEX_op_call:
3425             {
3426                 const TCGHelperInfo *info = tcg_call_info(op);
3427                 int call_flags = tcg_call_flags(op);
3428 
3429                 nb_oargs = TCGOP_CALLO(op);
3430                 nb_iargs = TCGOP_CALLI(op);
3431 
3432                 /* pure functions can be removed if their result is unused */
3433                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3434                     for (i = 0; i < nb_oargs; i++) {
3435                         ts = arg_temp(op->args[i]);
3436                         if (ts->state != TS_DEAD) {
3437                             goto do_not_remove_call;
3438                         }
3439                     }
3440                     goto do_remove;
3441                 }
3442             do_not_remove_call:
3443 
3444                 /* Output args are dead.  */
3445                 for (i = 0; i < nb_oargs; i++) {
3446                     ts = arg_temp(op->args[i]);
3447                     if (ts->state & TS_DEAD) {
3448                         arg_life |= DEAD_ARG << i;
3449                     }
3450                     if (ts->state & TS_MEM) {
3451                         arg_life |= SYNC_ARG << i;
3452                     }
3453                     ts->state = TS_DEAD;
3454                     la_reset_pref(ts);
3455                 }
3456 
3457                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3458                 memset(op->output_pref, 0, sizeof(op->output_pref));
3459 
3460                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3461                                     TCG_CALL_NO_READ_GLOBALS))) {
3462                     la_global_kill(s, nb_globals);
3463                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3464                     la_global_sync(s, nb_globals);
3465                 }
3466 
3467                 /* Record arguments that die in this helper.  */
3468                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3469                     ts = arg_temp(op->args[i]);
3470                     if (ts->state & TS_DEAD) {
3471                         arg_life |= DEAD_ARG << i;
3472                     }
3473                 }
3474 
3475                 /* For all live registers, remove call-clobbered prefs.  */
3476                 la_cross_call(s, nb_temps);
3477 
3478                 /*
3479                  * Input arguments are live for preceding opcodes.
3480                  *
3481                  * For those arguments that die, and will be allocated in
3482                  * registers, clear the register set for that arg, to be
3483                  * filled in below.  For args that will be on the stack,
3484                  * reset to any available reg.  Process arguments in reverse
3485                  * order so that if a temp is used more than once, the stack
3486                  * reset to max happens before the register reset to 0.
3487                  */
3488                 for (i = nb_iargs - 1; i >= 0; i--) {
3489                     const TCGCallArgumentLoc *loc = &info->in[i];
3490                     ts = arg_temp(op->args[nb_oargs + i]);
3491 
3492                     if (ts->state & TS_DEAD) {
3493                         switch (loc->kind) {
3494                         case TCG_CALL_ARG_NORMAL:
3495                         case TCG_CALL_ARG_EXTEND_U:
3496                         case TCG_CALL_ARG_EXTEND_S:
3497                             if (arg_slot_reg_p(loc->arg_slot)) {
3498                                 *la_temp_pref(ts) = 0;
3499                                 break;
3500                             }
3501                             /* fall through */
3502                         default:
3503                             *la_temp_pref(ts) =
3504                                 tcg_target_available_regs[ts->type];
3505                             break;
3506                         }
3507                         ts->state &= ~TS_DEAD;
3508                     }
3509                 }
3510 
3511                 /*
3512                  * For each input argument, add its input register to prefs.
3513                  * If a temp is used once, this produces a single set bit;
3514                  * if a temp is used multiple times, this produces a set.
3515                  */
3516                 for (i = 0; i < nb_iargs; i++) {
3517                     const TCGCallArgumentLoc *loc = &info->in[i];
3518                     ts = arg_temp(op->args[nb_oargs + i]);
3519 
3520                     switch (loc->kind) {
3521                     case TCG_CALL_ARG_NORMAL:
3522                     case TCG_CALL_ARG_EXTEND_U:
3523                     case TCG_CALL_ARG_EXTEND_S:
3524                         if (arg_slot_reg_p(loc->arg_slot)) {
3525                             tcg_regset_set_reg(*la_temp_pref(ts),
3526                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3527                         }
3528                         break;
3529                     default:
3530                         break;
3531                     }
3532                 }
3533             }
3534             break;
3535         case INDEX_op_insn_start:
3536             break;
3537         case INDEX_op_discard:
3538             /* mark the temporary as dead */
3539             ts = arg_temp(op->args[0]);
3540             ts->state = TS_DEAD;
3541             la_reset_pref(ts);
3542             break;
3543 
3544         case INDEX_op_add2_i32:
3545             opc_new = INDEX_op_add_i32;
3546             goto do_addsub2;
3547         case INDEX_op_sub2_i32:
3548             opc_new = INDEX_op_sub_i32;
3549             goto do_addsub2;
3550         case INDEX_op_add2_i64:
3551             opc_new = INDEX_op_add_i64;
3552             goto do_addsub2;
3553         case INDEX_op_sub2_i64:
3554             opc_new = INDEX_op_sub_i64;
3555         do_addsub2:
3556             nb_iargs = 4;
3557             nb_oargs = 2;
3558             /* Test if the high part of the operation is dead, but not
3559                the low part.  The result can be optimized to a simple
3560                add or sub.  This happens often for an x86_64 guest when
3561                the cpu mode is set to 32 bit.  */
3562             if (arg_temp(op->args[1])->state == TS_DEAD) {
3563                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3564                     goto do_remove;
3565                 }
3566                 /* Replace the opcode and adjust the args in place,
3567                    leaving 3 unused args at the end.  */
3568                 op->opc = opc = opc_new;
3569                 op->args[1] = op->args[2];
3570                 op->args[2] = op->args[4];
3571                 /* Fall through and mark the single-word operation live.  */
3572                 nb_iargs = 2;
3573                 nb_oargs = 1;
3574             }
3575             goto do_not_remove;
3576 
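        /*
         * Example of the in-place rewrite above (comment only):
         *     add2_i32 lo, hi, al, ah, bl, bh
         * with 'hi' dead but 'lo' live becomes
         *     add_i32 lo, al, bl
         * with the three trailing args left unused.
         */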
3577         case INDEX_op_mulu2_i32:
3578             opc_new = INDEX_op_mul_i32;
3579             opc_new2 = INDEX_op_muluh_i32;
3580             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3581             goto do_mul2;
3582         case INDEX_op_muls2_i32:
3583             opc_new = INDEX_op_mul_i32;
3584             opc_new2 = INDEX_op_mulsh_i32;
3585             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3586             goto do_mul2;
3587         case INDEX_op_mulu2_i64:
3588             opc_new = INDEX_op_mul_i64;
3589             opc_new2 = INDEX_op_muluh_i64;
3590             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3591             goto do_mul2;
3592         case INDEX_op_muls2_i64:
3593             opc_new = INDEX_op_mul_i64;
3594             opc_new2 = INDEX_op_mulsh_i64;
3595             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3596             goto do_mul2;
3597         do_mul2:
3598             nb_iargs = 2;
3599             nb_oargs = 2;
3600             if (arg_temp(op->args[1])->state == TS_DEAD) {
3601                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3602                     /* Both parts of the operation are dead.  */
3603                     goto do_remove;
3604                 }
3605                 /* The high part of the operation is dead; generate the low. */
3606                 op->opc = opc = opc_new;
3607                 op->args[1] = op->args[2];
3608                 op->args[2] = op->args[3];
3609             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3610                 /* The low part of the operation is dead; generate the high. */
3611                 op->opc = opc = opc_new2;
3612                 op->args[0] = op->args[1];
3613                 op->args[1] = op->args[2];
3614                 op->args[2] = op->args[3];
3615             } else {
3616                 goto do_not_remove;
3617             }
3618             /* Mark the single-word operation live.  */
3619             nb_oargs = 1;
3620             goto do_not_remove;
3621 
3622         default:
3623             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3624             nb_iargs = def->nb_iargs;
3625             nb_oargs = def->nb_oargs;
3626 
3627             /* Test if the operation can be removed because all
3628                its outputs are dead. We assume that nb_oargs == 0
3629                implies side effects.  */
3630             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3631                 for (i = 0; i < nb_oargs; i++) {
3632                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3633                         goto do_not_remove;
3634                     }
3635                 }
3636                 goto do_remove;
3637             }
3638             goto do_not_remove;
3639 
3640         do_remove:
3641             tcg_op_remove(s, op);
3642             break;
3643 
3644         do_not_remove:
3645             for (i = 0; i < nb_oargs; i++) {
3646                 ts = arg_temp(op->args[i]);
3647 
3648                 /* Remember the preference of the uses that followed.  */
3649                 if (i < ARRAY_SIZE(op->output_pref)) {
3650                     op->output_pref[i] = *la_temp_pref(ts);
3651                 }
3652 
3653                 /* Output args are dead.  */
3654                 if (ts->state & TS_DEAD) {
3655                     arg_life |= DEAD_ARG << i;
3656                 }
3657                 if (ts->state & TS_MEM) {
3658                     arg_life |= SYNC_ARG << i;
3659                 }
3660                 ts->state = TS_DEAD;
3661                 la_reset_pref(ts);
3662             }
3663 
3664             /* If end of basic block, update.  */
3665             if (def->flags & TCG_OPF_BB_EXIT) {
3666                 la_func_end(s, nb_globals, nb_temps);
3667             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3668                 la_bb_sync(s, nb_globals, nb_temps);
3669             } else if (def->flags & TCG_OPF_BB_END) {
3670                 la_bb_end(s, nb_globals, nb_temps);
3671             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3672                 la_global_sync(s, nb_globals);
3673                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3674                     la_cross_call(s, nb_temps);
3675                 }
3676             }
3677 
3678             /* Record arguments that die in this opcode.  */
3679             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3680                 ts = arg_temp(op->args[i]);
3681                 if (ts->state & TS_DEAD) {
3682                     arg_life |= DEAD_ARG << i;
3683                 }
3684             }
3685 
3686             /* Input arguments are live for preceding opcodes.  */
3687             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3688                 ts = arg_temp(op->args[i]);
3689                 if (ts->state & TS_DEAD) {
3690                     /* For operands that were dead, initially allow
3691                        all regs for the type.  */
3692                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3693                     ts->state &= ~TS_DEAD;
3694                 }
3695             }
3696 
3697             /* Incorporate constraints for this operand.  */
3698             switch (opc) {
3699             case INDEX_op_mov_i32:
3700             case INDEX_op_mov_i64:
3701                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3702                    have proper constraints.  That said, special case
3703                    moves to propagate preferences backward.  */
3704                 if (IS_DEAD_ARG(1)) {
3705                     *la_temp_pref(arg_temp(op->args[0]))
3706                         = *la_temp_pref(arg_temp(op->args[1]));
3707                 }
3708                 break;
3709 
3710             default:
3711                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3712                     const TCGArgConstraint *ct = &def->args_ct[i];
3713                     TCGRegSet set, *pset;
3714 
3715                     ts = arg_temp(op->args[i]);
3716                     pset = la_temp_pref(ts);
3717                     set = *pset;
3718 
3719                     set &= ct->regs;
3720                     if (ct->ialias) {
3721                         set &= output_pref(op, ct->alias_index);
3722                     }
3723                     /* If the combination is not possible, restart from ct->regs.  */
3724                     if (set == 0) {
3725                         set = ct->regs;
3726                     }
3727                     *pset = set;
3728                 }
3729                 break;
3730             }
3731             break;
3732         }
3733         op->life = arg_life;
3734     }
3735 }
3736 
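/*
 * Sketch of the encoding computed above (comment only): op->life packs
 * per-argument DEAD_ARG and SYNC_ARG bits, so for e.g.
 *     add_i32 t0, t1, t2
 * IS_DEAD_ARG(1) means t1's value is not used after this op, and
 * NEED_SYNC_ARG(0) means t0 must be written back to its memory slot.
 */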
3737 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3738 static bool __attribute__((noinline))
3739 liveness_pass_2(TCGContext *s)
3740 {
3741     int nb_globals = s->nb_globals;
3742     int nb_temps, i;
3743     bool changes = false;
3744     TCGOp *op, *op_next;
3745 
3746     /* Create a temporary for each indirect global.  */
3747     for (i = 0; i < nb_globals; ++i) {
3748         TCGTemp *its = &s->temps[i];
3749         if (its->indirect_reg) {
3750             TCGTemp *dts = tcg_temp_alloc(s);
3751             dts->type = its->type;
3752             dts->base_type = its->base_type;
3753             dts->temp_subindex = its->temp_subindex;
3754             dts->kind = TEMP_EBB;
3755             its->state_ptr = dts;
3756         } else {
3757             its->state_ptr = NULL;
3758         }
3759         /* All globals begin dead.  */
3760         its->state = TS_DEAD;
3761     }
3762     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3763         TCGTemp *its = &s->temps[i];
3764         its->state_ptr = NULL;
3765         its->state = TS_DEAD;
3766     }
3767 
3768     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3769         TCGOpcode opc = op->opc;
3770         const TCGOpDef *def = &tcg_op_defs[opc];
3771         TCGLifeData arg_life = op->life;
3772         int nb_iargs, nb_oargs, call_flags;
3773         TCGTemp *arg_ts, *dir_ts;
3774 
3775         if (opc == INDEX_op_call) {
3776             nb_oargs = TCGOP_CALLO(op);
3777             nb_iargs = TCGOP_CALLI(op);
3778             call_flags = tcg_call_flags(op);
3779         } else {
3780             nb_iargs = def->nb_iargs;
3781             nb_oargs = def->nb_oargs;
3782 
3783             /* Set flags similar to those that calls require.  */
3784             if (def->flags & TCG_OPF_COND_BRANCH) {
3785                 /* Like reading globals: sync_globals */
3786                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3787             } else if (def->flags & TCG_OPF_BB_END) {
3788                 /* Like writing globals: save_globals */
3789                 call_flags = 0;
3790             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3791                 /* Like reading globals: sync_globals */
3792                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3793             } else {
3794                 /* No effect on globals.  */
3795                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3796                               TCG_CALL_NO_WRITE_GLOBALS);
3797             }
3798         }
3799 
3800         /* Make sure that input arguments are available.  */
3801         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3802             arg_ts = arg_temp(op->args[i]);
3803             dir_ts = arg_ts->state_ptr;
3804             if (dir_ts && arg_ts->state == TS_DEAD) {
3805                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3806                                   ? INDEX_op_ld_i32
3807                                   : INDEX_op_ld_i64);
3808                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3809 
3810                 lop->args[0] = temp_arg(dir_ts);
3811                 lop->args[1] = temp_arg(arg_ts->mem_base);
3812                 lop->args[2] = arg_ts->mem_offset;
3813 
3814                 /* Loaded, but synced with memory.  */
3815                 arg_ts->state = TS_MEM;
3816             }
3817         }
3818 
3819         /* Perform input replacement, and mark inputs that became dead.
3820            No action is required except keeping the temp's state up to date
3821            so that we reload when needed.  */
3822         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3823             arg_ts = arg_temp(op->args[i]);
3824             dir_ts = arg_ts->state_ptr;
3825             if (dir_ts) {
3826                 op->args[i] = temp_arg(dir_ts);
3827                 changes = true;
3828                 if (IS_DEAD_ARG(i)) {
3829                     arg_ts->state = TS_DEAD;
3830                 }
3831             }
3832         }
3833 
3834         /* Liveness analysis should ensure that the following are
3835            all correct, for call sites and basic block end points.  */
3836         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3837             /* Nothing to do */
3838         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3839             for (i = 0; i < nb_globals; ++i) {
3840                 /* Liveness should see that globals are synced back,
3841                    that is, either TS_DEAD or TS_MEM.  */
3842                 arg_ts = &s->temps[i];
3843                 tcg_debug_assert(arg_ts->state_ptr == 0
3844                                  || arg_ts->state != 0);
3845             }
3846         } else {
3847             for (i = 0; i < nb_globals; ++i) {
3848                 /* Liveness should see that globals are saved back,
3849                    that is, TS_DEAD, waiting to be reloaded.  */
3850                 arg_ts = &s->temps[i];
3851                 tcg_debug_assert(arg_ts->state_ptr == 0
3852                                  || arg_ts->state == TS_DEAD);
3853             }
3854         }
3855 
3856         /* Outputs become available.  */
3857         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3858             arg_ts = arg_temp(op->args[0]);
3859             dir_ts = arg_ts->state_ptr;
3860             if (dir_ts) {
3861                 op->args[0] = temp_arg(dir_ts);
3862                 changes = true;
3863 
3864                 /* The output is now live and modified.  */
3865                 arg_ts->state = 0;
3866 
3867                 if (NEED_SYNC_ARG(0)) {
3868                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3869                                       ? INDEX_op_st_i32
3870                                       : INDEX_op_st_i64);
3871                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3872                     TCGTemp *out_ts = dir_ts;
3873 
3874                     if (IS_DEAD_ARG(0)) {
3875                         out_ts = arg_temp(op->args[1]);
3876                         arg_ts->state = TS_DEAD;
3877                         tcg_op_remove(s, op);
3878                     } else {
3879                         arg_ts->state = TS_MEM;
3880                     }
3881 
3882                     sop->args[0] = temp_arg(out_ts);
3883                     sop->args[1] = temp_arg(arg_ts->mem_base);
3884                     sop->args[2] = arg_ts->mem_offset;
3885                 } else {
3886                     tcg_debug_assert(!IS_DEAD_ARG(0));
3887                 }
3888             }
3889         } else {
3890             for (i = 0; i < nb_oargs; i++) {
3891                 arg_ts = arg_temp(op->args[i]);
3892                 dir_ts = arg_ts->state_ptr;
3893                 if (!dir_ts) {
3894                     continue;
3895                 }
3896                 op->args[i] = temp_arg(dir_ts);
3897                 changes = true;
3898 
3899                 /* The output is now live and modified.  */
3900                 arg_ts->state = 0;
3901 
3902                 /* Sync outputs upon their last write.  */
3903                 if (NEED_SYNC_ARG(i)) {
3904                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3905                                       ? INDEX_op_st_i32
3906                                       : INDEX_op_st_i64);
3907                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3908 
3909                     sop->args[0] = temp_arg(dir_ts);
3910                     sop->args[1] = temp_arg(arg_ts->mem_base);
3911                     sop->args[2] = arg_ts->mem_offset;
3912 
3913                     arg_ts->state = TS_MEM;
3914                 }
3915                 /* Drop outputs that are dead.  */
3916                 if (IS_DEAD_ARG(i)) {
3917                     arg_ts->state = TS_DEAD;
3918                 }
3919             }
3920         }
3921     }
3922 
3923     return changes;
3924 }
3925 
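/*
 * Worked example for liveness_pass_2 (comment only, with a hypothetical
 * env offset 0x10): for an indirect global 'x' at env+0x10,
 *     add_i32 x, x, t0
 * is rewritten roughly as
 *     ld_i32  x', env, $0x10
 *     add_i32 x', x', t0
 *     st_i32  x', env, $0x10      (only upon the last write)
 * where x' is the direct TEMP_EBB temp created above.
 */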
3926 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3927 {
3928     intptr_t off;
3929     int size, align;
3930 
3931     /* When allocating an object, look at the full type. */
3932     size = tcg_type_size(ts->base_type);
3933     switch (ts->base_type) {
3934     case TCG_TYPE_I32:
3935         align = 4;
3936         break;
3937     case TCG_TYPE_I64:
3938     case TCG_TYPE_V64:
3939         align = 8;
3940         break;
3941     case TCG_TYPE_I128:
3942     case TCG_TYPE_V128:
3943     case TCG_TYPE_V256:
3944         /*
3945          * Note that we do not require aligned storage for V256,
3946          * and that we provide alignment for I128 to match V128,
3947          * even if that's above what the host ABI requires.
3948          */
3949         align = 16;
3950         break;
3951     default:
3952         g_assert_not_reached();
3953     }
3954 
3955     /*
3956      * Assume the stack is sufficiently aligned.
3957      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3958      * and do not require 16 byte vector alignment.  This seems slightly
3959      * easier than fully parameterizing the above switch statement.
3960      */
3961     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3962     off = ROUND_UP(s->current_frame_offset, align);
3963 
3964     /* If we've exhausted the stack frame, restart with a smaller TB. */
3965     if (off + size > s->frame_end) {
3966         tcg_raise_tb_overflow(s);
3967     }
3968     s->current_frame_offset = off + size;
3969 #if defined(__sparc__)
3970     off += TCG_TARGET_STACK_BIAS;
3971 #endif
3972 
3973     /* If the object was subdivided, assign memory to all the parts. */
3974     if (ts->base_type != ts->type) {
3975         int part_size = tcg_type_size(ts->type);
3976         int part_count = size / part_size;
3977 
3978         /*
3979          * Each part is allocated sequentially in tcg_temp_new_internal.
3980          * Jump back to the first part by subtracting the current index.
3981          */
3982         ts -= ts->temp_subindex;
3983         for (int i = 0; i < part_count; ++i) {
3984             ts[i].mem_offset = off + i * part_size;
3985             ts[i].mem_base = s->frame_temp;
3986             ts[i].mem_allocated = 1;
3987         }
3988     } else {
3989         ts->mem_offset = off;
3990         ts->mem_base = s->frame_temp;
3991         ts->mem_allocated = 1;
3992     }
3993 }
3994 
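/*
 * Worked arithmetic for temp_allocate_frame (comment only, assuming a
 * 16-byte aligned stack): with current_frame_offset == 0x44, an I64
 * temp has align = 8, so off = ROUND_UP(0x44, 8) = 0x48 and the frame
 * cursor advances to 0x50.  An I128 temp (size 16, align 16) from the
 * same start lands at 0x50, its two I64 parts at offsets 0x50 and 0x58.
 */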
3995 /* Assign @reg to @ts, and update reg_to_temp[]. */
3996 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3997 {
3998     if (ts->val_type == TEMP_VAL_REG) {
3999         TCGReg old = ts->reg;
4000         tcg_debug_assert(s->reg_to_temp[old] == ts);
4001         if (old == reg) {
4002             return;
4003         }
4004         s->reg_to_temp[old] = NULL;
4005     }
4006     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4007     s->reg_to_temp[reg] = ts;
4008     ts->val_type = TEMP_VAL_REG;
4009     ts->reg = reg;
4010 }
4011 
4012 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4013 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4014 {
4015     tcg_debug_assert(type != TEMP_VAL_REG);
4016     if (ts->val_type == TEMP_VAL_REG) {
4017         TCGReg reg = ts->reg;
4018         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4019         s->reg_to_temp[reg] = NULL;
4020     }
4021     ts->val_type = type;
4022 }
4023 
4024 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4025 
4026 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4027    mark it free; otherwise mark it dead.  */
4028 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4029 {
4030     TCGTempVal new_type;
4031 
4032     switch (ts->kind) {
4033     case TEMP_FIXED:
4034         return;
4035     case TEMP_GLOBAL:
4036     case TEMP_TB:
4037         new_type = TEMP_VAL_MEM;
4038         break;
4039     case TEMP_EBB:
4040         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4041         break;
4042     case TEMP_CONST:
4043         new_type = TEMP_VAL_CONST;
4044         break;
4045     default:
4046         g_assert_not_reached();
4047     }
4048     set_temp_val_nonreg(s, ts, new_type);
4049 }
4050 
4051 /* Mark a temporary as dead.  */
4052 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4053 {
4054     temp_free_or_dead(s, ts, 1);
4055 }
4056 
4057 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4058    register needs to be allocated to store a constant.  If 'free_or_dead'
4059    is non-zero, subsequently release the temporary; if it is positive, the
4060    temp is dead; if it is negative, the temp is free.  */
4061 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4062                       TCGRegSet preferred_regs, int free_or_dead)
4063 {
4064     if (!temp_readonly(ts) && !ts->mem_coherent) {
4065         if (!ts->mem_allocated) {
4066             temp_allocate_frame(s, ts);
4067         }
4068         switch (ts->val_type) {
4069         case TEMP_VAL_CONST:
4070             /* If we're going to free the temp immediately, then we won't
4071                require it later in a register, so attempt to store the
4072                constant to memory directly.  */
4073             if (free_or_dead
4074                 && tcg_out_sti(s, ts->type, ts->val,
4075                                ts->mem_base->reg, ts->mem_offset)) {
4076                 break;
4077             }
4078             temp_load(s, ts, tcg_target_available_regs[ts->type],
4079                       allocated_regs, preferred_regs);
4080             /* fallthrough */
4081 
4082         case TEMP_VAL_REG:
4083             tcg_out_st(s, ts->type, ts->reg,
4084                        ts->mem_base->reg, ts->mem_offset);
4085             break;
4086 
4087         case TEMP_VAL_MEM:
4088             break;
4089 
4090         case TEMP_VAL_DEAD:
4091         default:
4092             g_assert_not_reached();
4093         }
4094         ts->mem_coherent = 1;
4095     }
4096     if (free_or_dead) {
4097         temp_free_or_dead(s, ts, free_or_dead);
4098     }
4099 }
4100 
4101 /* free register 'reg' by spilling the corresponding temporary if necessary */
4102 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4103 {
4104     TCGTemp *ts = s->reg_to_temp[reg];
4105     if (ts != NULL) {
4106         temp_sync(s, ts, allocated_regs, 0, -1);
4107     }
4108 }
4109 
4110 /**
4111  * tcg_reg_alloc:
4112  * @required_regs: Set of registers in which we must allocate.
4113  * @allocated_regs: Set of registers which must be avoided.
4114  * @preferred_regs: Set of registers we should prefer.
4115  * @rev: True if we search the registers in "indirect" order.
4116  *
4117  * The allocated register must be in @required_regs & ~@allocated_regs,
4118  * but if we can put it in @preferred_regs we may save a move later.
4119  */
4120 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4121                             TCGRegSet allocated_regs,
4122                             TCGRegSet preferred_regs, bool rev)
4123 {
4124     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4125     TCGRegSet reg_ct[2];
4126     const int *order;
4127 
4128     reg_ct[1] = required_regs & ~allocated_regs;
4129     tcg_debug_assert(reg_ct[1] != 0);
4130     reg_ct[0] = reg_ct[1] & preferred_regs;
4131 
4132     /* Skip the preferred_regs option if it cannot be satisfied,
4133        or if the preference made no difference.  */
4134     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4135 
4136     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4137 
4138     /* Try free registers, preferences first.  */
4139     for (j = f; j < 2; j++) {
4140         TCGRegSet set = reg_ct[j];
4141 
4142         if (tcg_regset_single(set)) {
4143             /* One register in the set.  */
4144             TCGReg reg = tcg_regset_first(set);
4145             if (s->reg_to_temp[reg] == NULL) {
4146                 return reg;
4147             }
4148         } else {
4149             for (i = 0; i < n; i++) {
4150                 TCGReg reg = order[i];
4151                 if (s->reg_to_temp[reg] == NULL &&
4152                     tcg_regset_test_reg(set, reg)) {
4153                     return reg;
4154                 }
4155             }
4156         }
4157     }
4158 
4159     /* We must spill something.  */
4160     for (j = f; j < 2; j++) {
4161         TCGRegSet set = reg_ct[j];
4162 
4163         if (tcg_regset_single(set)) {
4164             /* One register in the set.  */
4165             TCGReg reg = tcg_regset_first(set);
4166             tcg_reg_free(s, reg, allocated_regs);
4167             return reg;
4168         } else {
4169             for (i = 0; i < n; i++) {
4170                 TCGReg reg = order[i];
4171                 if (tcg_regset_test_reg(set, reg)) {
4172                     tcg_reg_free(s, reg, allocated_regs);
4173                     return reg;
4174                 }
4175             }
4176         }
4177     }
4178 
4179     g_assert_not_reached();
4180 }
4181 
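/*
 * Sketch of the search order above (comment only): try a free register
 * in preferred & required, then any free register in required, and only
 * then spill -- again trying the preferred subset first -- so that a
 * satisfiable preference costs nothing and an unsatisfiable one is
 * simply skipped via 'f'.
 */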
4182 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4183                                  TCGRegSet allocated_regs,
4184                                  TCGRegSet preferred_regs, bool rev)
4185 {
4186     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4187     TCGRegSet reg_ct[2];
4188     const int *order;
4189 
4190     /* A pair at I is a candidate only if neither I nor I+1 is allocated. */
4191     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4192     tcg_debug_assert(reg_ct[1] != 0);
4193     reg_ct[0] = reg_ct[1] & preferred_regs;
4194 
4195     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4196 
4197     /*
4198      * Skip the preferred_regs option if it cannot be satisfied,
4199      * or if the preference made no difference.
4200      */
4201     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4202 
4203     /*
4204      * Minimize the number of flushes by looking for 2 free registers first,
4205      * then a single flush, then two flushes.
4206      */
4207     for (fmin = 2; fmin >= 0; fmin--) {
4208         for (j = k; j < 2; j++) {
4209             TCGRegSet set = reg_ct[j];
4210 
4211             for (i = 0; i < n; i++) {
4212                 TCGReg reg = order[i];
4213 
4214                 if (tcg_regset_test_reg(set, reg)) {
4215                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4216                     if (f >= fmin) {
4217                         tcg_reg_free(s, reg, allocated_regs);
4218                         tcg_reg_free(s, reg + 1, allocated_regs);
4219                         return reg;
4220                     }
4221                 }
4222             }
4223         }
4224     }
4225     g_assert_not_reached();
4226 }
4227 
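/*
 * Sketch (comment only): for a candidate pair starting at reg, f counts
 * how many of {reg, reg+1} are currently free (2, 1 or 0).  The fmin
 * loop therefore returns the first pair requiring no spill, failing
 * that one spill, failing that two -- minimizing flushes.
 */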
4228 /* Make sure the temporary is in a register.  If needed, allocate the register
4229    from DESIRED while avoiding ALLOCATED.  */
4230 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4231                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4232 {
4233     TCGReg reg;
4234 
4235     switch (ts->val_type) {
4236     case TEMP_VAL_REG:
4237         return;
4238     case TEMP_VAL_CONST:
4239         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4240                             preferred_regs, ts->indirect_base);
4241         if (ts->type <= TCG_TYPE_I64) {
4242             tcg_out_movi(s, ts->type, reg, ts->val);
4243         } else {
4244             uint64_t val = ts->val;
4245             MemOp vece = MO_64;
4246 
4247             /*
4248              * Find the minimal vector element that matches the constant.
4249              * The targets will, in general, have to do this search anyway,
4250              * so do it generically here.
4251              */
4252             if (val == dup_const(MO_8, val)) {
4253                 vece = MO_8;
4254             } else if (val == dup_const(MO_16, val)) {
4255                 vece = MO_16;
4256             } else if (val == dup_const(MO_32, val)) {
4257                 vece = MO_32;
4258             }
4259 
4260             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4261         }
4262         ts->mem_coherent = 0;
4263         break;
4264     case TEMP_VAL_MEM:
4265         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4266                             preferred_regs, ts->indirect_base);
4267         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4268         ts->mem_coherent = 1;
4269         break;
4270     case TEMP_VAL_DEAD:
4271     default:
4272         g_assert_not_reached();
4273     }
4274     set_temp_val_reg(s, ts, reg);
4275 }
4276 
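/*
 * Example of the vector-constant search above (comment only):
 *     0x2525252525252525 == dup_const(MO_8,  0x25)   -> vece = MO_8
 *     0x1234123412341234 == dup_const(MO_16, 0x1234) -> vece = MO_16
 * while an arbitrary 64-bit pattern falls back to vece = MO_64.
 */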
4277 /* Save a temporary to memory. 'allocated_regs' is used in case a
4278    temporary register needs to be allocated to store a constant.  */
4279 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4280 {
4281     /* The liveness analysis already ensures that globals are back
4282        in memory. Keep a tcg_debug_assert for safety. */
4283     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4284 }
4285 
4286 /* save globals to their canonical location and assume they can be
4287    modified by the following code. 'allocated_regs' is used in case a
4288    temporary register needs to be allocated to store a constant. */
4289 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4290 {
4291     int i, n;
4292 
4293     for (i = 0, n = s->nb_globals; i < n; i++) {
4294         temp_save(s, &s->temps[i], allocated_regs);
4295     }
4296 }
4297 
4298 /* sync globals to their canonical location and assume they can be
4299    read by the following code. 'allocated_regs' is used in case a
4300    temporary register needs to be allocated to store a constant. */
4301 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4302 {
4303     int i, n;
4304 
4305     for (i = 0, n = s->nb_globals; i < n; i++) {
4306         TCGTemp *ts = &s->temps[i];
4307         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4308                          || ts->kind == TEMP_FIXED
4309                          || ts->mem_coherent);
4310     }
4311 }
4312 
4313 /* at the end of a basic block, we assume all temporaries are dead and
4314    all globals are stored at their canonical location. */
4315 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4316 {
4317     int i;
4318 
4319     for (i = s->nb_globals; i < s->nb_temps; i++) {
4320         TCGTemp *ts = &s->temps[i];
4321 
4322         switch (ts->kind) {
4323         case TEMP_TB:
4324             temp_save(s, ts, allocated_regs);
4325             break;
4326         case TEMP_EBB:
4327             /* The liveness analysis already ensures that temps are dead.
4328                Keep a tcg_debug_assert for safety. */
4329             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4330             break;
4331         case TEMP_CONST:
4332             /* Similarly, we should have freed any allocated register. */
4333             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4334             break;
4335         default:
4336             g_assert_not_reached();
4337         }
4338     }
4339 
4340     save_globals(s, allocated_regs);
4341 }
4342 
4343 /*
4344  * At a conditional branch, we assume all temporaries are dead unless
4345  * explicitly live-across-conditional-branch; all globals and local
4346  * temps are synced to their location.
4347  */
4348 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4349 {
4350     sync_globals(s, allocated_regs);
4351 
4352     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4353         TCGTemp *ts = &s->temps[i];
4354         /*
4355          * The liveness analysis already ensures that temps are dead.
4356          * Keep tcg_debug_asserts for safety.
4357          */
4358         switch (ts->kind) {
4359         case TEMP_TB:
4360             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4361             break;
4362         case TEMP_EBB:
4363         case TEMP_CONST:
4364             break;
4365         default:
4366             g_assert_not_reached();
4367         }
4368     }
4369 }
4370 
4371 /*
4372  * Specialized code generation for INDEX_op_mov_* with a constant.
4373  */
4374 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4375                                   tcg_target_ulong val, TCGLifeData arg_life,
4376                                   TCGRegSet preferred_regs)
4377 {
4378     /* ENV should not be modified.  */
4379     tcg_debug_assert(!temp_readonly(ots));
4380 
4381     /* The movi is not explicitly generated here.  */
4382     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4383     ots->val = val;
4384     ots->mem_coherent = 0;
4385     if (NEED_SYNC_ARG(0)) {
4386         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4387     } else if (IS_DEAD_ARG(0)) {
4388         temp_dead(s, ots);
4389     }
4390 }
4391 
4392 /*
4393  * Specialized code generation for INDEX_op_mov_*.
4394  */
4395 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4396 {
4397     const TCGLifeData arg_life = op->life;
4398     TCGRegSet allocated_regs, preferred_regs;
4399     TCGTemp *ts, *ots;
4400     TCGType otype, itype;
4401     TCGReg oreg, ireg;
4402 
4403     allocated_regs = s->reserved_regs;
4404     preferred_regs = output_pref(op, 0);
4405     ots = arg_temp(op->args[0]);
4406     ts = arg_temp(op->args[1]);
4407 
4408     /* ENV should not be modified.  */
4409     tcg_debug_assert(!temp_readonly(ots));
4410 
4411     /* Note that otype != itype for no-op truncation.  */
4412     otype = ots->type;
4413     itype = ts->type;
4414 
4415     if (ts->val_type == TEMP_VAL_CONST) {
4416         /* propagate constant or generate sti */
4417         tcg_target_ulong val = ts->val;
4418         if (IS_DEAD_ARG(1)) {
4419             temp_dead(s, ts);
4420         }
4421         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4422         return;
4423     }
4424 
4425     /* If the source value is in memory we're going to be forced
4426        to have it in a register in order to perform the copy.  Copy
4427        the SOURCE value into its own register first, so that we
4428        don't have to reload SOURCE the next time it is used. */
4429     if (ts->val_type == TEMP_VAL_MEM) {
4430         temp_load(s, ts, tcg_target_available_regs[itype],
4431                   allocated_regs, preferred_regs);
4432     }
4433     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4434     ireg = ts->reg;
4435 
4436     if (IS_DEAD_ARG(0)) {
4437         /* mov to a non-saved dead register makes no sense (even with
4438            liveness analysis disabled). */
4439         tcg_debug_assert(NEED_SYNC_ARG(0));
4440         if (!ots->mem_allocated) {
4441             temp_allocate_frame(s, ots);
4442         }
4443         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4444         if (IS_DEAD_ARG(1)) {
4445             temp_dead(s, ts);
4446         }
4447         temp_dead(s, ots);
4448         return;
4449     }
4450 
4451     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4452         /*
4453          * The mov can be suppressed.  Kill input first, so that it
4454          * is unlinked from reg_to_temp, then set the output to the
4455          * reg that we saved from the input.
4456          */
4457         temp_dead(s, ts);
4458         oreg = ireg;
4459     } else {
4460         if (ots->val_type == TEMP_VAL_REG) {
4461             oreg = ots->reg;
4462         } else {
4463             /* Make sure to not spill the input register during allocation. */
4464             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4465                                  allocated_regs | ((TCGRegSet)1 << ireg),
4466                                  preferred_regs, ots->indirect_base);
4467         }
4468         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4469             /*
4470              * Cross register class move not supported.
4471              * Store the source register into the destination slot
4472              * and leave the destination temp as TEMP_VAL_MEM.
4473              */
4474             assert(!temp_readonly(ots));
4475             if (!ts->mem_allocated) {
4476                 temp_allocate_frame(s, ots);
4477             }
4478             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4479             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4480             ots->mem_coherent = 1;
4481             return;
4482         }
4483     }
4484     set_temp_val_reg(s, ots, oreg);
4485     ots->mem_coherent = 0;
4486 
4487     if (NEED_SYNC_ARG(0)) {
4488         temp_sync(s, ots, allocated_regs, 0, 0);
4489     }
4490 }
4491 
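/*
 * Sketch of the fast path above (comment only): for 'mov_i32 t0, t1'
 * where t1 dies and is not TEMP_FIXED, no host instruction is emitted;
 * t1's register is simply reassigned to t0 via set_temp_val_reg().
 * Only when t1 stays live (or is fixed) does tcg_out_mov() run, with a
 * store fallback for cross-register-class moves.
 */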
4492 /*
4493  * Specialized code generation for INDEX_op_dup_vec.
4494  */
4495 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4496 {
4497     const TCGLifeData arg_life = op->life;
4498     TCGRegSet dup_out_regs, dup_in_regs;
4499     TCGTemp *its, *ots;
4500     TCGType itype, vtype;
4501     unsigned vece;
4502     int lowpart_ofs;
4503     bool ok;
4504 
4505     ots = arg_temp(op->args[0]);
4506     its = arg_temp(op->args[1]);
4507 
4508     /* ENV should not be modified.  */
4509     tcg_debug_assert(!temp_readonly(ots));
4510 
4511     itype = its->type;
4512     vece = TCGOP_VECE(op);
4513     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4514 
4515     if (its->val_type == TEMP_VAL_CONST) {
4516         /* Propagate constant via movi -> dupi.  */
4517         tcg_target_ulong val = its->val;
4518         if (IS_DEAD_ARG(1)) {
4519             temp_dead(s, its);
4520         }
4521         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4522         return;
4523     }
4524 
4525     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4526     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4527 
4528     /* Allocate the output register now.  */
4529     if (ots->val_type != TEMP_VAL_REG) {
4530         TCGRegSet allocated_regs = s->reserved_regs;
4531         TCGReg oreg;
4532 
4533         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4534             /* Make sure to not spill the input register. */
4535             tcg_regset_set_reg(allocated_regs, its->reg);
4536         }
4537         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4538                              output_pref(op, 0), ots->indirect_base);
4539         set_temp_val_reg(s, ots, oreg);
4540     }
4541 
4542     switch (its->val_type) {
4543     case TEMP_VAL_REG:
4544         /*
4545          * The dup constraints must be broad, covering all possible VECE.
4546          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4547          * to fail, indicating that extra moves are required for that case.
4548          */
4549         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4550             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4551                 goto done;
4552             }
4553             /* Try again from memory or a vector input register.  */
4554         }
4555         if (!its->mem_coherent) {
4556             /*
4557              * The input register is not synced, and so an extra store
4558              * would be required to use memory.  Attempt an integer-vector
4559              * register move first.  We do not have a TCGRegSet for this.
4560              */
4561             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4562                 break;
4563             }
4564             /* Sync the temp back to its slot and load from there.  */
4565             temp_sync(s, its, s->reserved_regs, 0, 0);
4566         }
4567         /* fall through */
4568 
4569     case TEMP_VAL_MEM:
4570         lowpart_ofs = 0;
4571         if (HOST_BIG_ENDIAN) {
4572             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4573         }
4574         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4575                              its->mem_offset + lowpart_ofs)) {
4576             goto done;
4577         }
4578         /* Load the input into the destination vector register. */
4579         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4580         break;
4581 
4582     default:
4583         g_assert_not_reached();
4584     }
4585 
4586     /* We now have a vector input register, so dup must succeed. */
4587     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4588     tcg_debug_assert(ok);
4589 
4590  done:
4591     ots->mem_coherent = 0;
4592     if (IS_DEAD_ARG(1)) {
4593         temp_dead(s, its);
4594     }
4595     if (NEED_SYNC_ARG(0)) {
4596         temp_sync(s, ots, s->reserved_regs, 0, 0);
4597     }
4598     if (IS_DEAD_ARG(0)) {
4599         temp_dead(s, ots);
4600     }
4601 }
4602 
4603 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4604 {
4605     const TCGLifeData arg_life = op->life;
4606     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4607     TCGRegSet i_allocated_regs;
4608     TCGRegSet o_allocated_regs;
4609     int i, k, nb_iargs, nb_oargs;
4610     TCGReg reg;
4611     TCGArg arg;
4612     const TCGArgConstraint *arg_ct;
4613     TCGTemp *ts;
4614     TCGArg new_args[TCG_MAX_OP_ARGS];
4615     int const_args[TCG_MAX_OP_ARGS];
4616 
4617     nb_oargs = def->nb_oargs;
4618     nb_iargs = def->nb_iargs;
4619 
4620     /* copy constants */
4621     memcpy(new_args + nb_oargs + nb_iargs,
4622            op->args + nb_oargs + nb_iargs,
4623            sizeof(TCGArg) * def->nb_cargs);
4624 
4625     i_allocated_regs = s->reserved_regs;
4626     o_allocated_regs = s->reserved_regs;
4627 
4628     /* satisfy input constraints */
4629     for (k = 0; k < nb_iargs; k++) {
4630         TCGRegSet i_preferred_regs, i_required_regs;
4631         bool allocate_new_reg, copyto_new_reg;
4632         TCGTemp *ts2;
4633         int i1, i2;
4634 
4635         i = def->args_ct[nb_oargs + k].sort_index;
4636         arg = op->args[i];
4637         arg_ct = &def->args_ct[i];
4638         ts = arg_temp(arg);
4639 
4640         if (ts->val_type == TEMP_VAL_CONST
4641             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4642             /* constant is OK for instruction */
4643             const_args[i] = 1;
4644             new_args[i] = ts->val;
4645             continue;
4646         }
4647 
4648         reg = ts->reg;
4649         i_preferred_regs = 0;
4650         i_required_regs = arg_ct->regs;
4651         allocate_new_reg = false;
4652         copyto_new_reg = false;
4653 
4654         switch (arg_ct->pair) {
4655         case 0: /* not paired */
4656             if (arg_ct->ialias) {
4657                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4658 
4659                 /*
4660                  * If the input is readonly, then it cannot also be an
4661                  * output and aliased to itself.  If the input is not
4662                  * dead after the instruction, we must allocate a new
4663                  * register and move it.
4664                  */
4665                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4666                     allocate_new_reg = true;
4667                 } else if (ts->val_type == TEMP_VAL_REG) {
4668                     /*
4669                      * Check if the current register has already been
4670                      * allocated for another input.
4671                      */
4672                     allocate_new_reg =
4673                         tcg_regset_test_reg(i_allocated_regs, reg);
4674                 }
4675             }
4676             if (!allocate_new_reg) {
4677                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4678                           i_preferred_regs);
4679                 reg = ts->reg;
4680                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4681             }
4682             if (allocate_new_reg) {
4683                 /*
4684                  * Allocate a new register matching the constraint
4685                  * and move the temporary register into it.
4686                  */
4687                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4688                           i_allocated_regs, 0);
4689                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4690                                     i_preferred_regs, ts->indirect_base);
4691                 copyto_new_reg = true;
4692             }
4693             break;
4694 
4695         case 1:
4696             /* First of an input pair; if i1 == i2, the second is an output. */
4697             i1 = i;
4698             i2 = arg_ct->pair_index;
4699             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4700 
4701             /*
4702              * It is easier to default to allocating a new pair
4703              * and to identify a few cases where it's not required.
4704              */
4705             if (arg_ct->ialias) {
4706                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4707                 if (IS_DEAD_ARG(i1) &&
4708                     IS_DEAD_ARG(i2) &&
4709                     !temp_readonly(ts) &&
4710                     ts->val_type == TEMP_VAL_REG &&
4711                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4712                     tcg_regset_test_reg(i_required_regs, reg) &&
4713                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4714                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4715                     (ts2
4716                      ? ts2->val_type == TEMP_VAL_REG &&
4717                        ts2->reg == reg + 1 &&
4718                        !temp_readonly(ts2)
4719                      : s->reg_to_temp[reg + 1] == NULL)) {
4720                     break;
4721                 }
4722             } else {
4723                 /* Without aliasing, the pair must also be an input. */
4724                 tcg_debug_assert(ts2);
4725                 if (ts->val_type == TEMP_VAL_REG &&
4726                     ts2->val_type == TEMP_VAL_REG &&
4727                     ts2->reg == reg + 1 &&
4728                     tcg_regset_test_reg(i_required_regs, reg)) {
4729                     break;
4730                 }
4731             }
4732             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4733                                      0, ts->indirect_base);
4734             goto do_pair;
4735 
4736         case 2: /* pair second */
4737             reg = new_args[arg_ct->pair_index] + 1;
4738             goto do_pair;
4739 
4740         case 3: /* ialias with second output, no first input */
4741             tcg_debug_assert(arg_ct->ialias);
4742             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4743 
4744             if (IS_DEAD_ARG(i) &&
4745                 !temp_readonly(ts) &&
4746                 ts->val_type == TEMP_VAL_REG &&
4747                 reg > 0 &&
4748                 s->reg_to_temp[reg - 1] == NULL &&
4749                 tcg_regset_test_reg(i_required_regs, reg) &&
4750                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4751                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4752                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4753                 break;
4754             }
4755             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4756                                      i_allocated_regs, 0,
4757                                      ts->indirect_base);
4758             tcg_regset_set_reg(i_allocated_regs, reg);
4759             reg += 1;
4760             goto do_pair;
4761 
4762         do_pair:
4763             /*
4764              * If an aliased input is not dead after the instruction,
4765              * we must allocate a new register and move it.
4766              */
4767             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4768                 TCGRegSet t_allocated_regs = i_allocated_regs;
4769 
4770                 /*
4771                  * Because of the alias, and the continued life, make sure
4772                  * that the temp is somewhere *other* than the reg pair,
4773                  * and we get a copy in reg.
4774                  */
4775                 tcg_regset_set_reg(t_allocated_regs, reg);
4776                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4777                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4778                     /* If ts was already in reg, copy it somewhere else. */
4779                     TCGReg nr;
4780                     bool ok;
4781 
4782                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4783                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4784                                        t_allocated_regs, 0, ts->indirect_base);
4785                     ok = tcg_out_mov(s, ts->type, nr, reg);
4786                     tcg_debug_assert(ok);
4787 
4788                     set_temp_val_reg(s, ts, nr);
4789                 } else {
4790                     temp_load(s, ts, tcg_target_available_regs[ts->type],
4791                               t_allocated_regs, 0);
4792                     copyto_new_reg = true;
4793                 }
4794             } else {
4795                 /* Preferably allocate to reg, otherwise copy. */
4796                 i_required_regs = (TCGRegSet)1 << reg;
4797                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4798                           i_preferred_regs);
4799                 copyto_new_reg = ts->reg != reg;
4800             }
4801             break;
4802 
4803         default:
4804             g_assert_not_reached();
4805         }
4806 
4807         if (copyto_new_reg) {
4808             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4809                 /*
4810                  * Cross register class move not supported.  Sync the
4811                  * temp back to its slot and load from there.
4812                  */
4813                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4814                 tcg_out_ld(s, ts->type, reg,
4815                            ts->mem_base->reg, ts->mem_offset);
4816             }
4817         }
4818         new_args[i] = reg;
4819         const_args[i] = 0;
4820         tcg_regset_set_reg(i_allocated_regs, reg);
4821     }
4822 
4823     /* mark dead temporaries and free the associated registers */
4824     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4825         if (IS_DEAD_ARG(i)) {
4826             temp_dead(s, arg_temp(op->args[i]));
4827         }
4828     }
4829 
4830     if (def->flags & TCG_OPF_COND_BRANCH) {
4831         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4832     } else if (def->flags & TCG_OPF_BB_END) {
4833         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4834     } else {
4835         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4836             /* XXX: permit generic clobber register list ? */
4837             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4838                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4839                     tcg_reg_free(s, i, i_allocated_regs);
4840                 }
4841             }
4842         }
4843         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4844             /* sync globals if the op has side effects and might trigger
4845                an exception. */
4846             sync_globals(s, i_allocated_regs);
4847         }
4848 
4849         /* satisfy the output constraints */
4850         for (k = 0; k < nb_oargs; k++) {
4851             i = def->args_ct[k].sort_index;
4852             arg = op->args[i];
4853             arg_ct = &def->args_ct[i];
4854             ts = arg_temp(arg);
4855 
4856             /* ENV should not be modified.  */
4857             tcg_debug_assert(!temp_readonly(ts));
4858 
4859             switch (arg_ct->pair) {
4860             case 0: /* not paired */
4861                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4862                     reg = new_args[arg_ct->alias_index];
4863                 } else if (arg_ct->newreg) {
4864                     reg = tcg_reg_alloc(s, arg_ct->regs,
4865                                         i_allocated_regs | o_allocated_regs,
4866                                         output_pref(op, k), ts->indirect_base);
4867                 } else {
4868                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4869                                         output_pref(op, k), ts->indirect_base);
4870                 }
4871                 break;
4872 
4873             case 1: /* first of pair */
4874                 tcg_debug_assert(!arg_ct->newreg);
4875                 if (arg_ct->oalias) {
4876                     reg = new_args[arg_ct->alias_index];
4877                     break;
4878                 }
4879                 reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
4880                                          output_pref(op, k), ts->indirect_base);
4881                 break;
4882 
4883             case 2: /* second of pair */
4884                 tcg_debug_assert(!arg_ct->newreg);
4885                 if (arg_ct->oalias) {
4886                     reg = new_args[arg_ct->alias_index];
4887                 } else {
4888                     reg = new_args[arg_ct->pair_index] + 1;
4889                 }
4890                 break;
4891 
4892             case 3: /* first of pair, aliasing with a second input */
4893                 tcg_debug_assert(!arg_ct->newreg);
4894                 reg = new_args[arg_ct->pair_index] - 1;
4895                 break;
4896 
4897             default:
4898                 g_assert_not_reached();
4899             }
4900             tcg_regset_set_reg(o_allocated_regs, reg);
4901             set_temp_val_reg(s, ts, reg);
4902             ts->mem_coherent = 0;
4903             new_args[i] = reg;
4904         }
4905     }
4906 
4907     /* emit instruction */
4908     switch (op->opc) {
4909     case INDEX_op_ext8s_i32:
4910         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4911         break;
4912     case INDEX_op_ext8s_i64:
4913         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4914         break;
4915     case INDEX_op_ext8u_i32:
4916     case INDEX_op_ext8u_i64:
4917         tcg_out_ext8u(s, new_args[0], new_args[1]);
4918         break;
4919     case INDEX_op_ext16s_i32:
4920         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
4921         break;
4922     case INDEX_op_ext16s_i64:
4923         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
4924         break;
4925     case INDEX_op_ext16u_i32:
4926     case INDEX_op_ext16u_i64:
4927         tcg_out_ext16u(s, new_args[0], new_args[1]);
4928         break;
4929     case INDEX_op_ext32s_i64:
4930         tcg_out_ext32s(s, new_args[0], new_args[1]);
4931         break;
4932     case INDEX_op_ext32u_i64:
4933         tcg_out_ext32u(s, new_args[0], new_args[1]);
4934         break;
4935     case INDEX_op_ext_i32_i64:
4936         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
4937         break;
4938     case INDEX_op_extu_i32_i64:
4939         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
4940         break;
4941     case INDEX_op_extrl_i64_i32:
4942         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
4943         break;
4944     default:
4945         if (def->flags & TCG_OPF_VECTOR) {
4946             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4947                            new_args, const_args);
4948         } else {
4949             tcg_out_op(s, op->opc, new_args, const_args);
4950         }
4951         break;
4952     }
4953 
4954     /* move the outputs in the correct register if needed */
4955     for (i = 0; i < nb_oargs; i++) {
4956         ts = arg_temp(op->args[i]);
4957 
4958         /* ENV should not be modified.  */
4959         tcg_debug_assert(!temp_readonly(ts));
4960 
4961         if (NEED_SYNC_ARG(i)) {
4962             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4963         } else if (IS_DEAD_ARG(i)) {
4964             temp_dead(s, ts);
4965         }
4966     }
4967 }
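
/*
 * A worked example of the constraint handling above (hypothetical
 * opcode, illustration only).  For an op whose args_ct describe
 * { out: "r", in1: "0", in2: "ri" } -- output in any register, first
 * input aliased to the output, second input register-or-immediate --
 * the loops above behave roughly as:
 *
 *     in1: ialias is set; if the temp dies here and its register is
 *          neither reserved nor already claimed, reuse it in place,
 *          else tcg_reg_alloc() a fresh register and tcg_out_mov()
 *          the value across (copyto_new_reg);
 *     in2: if TEMP_VAL_CONST and tcg_target_const_match() accepts
 *          the value, it is passed through const_args[]; otherwise
 *          temp_load() brings it into a register from arg_ct->regs;
 *     out: oalias resolves to new_args[alias_index], i.e. the
 *          register already chosen for in1.
 */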
4968 
4969 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4970 {
4971     const TCGLifeData arg_life = op->life;
4972     TCGTemp *ots, *itsl, *itsh;
4973     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4974 
4975     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4976     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4977     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4978 
4979     ots = arg_temp(op->args[0]);
4980     itsl = arg_temp(op->args[1]);
4981     itsh = arg_temp(op->args[2]);
4982 
4983     /* ENV should not be modified.  */
4984     tcg_debug_assert(!temp_readonly(ots));
4985 
4986     /* Allocate the output register now.  */
4987     if (ots->val_type != TEMP_VAL_REG) {
4988         TCGRegSet allocated_regs = s->reserved_regs;
4989         TCGRegSet dup_out_regs =
4990             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4991         TCGReg oreg;
4992 
4993         /* Make sure to not spill the input registers. */
4994         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4995             tcg_regset_set_reg(allocated_regs, itsl->reg);
4996         }
4997         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4998             tcg_regset_set_reg(allocated_regs, itsh->reg);
4999         }
5000 
5001         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5002                              output_pref(op, 0), ots->indirect_base);
5003         set_temp_val_reg(s, ots, oreg);
5004     }
5005 
5006     /* Promote dup2 of immediates to dupi_vec. */
5007     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5008         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5009         MemOp vece = MO_64;
5010 
5011         if (val == dup_const(MO_8, val)) {
5012             vece = MO_8;
5013         } else if (val == dup_const(MO_16, val)) {
5014             vece = MO_16;
5015         } else if (val == dup_const(MO_32, val)) {
5016             vece = MO_32;
5017         }
5018 
5019         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5020         goto done;
5021     }
5022 
5023     /* If the two inputs form one 64-bit value, try dupm_vec. */
5024     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5025         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5026         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5027         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5028 
5029         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5030         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5031 
5032         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5033                              its->mem_base->reg, its->mem_offset)) {
5034             goto done;
5035         }
5036     }
5037 
5038     /* Fall back to generic expansion. */
5039     return false;
5040 
5041  done:
5042     ots->mem_coherent = 0;
5043     if (IS_DEAD_ARG(1)) {
5044         temp_dead(s, itsl);
5045     }
5046     if (IS_DEAD_ARG(2)) {
5047         temp_dead(s, itsh);
5048     }
5049     if (NEED_SYNC_ARG(0)) {
5050         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5051     } else if (IS_DEAD_ARG(0)) {
5052         temp_dead(s, ots);
5053     }
5054     return true;
5055 }
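
/*
 * Example of the immediate promotion above (values illustrative):
 * for itsl->val == 0x00010001 and itsh->val == 0x00010001 the
 * combined constant is 0x0001000100010001.  dup_const(MO_8, val)
 * would yield 0x0101010101010101 and does not match, but
 * dup_const(MO_16, val) reproduces it exactly, so the dup2 is
 * emitted as tcg_out_dupi_vec(s, vtype, MO_16, ots->reg, val),
 * letting the backend broadcast a 16-bit element instead of
 * materializing a full 64-bit immediate.
 */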
5056 
5057 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5058                          TCGRegSet allocated_regs)
5059 {
5060     if (ts->val_type == TEMP_VAL_REG) {
5061         if (ts->reg != reg) {
5062             tcg_reg_free(s, reg, allocated_regs);
5063             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5064                 /*
5065                  * Cross register class move not supported.  Sync the
5066                  * temp back to its slot and load from there.
5067                  */
5068                 temp_sync(s, ts, allocated_regs, 0, 0);
5069                 tcg_out_ld(s, ts->type, reg,
5070                            ts->mem_base->reg, ts->mem_offset);
5071             }
5072         }
5073     } else {
5074         TCGRegSet arg_set = 0;
5075 
5076         tcg_reg_free(s, reg, allocated_regs);
5077         tcg_regset_set_reg(arg_set, reg);
5078         temp_load(s, ts, arg_set, allocated_regs, 0);
5079     }
5080 }
5081 
5082 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5083                          TCGRegSet allocated_regs)
5084 {
5085     /*
5086      * When the destination is on the stack, load up the temp and store.
5087      * If there are many call-saved registers, the temp might live to
5088      * see another use; otherwise it'll be discarded.
5089      */
5090     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5091     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5092                arg_slot_stk_ofs(arg_slot));
5093 }
5094 
5095 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5096                             TCGTemp *ts, TCGRegSet *allocated_regs)
5097 {
5098     if (arg_slot_reg_p(l->arg_slot)) {
5099         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5100         load_arg_reg(s, reg, ts, *allocated_regs);
5101         tcg_regset_set_reg(*allocated_regs, reg);
5102     } else {
5103         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5104     }
5105 }
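
/*
 * A small sketch of the slot mapping used above (the register count
 * is hypothetical; it is a property of the host ABI).  With six
 * integer argument registers, arg_slot_reg_p(slot) is true for
 * slots 0..5, which map through tcg_target_call_iarg_regs[]; higher
 * slots land on the stack:
 *
 *     slot 2 -> tcg_target_call_iarg_regs[2]
 *     slot 7 -> [TCG_REG_CALL_STACK + arg_slot_stk_ofs(7)]
 */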
5106 
5107 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5108                          intptr_t ref_off, TCGRegSet *allocated_regs)
5109 {
5110     TCGReg reg;
5111 
5112     if (arg_slot_reg_p(arg_slot)) {
5113         reg = tcg_target_call_iarg_regs[arg_slot];
5114         tcg_reg_free(s, reg, *allocated_regs);
5115         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5116         tcg_regset_set_reg(*allocated_regs, reg);
5117     } else {
5118         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5119                             *allocated_regs, 0, false);
5120         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5121         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5122                    arg_slot_stk_ofs(arg_slot));
5123     }
5124 }
5125 
5126 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5127 {
5128     const int nb_oargs = TCGOP_CALLO(op);
5129     const int nb_iargs = TCGOP_CALLI(op);
5130     const TCGLifeData arg_life = op->life;
5131     const TCGHelperInfo *info = tcg_call_info(op);
5132     TCGRegSet allocated_regs = s->reserved_regs;
5133     int i;
5134 
5135     /*
5136      * Move inputs into place in reverse order,
5137      * so that we place stacked arguments first.
5138      */
5139     for (i = nb_iargs - 1; i >= 0; --i) {
5140         const TCGCallArgumentLoc *loc = &info->in[i];
5141         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5142 
5143         switch (loc->kind) {
5144         case TCG_CALL_ARG_NORMAL:
5145         case TCG_CALL_ARG_EXTEND_U:
5146         case TCG_CALL_ARG_EXTEND_S:
5147             load_arg_normal(s, loc, ts, &allocated_regs);
5148             break;
5149         case TCG_CALL_ARG_BY_REF:
5150             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5151             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5152                          arg_slot_stk_ofs(loc->ref_slot),
5153                          &allocated_regs);
5154             break;
5155         case TCG_CALL_ARG_BY_REF_N:
5156             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5157             break;
5158         default:
5159             g_assert_not_reached();
5160         }
5161     }
5162 
5163     /* Mark dead temporaries and free the associated registers.  */
5164     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5165         if (IS_DEAD_ARG(i)) {
5166             temp_dead(s, arg_temp(op->args[i]));
5167         }
5168     }
5169 
5170     /* Clobber call registers.  */
5171     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5172         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5173             tcg_reg_free(s, i, allocated_regs);
5174         }
5175     }
5176 
5177     /*
5178      * Save globals if they might be written by the helper,
5179      * sync them if they might be read.
5180      */
5181     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5182         /* Nothing to do */
5183     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5184         sync_globals(s, allocated_regs);
5185     } else {
5186         save_globals(s, allocated_regs);
5187     }
5188 
5189     /*
5190      * If the ABI passes a pointer to the returned struct as the first
5191      * argument, load that now.  Pass a pointer to the output home slot.
5192      */
5193     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5194         TCGTemp *ts = arg_temp(op->args[0]);
5195 
5196         if (!ts->mem_allocated) {
5197             temp_allocate_frame(s, ts);
5198         }
5199         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5200     }
5201 
5202     tcg_out_call(s, tcg_call_func(op), info);
5203 
5204     /* Assign output registers and emit moves if needed.  */
5205     switch (info->out_kind) {
5206     case TCG_CALL_RET_NORMAL:
5207         for (i = 0; i < nb_oargs; i++) {
5208             TCGTemp *ts = arg_temp(op->args[i]);
5209             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5210 
5211             /* ENV should not be modified.  */
5212             tcg_debug_assert(!temp_readonly(ts));
5213 
5214             set_temp_val_reg(s, ts, reg);
5215             ts->mem_coherent = 0;
5216         }
5217         break;
5218 
5219     case TCG_CALL_RET_BY_VEC:
5220         {
5221             TCGTemp *ts = arg_temp(op->args[0]);
5222 
5223             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5224             tcg_debug_assert(ts->temp_subindex == 0);
5225             if (!ts->mem_allocated) {
5226                 temp_allocate_frame(s, ts);
5227             }
5228             tcg_out_st(s, TCG_TYPE_V128,
5229                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5230                        ts->mem_base->reg, ts->mem_offset);
5231         }
5232         /* fall through to mark all parts in memory */
5233 
5234     case TCG_CALL_RET_BY_REF:
5235         /* The callee has performed a write through the reference. */
5236         for (i = 0; i < nb_oargs; i++) {
5237             TCGTemp *ts = arg_temp(op->args[i]);
5238             ts->val_type = TEMP_VAL_MEM;
5239         }
5240         break;
5241 
5242     default:
5243         g_assert_not_reached();
5244     }
5245 
5246     /* Flush or discard output registers as needed. */
5247     for (i = 0; i < nb_oargs; i++) {
5248         TCGTemp *ts = arg_temp(op->args[i]);
5249         if (NEED_SYNC_ARG(i)) {
5250             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5251         } else if (IS_DEAD_ARG(i)) {
5252             temp_dead(s, ts);
5253         }
5254     }
5255 }
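
/*
 * Summary of the global-handling policy applied above, keyed by the
 * helper's declared flags (a restatement of the three branches, not
 * new behavior):
 *
 *     TCG_CALL_NO_READ_GLOBALS  -> nothing beyond the clobbered call
 *                                  registers: globals are neither
 *                                  stored nor forgotten;
 *     TCG_CALL_NO_WRITE_GLOBALS -> sync_globals(): dirty globals are
 *                                  stored back, but stay live in
 *                                  their registers;
 *     neither flag              -> save_globals(): globals are stored
 *                                  and their register contents
 *                                  forgotten, since the helper may
 *                                  write them.
 */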
5256 
5257 /**
5258  * atom_and_align_for_opc:
5259  * @s: tcg context
5260  * @opc: memory operation code
5261  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5262  * @allow_two_ops: true if we are prepared to issue two operations
5263  *
5264  * Return the alignment and atomicity to use for the inline fast path
5265  * for the given memory operation.  The alignment may be larger than
5266  * that specified in @opc, and the correct alignment will be diagnosed
5267  * by the slow path helper.
5268  *
5269  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5270  * and issue two loads or stores for subalignment.
5271  */
5272 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5273                                            MemOp host_atom, bool allow_two_ops)
5274 {
5275     MemOp align = get_alignment_bits(opc);
5276     MemOp size = opc & MO_SIZE;
5277     MemOp half = size ? size - 1 : 0;
5278     MemOp atmax;
5279     MemOp atom;
5280 
5281     /* When serialized, no further atomicity required.  */
5282     if (s->gen_tb->cflags & CF_PARALLEL) {
5283         atom = opc & MO_ATOM_MASK;
5284     } else {
5285         atom = MO_ATOM_NONE;
5286     }
5287 
5288     switch (atom) {
5289     case MO_ATOM_NONE:
5290         /* The operation requires no specific atomicity. */
5291         atmax = MO_8;
5292         break;
5293 
5294     case MO_ATOM_IFALIGN:
5295         atmax = size;
5296         break;
5297 
5298     case MO_ATOM_IFALIGN_PAIR:
5299         atmax = half;
5300         break;
5301 
5302     case MO_ATOM_WITHIN16:
5303         atmax = size;
5304         if (size == MO_128) {
5305             /* Misalignment implies !within16, and therefore no atomicity. */
5306         } else if (host_atom != MO_ATOM_WITHIN16) {
5307             /* The host does not implement within16, so require alignment. */
5308             align = MAX(align, size);
5309         }
5310         break;
5311 
5312     case MO_ATOM_WITHIN16_PAIR:
5313         atmax = size;
5314         /*
5315          * Misalignment implies !within16, and therefore half atomicity.
5316          * Any host prepared for two operations can implement this with
5317          * half alignment.
5318          */
5319         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5320             align = MAX(align, half);
5321         }
5322         break;
5323 
5324     case MO_ATOM_SUBALIGN:
5325         atmax = size;
5326         if (host_atom != MO_ATOM_SUBALIGN) {
5327             /* If unaligned but not odd, there are subobjects up to half. */
5328             if (allow_two_ops) {
5329                 align = MAX(align, half);
5330             } else {
5331                 align = MAX(align, size);
5332             }
5333         }
5334         break;
5335 
5336     default:
5337         g_assert_not_reached();
5338     }
5339 
5340     return (TCGAtomAlign){ .atom = atmax, .align = align };
5341 }
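
/*
 * Worked example for the cases above (a parallel-context TB, so the
 * requested atomicity is honored): an 8-byte access with
 * MO_ATOM_WITHIN16_PAIR on a host that provides only MO_ATOM_IFALIGN,
 * with allow_two_ops = true, yields
 *
 *     (TCGAtomAlign){ .atom = MO_64, .align = MO_32 }
 *
 * i.e. the inline fast path demands at least 4-byte alignment,
 * providing full 8-byte atomicity when the access stays within a
 * 16-byte line and two atomic 4-byte halves otherwise.
 */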
5342 
5343 /*
5344  * Similarly for qemu_ld/st slow path helpers.
5345  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5346  * using only the provided backend tcg_out_* functions.
5347  */
5348 
5349 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5350 {
5351     int ofs = arg_slot_stk_ofs(slot);
5352 
5353     /*
5354      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5355      * require extension to uint64_t, adjust the address for uint32_t.
5356      */
5357     if (HOST_BIG_ENDIAN &&
5358         TCG_TARGET_REG_BITS == 64 &&
5359         type == TCG_TYPE_I32) {
5360         ofs += 4;
5361     }
5362     return ofs;
5363 }
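
/*
 * Example (slot offset hypothetical): on a big-endian 64-bit host
 * where arg_slot_stk_ofs(2) == 16, a TCG_TYPE_I32 argument for slot 2
 * is stored at offset 20 -- the high-addressed half of the 8-byte
 * slot, which on a big-endian host is where the callee reads the
 * least significant 32 bits.
 */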
5364 
5365 static void tcg_out_helper_load_slots(TCGContext *s,
5366                                       unsigned nmov, TCGMovExtend *mov,
5367                                       const TCGLdstHelperParam *parm)
5368 {
5369     unsigned i;
5370     TCGReg dst3;
5371 
5372     /*
5373      * Start from the end, storing to the stack first.
5374      * This frees those registers, so we need not consider overlap.
5375      */
5376     for (i = nmov; i-- > 0; ) {
5377         unsigned slot = mov[i].dst;
5378 
5379         if (arg_slot_reg_p(slot)) {
5380             goto found_reg;
5381         }
5382 
5383         TCGReg src = mov[i].src;
5384         TCGType dst_type = mov[i].dst_type;
5385         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5386 
5387         /* The argument is going onto the stack; extend into scratch. */
5388         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5389             tcg_debug_assert(parm->ntmp != 0);
5390             mov[i].dst = src = parm->tmp[0];
5391             tcg_out_movext1(s, &mov[i]);
5392         }
5393 
5394         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5395                    tcg_out_helper_stk_ofs(dst_type, slot));
5396     }
5397     return;
5398 
5399  found_reg:
5400     /*
5401      * The remaining arguments are in registers.
5402      * Convert slot numbers to argument registers.
5403      */
5404     nmov = i + 1;
5405     for (i = 0; i < nmov; ++i) {
5406         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5407     }
5408 
5409     switch (nmov) {
5410     case 4:
5411         /* The backend must have provided enough temps for the worst case. */
5412         tcg_debug_assert(parm->ntmp >= 2);
5413 
5414         dst3 = mov[3].dst;
5415         for (unsigned j = 0; j < 3; ++j) {
5416             if (dst3 == mov[j].src) {
5417                 /*
5418                  * Conflict. Copy the source to a temporary, perform the
5419                  * remaining moves, then the extension from our scratch
5420                  * on the way out.
5421                  */
5422                 TCGReg scratch = parm->tmp[1];
5423 
5424                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5425                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5426                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5427                 return;
5428             }
5429         }
5430 
5431         /* No conflicts: perform this move and continue. */
5432         tcg_out_movext1(s, &mov[3]);
5433         /* fall through */
5434 
5435     case 3:
5436         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5437                         parm->ntmp ? parm->tmp[0] : -1);
5438         break;
5439     case 2:
5440         tcg_out_movext2(s, mov, mov + 1,
5441                         parm->ntmp ? parm->tmp[0] : -1);
5442         break;
5443     case 1:
5444         tcg_out_movext1(s, mov);
5445         break;
5446     default:
5447         g_assert_not_reached();
5448     }
5449 }
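
/*
 * Conflict example for the nmov == 4 case above (registers
 * hypothetical): given
 *
 *     mov[0]: r2 <- r5,  mov[1]: r3 <- r6,
 *     mov[2]: r4 <- r7,  mov[3]: r5 <- r2,
 *
 * dst3 (r5) equals mov[0].src, so mov[3] cannot go first; and doing
 * the other three first would clobber r2, mov[3]'s source.  Hence
 * r2 is saved to the scratch register, the three-way move is
 * performed, and mov[3] is completed from the scratch on the way out.
 */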
5450 
5451 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5452                                     TCGType type, tcg_target_long imm,
5453                                     const TCGLdstHelperParam *parm)
5454 {
5455     if (arg_slot_reg_p(slot)) {
5456         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5457     } else {
5458         int ofs = tcg_out_helper_stk_ofs(type, slot);
5459         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5460             tcg_debug_assert(parm->ntmp != 0);
5461             tcg_out_movi(s, type, parm->tmp[0], imm);
5462             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5463         }
5464     }
5465 }
5466 
5467 static void tcg_out_helper_load_common_args(TCGContext *s,
5468                                             const TCGLabelQemuLdst *ldst,
5469                                             const TCGLdstHelperParam *parm,
5470                                             const TCGHelperInfo *info,
5471                                             unsigned next_arg)
5472 {
5473     TCGMovExtend ptr_mov = {
5474         .dst_type = TCG_TYPE_PTR,
5475         .src_type = TCG_TYPE_PTR,
5476         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5477     };
5478     const TCGCallArgumentLoc *loc = &info->in[0];
5479     TCGType type;
5480     unsigned slot;
5481     tcg_target_ulong imm;
5482 
5483     /*
5484      * Handle env, which is always first.
5485      */
5486     ptr_mov.dst = loc->arg_slot;
5487     ptr_mov.src = TCG_AREG0;
5488     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5489 
5490     /*
5491      * Handle oi.
5492      */
5493     imm = ldst->oi;
5494     loc = &info->in[next_arg];
5495     type = TCG_TYPE_I32;
5496     switch (loc->kind) {
5497     case TCG_CALL_ARG_NORMAL:
5498         break;
5499     case TCG_CALL_ARG_EXTEND_U:
5500     case TCG_CALL_ARG_EXTEND_S:
5501         /* No extension required for MemOpIdx. */
5502         tcg_debug_assert(imm <= INT32_MAX);
5503         type = TCG_TYPE_REG;
5504         break;
5505     default:
5506         g_assert_not_reached();
5507     }
5508     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5509     next_arg++;
5510 
5511     /*
5512      * Handle ra.
5513      */
5514     loc = &info->in[next_arg];
5515     slot = loc->arg_slot;
5516     if (parm->ra_gen) {
5517         int arg_reg = -1;
5518         TCGReg ra_reg;
5519 
5520         if (arg_slot_reg_p(slot)) {
5521             arg_reg = tcg_target_call_iarg_regs[slot];
5522         }
5523         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5524 
5525         ptr_mov.dst = slot;
5526         ptr_mov.src = ra_reg;
5527         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5528     } else {
5529         imm = (uintptr_t)ldst->raddr;
5530         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5531     }
5532 }
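
/*
 * A sketch of a backend ra_gen hook as consumed above (hypothetical
 * function; the real hooks live in tcg-target.c.inc).  Assuming the
 * slot is a register slot, so arg_reg >= 0:
 *
 *     static TCGReg my_ra_gen(TCGContext *s,
 *                             const TCGLabelQemuLdst *l, int arg_reg)
 *     {
 *         // Materialize the slow-path return address directly in
 *         // the argument register and hand it back.
 *         tcg_out_movi(s, TCG_TYPE_PTR, arg_reg, (uintptr_t)l->raddr);
 *         return arg_reg;
 *     }
 *
 * Backends without such a hook leave parm->ra_gen NULL and take the
 * tcg_out_helper_load_imm() path with ldst->raddr instead.
 */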
5533 
5534 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5535                                        const TCGCallArgumentLoc *loc,
5536                                        TCGType dst_type, TCGType src_type,
5537                                        TCGReg lo, TCGReg hi)
5538 {
5539     MemOp reg_mo;
5540 
5541     if (dst_type <= TCG_TYPE_REG) {
5542         MemOp src_ext;
5543 
5544         switch (loc->kind) {
5545         case TCG_CALL_ARG_NORMAL:
5546             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5547             break;
5548         case TCG_CALL_ARG_EXTEND_U:
5549             dst_type = TCG_TYPE_REG;
5550             src_ext = MO_UL;
5551             break;
5552         case TCG_CALL_ARG_EXTEND_S:
5553             dst_type = TCG_TYPE_REG;
5554             src_ext = MO_SL;
5555             break;
5556         default:
5557             g_assert_not_reached();
5558         }
5559 
5560         mov[0].dst = loc->arg_slot;
5561         mov[0].dst_type = dst_type;
5562         mov[0].src = lo;
5563         mov[0].src_type = src_type;
5564         mov[0].src_ext = src_ext;
5565         return 1;
5566     }
5567 
5568     if (TCG_TARGET_REG_BITS == 32) {
5569         assert(dst_type == TCG_TYPE_I64);
5570         reg_mo = MO_32;
5571     } else {
5572         assert(dst_type == TCG_TYPE_I128);
5573         reg_mo = MO_64;
5574     }
5575 
5576     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5577     mov[0].src = lo;
5578     mov[0].dst_type = TCG_TYPE_REG;
5579     mov[0].src_type = TCG_TYPE_REG;
5580     mov[0].src_ext = reg_mo;
5581 
5582     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5583     mov[1].src = hi;
5584     mov[1].dst_type = TCG_TYPE_REG;
5585     mov[1].src_type = TCG_TYPE_REG;
5586     mov[1].src_ext = reg_mo;
5587 
5588     return 2;
5589 }
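
/*
 * Example of the two-part case above (slots illustrative): on a
 * 32-bit little-endian host, an I64 value in {lo, hi} bound for
 * slots {2, 3} produces mov[0] = slot 2 <- lo and
 * mov[1] = slot 3 <- hi, each with src_ext = MO_32; on a big-endian
 * host the slot assignment is swapped so that the most significant
 * half occupies the lower-numbered slot.
 */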
5590 
5591 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5592                                    const TCGLdstHelperParam *parm)
5593 {
5594     const TCGHelperInfo *info;
5595     const TCGCallArgumentLoc *loc;
5596     TCGMovExtend mov[2];
5597     unsigned next_arg, nmov;
5598     MemOp mop = get_memop(ldst->oi);
5599 
5600     switch (mop & MO_SIZE) {
5601     case MO_8:
5602     case MO_16:
5603     case MO_32:
5604         info = &info_helper_ld32_mmu;
5605         break;
5606     case MO_64:
5607         info = &info_helper_ld64_mmu;
5608         break;
5609     case MO_128:
5610         info = &info_helper_ld128_mmu;
5611         break;
5612     default:
5613         g_assert_not_reached();
5614     }
5615 
5616     /* Defer env argument. */
5617     next_arg = 1;
5618 
5619     loc = &info->in[next_arg];
5620     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5621         /*
5622          * 32-bit host with 32-bit guest: zero-extend the guest address
5623          * to 64-bits for the helper by storing the low part, then
5624          * load a zero for the high part.
5625          */
5626         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5627                                TCG_TYPE_I32, TCG_TYPE_I32,
5628                                ldst->addrlo_reg, -1);
5629         tcg_out_helper_load_slots(s, 1, mov, parm);
5630 
5631         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5632                                 TCG_TYPE_I32, 0, parm);
5633         next_arg += 2;
5634     } else {
5635         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5636                                       ldst->addrlo_reg, ldst->addrhi_reg);
5637         tcg_out_helper_load_slots(s, nmov, mov, parm);
5638         next_arg += nmov;
5639     }
5640 
5641     switch (info->out_kind) {
5642     case TCG_CALL_RET_NORMAL:
5643     case TCG_CALL_RET_BY_VEC:
5644         break;
5645     case TCG_CALL_RET_BY_REF:
5646         /*
5647          * The return reference is in the first argument slot.
5648          * We need memory in which to return: re-use the top of stack.
5649          */
5650         {
5651             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5652 
5653             if (arg_slot_reg_p(0)) {
5654                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5655                                  TCG_REG_CALL_STACK, ofs_slot0);
5656             } else {
5657                 tcg_debug_assert(parm->ntmp != 0);
5658                 tcg_out_addi_ptr(s, parm->tmp[0],
5659                                  TCG_REG_CALL_STACK, ofs_slot0);
5660                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5661                            TCG_REG_CALL_STACK, ofs_slot0);
5662             }
5663         }
5664         break;
5665     default:
5666         g_assert_not_reached();
5667     }
5668 
5669     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5670 }
5671 
5672 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5673                                   bool load_sign,
5674                                   const TCGLdstHelperParam *parm)
5675 {
5676     MemOp mop = get_memop(ldst->oi);
5677     TCGMovExtend mov[2];
5678     int ofs_slot0;
5679 
5680     switch (ldst->type) {
5681     case TCG_TYPE_I64:
5682         if (TCG_TARGET_REG_BITS == 32) {
5683             break;
5684         }
5685         /* fall through */
5686 
5687     case TCG_TYPE_I32:
5688         mov[0].dst = ldst->datalo_reg;
5689         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5690         mov[0].dst_type = ldst->type;
5691         mov[0].src_type = TCG_TYPE_REG;
5692 
5693         /*
5694          * If load_sign, then we allowed the helper to perform the
5695          * appropriate sign extension to tcg_target_ulong, and all
5696          * we need now is a plain move.
5697          *
5698          * If not, then we expect the relevant extension
5699          * instruction to be no more expensive than a move, and
5700          * we thus save the icache etc by only using one of two
5701          * helper functions.
5702          */
5703         if (load_sign || !(mop & MO_SIGN)) {
5704             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5705                 mov[0].src_ext = MO_32;
5706             } else {
5707                 mov[0].src_ext = MO_64;
5708             }
5709         } else {
5710             mov[0].src_ext = mop & MO_SSIZE;
5711         }
5712         tcg_out_movext1(s, mov);
5713         return;
5714 
5715     case TCG_TYPE_I128:
5716         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5717         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5718         switch (TCG_TARGET_CALL_RET_I128) {
5719         case TCG_CALL_RET_NORMAL:
5720             break;
5721         case TCG_CALL_RET_BY_VEC:
5722             tcg_out_st(s, TCG_TYPE_V128,
5723                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5724                        TCG_REG_CALL_STACK, ofs_slot0);
5725             /* fall through */
5726         case TCG_CALL_RET_BY_REF:
5727             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5728                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5729             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5730                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5731             return;
5732         default:
5733             g_assert_not_reached();
5734         }
5735         break;
5736 
5737     default:
5738         g_assert_not_reached();
5739     }
5740 
5741     mov[0].dst = ldst->datalo_reg;
5742     mov[0].src =
5743         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5744     mov[0].dst_type = TCG_TYPE_I32;
5745     mov[0].src_type = TCG_TYPE_I32;
5746     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5747 
5748     mov[1].dst = ldst->datahi_reg;
5749     mov[1].src =
5750         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5751     mov[1].dst_type = TCG_TYPE_REG;
5752     mov[1].src_type = TCG_TYPE_REG;
5753     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5754 
5755     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5756 }
5757 
5758 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5759                                    const TCGLdstHelperParam *parm)
5760 {
5761     const TCGHelperInfo *info;
5762     const TCGCallArgumentLoc *loc;
5763     TCGMovExtend mov[4];
5764     TCGType data_type;
5765     unsigned next_arg, nmov, n;
5766     MemOp mop = get_memop(ldst->oi);
5767 
5768     switch (mop & MO_SIZE) {
5769     case MO_8:
5770     case MO_16:
5771     case MO_32:
5772         info = &info_helper_st32_mmu;
5773         data_type = TCG_TYPE_I32;
5774         break;
5775     case MO_64:
5776         info = &info_helper_st64_mmu;
5777         data_type = TCG_TYPE_I64;
5778         break;
5779     case MO_128:
5780         info = &info_helper_st128_mmu;
5781         data_type = TCG_TYPE_I128;
5782         break;
5783     default:
5784         g_assert_not_reached();
5785     }
5786 
5787     /* Defer env argument. */
5788     next_arg = 1;
5789     nmov = 0;
5790 
5791     /* Handle addr argument. */
5792     loc = &info->in[next_arg];
5793     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5794         /*
5795          * 32-bit host with 32-bit guest: zero-extend the guest address
5796          * to 64-bits for the helper by storing the low part.  Later,
5797          * after we have processed the register inputs, we will load a
5798          * zero for the high part.
5799          */
5800         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5801                                TCG_TYPE_I32, TCG_TYPE_I32,
5802                                ldst->addrlo_reg, -1);
5803         next_arg += 2;
5804         nmov += 1;
5805     } else {
5806         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5807                                    ldst->addrlo_reg, ldst->addrhi_reg);
5808         next_arg += n;
5809         nmov += n;
5810     }
5811 
5812     /* Handle data argument. */
5813     loc = &info->in[next_arg];
5814     switch (loc->kind) {
5815     case TCG_CALL_ARG_NORMAL:
5816     case TCG_CALL_ARG_EXTEND_U:
5817     case TCG_CALL_ARG_EXTEND_S:
5818         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
5819                                    ldst->datalo_reg, ldst->datahi_reg);
5820         next_arg += n;
5821         nmov += n;
5822         tcg_out_helper_load_slots(s, nmov, mov, parm);
5823         break;
5824 
5825     case TCG_CALL_ARG_BY_REF:
5826         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5827         tcg_debug_assert(data_type == TCG_TYPE_I128);
5828         tcg_out_st(s, TCG_TYPE_I64,
5829                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
5830                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
5831         tcg_out_st(s, TCG_TYPE_I64,
5832                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
5833                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
5834 
5835         tcg_out_helper_load_slots(s, nmov, mov, parm);
5836 
5837         if (arg_slot_reg_p(loc->arg_slot)) {
5838             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
5839                              TCG_REG_CALL_STACK,
5840                              arg_slot_stk_ofs(loc->ref_slot));
5841         } else {
5842             tcg_debug_assert(parm->ntmp != 0);
5843             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
5844                              arg_slot_stk_ofs(loc->ref_slot));
5845             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5846                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
5847         }
5848         next_arg += 2;
5849         break;
5850 
5851     default:
5852         g_assert_not_reached();
5853     }
5854 
5855     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5856         /* Zero extend the address by loading a zero for the high part. */
5857         loc = &info->in[1 + !HOST_BIG_ENDIAN];
5858         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
5859     }
5860 
5861     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5862 }
5863 
5864 #ifdef CONFIG_PROFILER
5865 
5866 /* avoid copy/paste errors */
5867 #define PROF_ADD(to, from, field)                       \
5868     do {                                                \
5869         (to)->field += qatomic_read(&((from)->field));  \
5870     } while (0)
5871 
5872 #define PROF_MAX(to, from, field)                                       \
5873     do {                                                                \
5874         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
5875         if (val__ > (to)->field) {                                      \
5876             (to)->field = val__;                                        \
5877         }                                                               \
5878     } while (0)
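
/*
 * For instance, PROF_ADD(prof, orig, tb_count) expands to
 *
 *     do {
 *         (prof)->tb_count += qatomic_read(&((orig)->tb_count));
 *     } while (0)
 *
 * so each per-context counter is read atomically while being
 * accumulated into the caller's private snapshot.
 */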
5879 
5880 /* Pass in a zeroed @prof */
5881 static inline
5882 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5883 {
5884     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5885     unsigned int i;
5886 
5887     for (i = 0; i < n_ctxs; i++) {
5888         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5889         const TCGProfile *orig = &s->prof;
5890 
5891         if (counters) {
5892             PROF_ADD(prof, orig, cpu_exec_time);
5893             PROF_ADD(prof, orig, tb_count1);
5894             PROF_ADD(prof, orig, tb_count);
5895             PROF_ADD(prof, orig, op_count);
5896             PROF_MAX(prof, orig, op_count_max);
5897             PROF_ADD(prof, orig, temp_count);
5898             PROF_MAX(prof, orig, temp_count_max);
5899             PROF_ADD(prof, orig, del_op_count);
5900             PROF_ADD(prof, orig, code_in_len);
5901             PROF_ADD(prof, orig, code_out_len);
5902             PROF_ADD(prof, orig, search_out_len);
5903             PROF_ADD(prof, orig, interm_time);
5904             PROF_ADD(prof, orig, code_time);
5905             PROF_ADD(prof, orig, la_time);
5906             PROF_ADD(prof, orig, opt_time);
5907             PROF_ADD(prof, orig, restore_count);
5908             PROF_ADD(prof, orig, restore_time);
5909         }
5910         if (table) {
5911             int i;
5912 
5913             for (i = 0; i < NB_OPS; i++) {
5914                 PROF_ADD(prof, orig, table_op_count[i]);
5915             }
5916         }
5917     }
5918 }
5919 
5920 #undef PROF_ADD
5921 #undef PROF_MAX
5922 
5923 static void tcg_profile_snapshot_counters(TCGProfile *prof)
5924 {
5925     tcg_profile_snapshot(prof, true, false);
5926 }
5927 
5928 static void tcg_profile_snapshot_table(TCGProfile *prof)
5929 {
5930     tcg_profile_snapshot(prof, false, true);
5931 }
5932 
5933 void tcg_dump_op_count(GString *buf)
5934 {
5935     TCGProfile prof = {};
5936     int i;
5937 
5938     tcg_profile_snapshot_table(&prof);
5939     for (i = 0; i < NB_OPS; i++) {
5940         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5941                                prof.table_op_count[i]);
5942     }
5943 }
5944 
5945 int64_t tcg_cpu_exec_time(void)
5946 {
5947     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5948     unsigned int i;
5949     int64_t ret = 0;
5950 
5951     for (i = 0; i < n_ctxs; i++) {
5952         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5953         const TCGProfile *prof = &s->prof;
5954 
5955         ret += qatomic_read(&prof->cpu_exec_time);
5956     }
5957     return ret;
5958 }
5959 #else
5960 void tcg_dump_op_count(GString *buf)
5961 {
5962     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5963 }
5964 
5965 int64_t tcg_cpu_exec_time(void)
5966 {
5967     error_report("%s: TCG profiler not compiled", __func__);
5968     exit(EXIT_FAILURE);
5969 }
5970 #endif
5971 
5972 
5973 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5974 {
5975 #ifdef CONFIG_PROFILER
5976     TCGProfile *prof = &s->prof;
5977 #endif
5978     int i, num_insns;
5979     TCGOp *op;
5980 
5981 #ifdef CONFIG_PROFILER
5982     {
5983         int n = 0;
5984 
5985         QTAILQ_FOREACH(op, &s->ops, link) {
5986             n++;
5987         }
5988         qatomic_set(&prof->op_count, prof->op_count + n);
5989         if (n > prof->op_count_max) {
5990             qatomic_set(&prof->op_count_max, n);
5991         }
5992 
5993         n = s->nb_temps;
5994         qatomic_set(&prof->temp_count, prof->temp_count + n);
5995         if (n > prof->temp_count_max) {
5996             qatomic_set(&prof->temp_count_max, n);
5997         }
5998     }
5999 #endif
6000 
6001 #ifdef DEBUG_DISAS
6002     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6003                  && qemu_log_in_addr_range(pc_start))) {
6004         FILE *logfile = qemu_log_trylock();
6005         if (logfile) {
6006             fprintf(logfile, "OP:\n");
6007             tcg_dump_ops(s, logfile, false);
6008             fprintf(logfile, "\n");
6009             qemu_log_unlock(logfile);
6010         }
6011     }
6012 #endif
6013 
6014 #ifdef CONFIG_DEBUG_TCG
6015     /* Ensure all labels referenced have been emitted.  */
6016     {
6017         TCGLabel *l;
6018         bool error = false;
6019 
6020         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6021             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6022                 qemu_log_mask(CPU_LOG_TB_OP,
6023                               "$L%d referenced but not present.\n", l->id);
6024                 error = true;
6025             }
6026         }
6027         assert(!error);
6028     }
6029 #endif
6030 
6031 #ifdef CONFIG_PROFILER
6032     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
6033 #endif
6034 
6035 #ifdef USE_TCG_OPTIMIZATIONS
6036     tcg_optimize(s);
6037 #endif
6038 
6039 #ifdef CONFIG_PROFILER
6040     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
6041     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
6042 #endif
6043 
6044     reachable_code_pass(s);
6045     liveness_pass_0(s);
6046     liveness_pass_1(s);
6047 
6048     if (s->nb_indirects > 0) {
6049 #ifdef DEBUG_DISAS
6050         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6051                      && qemu_log_in_addr_range(pc_start))) {
6052             FILE *logfile = qemu_log_trylock();
6053             if (logfile) {
6054                 fprintf(logfile, "OP before indirect lowering:\n");
6055                 tcg_dump_ops(s, logfile, false);
6056                 fprintf(logfile, "\n");
6057                 qemu_log_unlock(logfile);
6058             }
6059         }
6060 #endif
6061         /* Replace indirect temps with direct temps.  */
6062         if (liveness_pass_2(s)) {
6063             /* If changes were made, re-run liveness.  */
6064             liveness_pass_1(s);
6065         }
6066     }
6067 
6068 #ifdef CONFIG_PROFILER
6069     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
6070 #endif
6071 
6072 #ifdef DEBUG_DISAS
6073     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6074                  && qemu_log_in_addr_range(pc_start))) {
6075         FILE *logfile = qemu_log_trylock();
6076         if (logfile) {
6077             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6078             tcg_dump_ops(s, logfile, true);
6079             fprintf(logfile, "\n");
6080             qemu_log_unlock(logfile);
6081         }
6082     }
6083 #endif
6084 
6085     /* Initialize goto_tb jump offsets. */
6086     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6087     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6088     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6089     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6090 
6091     tcg_reg_alloc_start(s);
6092 
6093     /*
6094      * Reset the buffer pointers when restarting after overflow.
6095      * TODO: Move this into translate-all.c with the rest of the
6096      * buffer management.  Having only this done here is confusing.
6097      */
6098     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6099     s->code_ptr = s->code_buf;
6100 
6101 #ifdef TCG_TARGET_NEED_LDST_LABELS
6102     QSIMPLEQ_INIT(&s->ldst_labels);
6103 #endif
6104 #ifdef TCG_TARGET_NEED_POOL_LABELS
6105     s->pool_labels = NULL;
6106 #endif
6107 
6108     num_insns = -1;
6109     QTAILQ_FOREACH(op, &s->ops, link) {
6110         TCGOpcode opc = op->opc;
6111 
6112 #ifdef CONFIG_PROFILER
6113         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
6114 #endif
6115 
6116         switch (opc) {
6117         case INDEX_op_mov_i32:
6118         case INDEX_op_mov_i64:
6119         case INDEX_op_mov_vec:
6120             tcg_reg_alloc_mov(s, op);
6121             break;
6122         case INDEX_op_dup_vec:
6123             tcg_reg_alloc_dup(s, op);
6124             break;
6125         case INDEX_op_insn_start:
6126             if (num_insns >= 0) {
6127                 size_t off = tcg_current_code_size(s);
6128                 s->gen_insn_end_off[num_insns] = off;
6129                 /* Assert that we do not overflow our stored offset.  */
6130                 assert(s->gen_insn_end_off[num_insns] == off);
6131             }
6132             num_insns++;
6133             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
6134                 s->gen_insn_data[num_insns][i] =
6135                     tcg_get_insn_start_param(op, i);
6136             }
6137             break;
6138         case INDEX_op_discard:
6139             temp_dead(s, arg_temp(op->args[0]));
6140             break;
6141         case INDEX_op_set_label:
6142             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6143             tcg_out_label(s, arg_label(op->args[0]));
6144             break;
6145         case INDEX_op_call:
6146             tcg_reg_alloc_call(s, op);
6147             break;
6148         case INDEX_op_exit_tb:
6149             tcg_out_exit_tb(s, op->args[0]);
6150             break;
6151         case INDEX_op_goto_tb:
6152             tcg_out_goto_tb(s, op->args[0]);
6153             break;
6154         case INDEX_op_dup2_vec:
6155             if (tcg_reg_alloc_dup2(s, op)) {
6156                 break;
6157             }
6158             /* fall through */
6159         default:
6160             /* Sanity check that we've not introduced any unhandled opcodes. */
6161             tcg_debug_assert(tcg_op_supported(opc));
6162             /* Note: it would be faster still to have specialized
6163                register allocator functions for some common argument
6164                patterns.  */
6165             tcg_reg_alloc_op(s, op);
6166             break;
6167         }
6168         /* Test for (pending) buffer overflow.  The assumption is that any
6169            one operation beginning below the high water mark cannot overrun
6170            the buffer completely.  Thus we can test for overflow after
6171            generating code without having to check during generation.  */
6172         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6173             return -1;
6174         }
6175         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6176         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6177             return -2;
6178         }
6179     }
6180     tcg_debug_assert(num_insns >= 0);
6181     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6182 
6183     /* Generate TB finalization at the end of block */
6184 #ifdef TCG_TARGET_NEED_LDST_LABELS
6185     i = tcg_out_ldst_finalize(s);
6186     if (i < 0) {
6187         return i;
6188     }
6189 #endif
6190 #ifdef TCG_TARGET_NEED_POOL_LABELS
6191     i = tcg_out_pool_finalize(s);
6192     if (i < 0) {
6193         return i;
6194     }
6195 #endif
6196     if (!tcg_resolve_relocs(s)) {
6197         return -2;
6198     }
6199 
6200 #ifndef CONFIG_TCG_INTERPRETER
6201     /* flush instruction cache */
6202     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6203                         (uintptr_t)s->code_buf,
6204                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6205 #endif
6206 
6207     return tcg_current_code_size(s);
6208 }
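/*
 * A note on the error returns above, as handled by the caller in
 * translate-all.c: -1 means the code_gen_buffer high-water mark was
 * crossed (flush the buffer and retry), -2 means a per-TB limit was
 * exceeded (retry the TB with fewer guest instructions); any
 * non-negative return is the size in bytes of the generated code.
 */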
6209 
6210 #ifdef CONFIG_PROFILER
6211 void tcg_dump_info(GString *buf)
6212 {
6213     TCGProfile prof = {};
6214     const TCGProfile *s;
6215     int64_t tb_count;
6216     int64_t tb_div_count;
6217     int64_t tot;
6218 
6219     tcg_profile_snapshot_counters(&prof);
6220     s = &prof;
6221     tb_count = s->tb_count;
6222     tb_div_count = tb_count ? tb_count : 1;
6223     tot = s->interm_time + s->code_time;
6224 
6225     g_string_append_printf(buf, "JIT cycles          %" PRId64
6226                            " (%0.3f s at 2.4 GHz)\n",
6227                            tot, tot / 2.4e9);
6228     g_string_append_printf(buf, "translated TBs      %" PRId64
6229                            " (aborted=%" PRId64 " %0.1f%%)\n",
6230                            tb_count, s->tb_count1 - tb_count,
6231                            (double)(s->tb_count1 - s->tb_count)
6232                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
6233     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
6234                            (double)s->op_count / tb_div_count, s->op_count_max);
6235     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
6236                            (double)s->del_op_count / tb_div_count);
6237     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
6238                            (double)s->temp_count / tb_div_count,
6239                            s->temp_count_max);
6240     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
6241                            (double)s->code_out_len / tb_div_count);
6242     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
6243                            (double)s->search_out_len / tb_div_count);
6244 
6245     g_string_append_printf(buf, "cycles/op           %0.1f\n",
6246                            s->op_count ? (double)tot / s->op_count : 0);
6247     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
6248                            s->code_in_len ? (double)tot / s->code_in_len : 0);
6249     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
6250                            s->code_out_len ? (double)tot / s->code_out_len : 0);
6251     g_string_append_printf(buf, "cycles/search byte  %0.1f\n",
6252                            s->search_out_len ?
6253                            (double)tot / s->search_out_len : 0);
6254     if (tot == 0) {
6255         tot = 1;
6256     }
6257     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
6258                            (double)s->interm_time / tot * 100.0);
6259     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
6260                            (double)s->code_time / tot * 100.0);
6261     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
6262                            (double)s->opt_time / (s->code_time ?
6263                                                   s->code_time : 1)
6264                            * 100.0);
6265     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
6266                            (double)s->la_time / (s->code_time ?
6267                                                  s->code_time : 1) * 100.0);
6268     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
6269                            s->restore_count);
6270     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
6271                            s->restore_count ?
6272                            (double)s->restore_time / s->restore_count : 0);
6273 }
6274 #else
6275 void tcg_dump_info(GString *buf)
6276 {
6277     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6278 }
6279 #endif
6280 
6281 #ifdef ELF_HOST_MACHINE
6282 /* In order to use this feature, the backend needs to do three things:
6283 
6284    (1) Define ELF_HOST_MACHINE to indicate both what value to
6285        put into the ELF image and to indicate support for the feature.
6286 
6287    (2) Define tcg_register_jit.  This should create a buffer containing
6288        the contents of a .debug_frame section that describes the post-
6289        prologue unwind info for the tcg machine.
6290 
6291    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6292 */
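/*
 * As an illustrative sketch only (each tcg-target.c.inc provides the
 * real version), a backend's tcg_register_jit() typically wraps a
 * static DebugFrame around the DebugFrameHeader defined above, filling
 * in the DW_CFA opcodes that describe its prologue.  The register
 * numbers and CFA offset below are invented for the example and do not
 * correspond to any real host.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* DW_CFA_def_cfa + operands */
    uint8_t fde_reg_ofs[2];     /* DW_CFA_offset for the return column */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,                         /* marks a CIE, not an FDE */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = 16,              /* hypothetical return-pc reg */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                              /* DW_CFA_def_cfa reg7 ... */
        0x90, 0x01,                         /* ... uleb128 144 */
    },
    .fde_reg_ofs = {
        0x90, 1,                            /* DW_CFA_offset, ret col at cfa-8 */
    },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* tcg_register_jit_int() patches fde.func_start/func_len itself.  */
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif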
6293 
6294 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6295 typedef enum {
6296     JIT_NOACTION = 0,
6297     JIT_REGISTER_FN,
6298     JIT_UNREGISTER_FN
6299 } jit_actions_t;
6300 
6301 struct jit_code_entry {
6302     struct jit_code_entry *next_entry;
6303     struct jit_code_entry *prev_entry;
6304     const void *symfile_addr;
6305     uint64_t symfile_size;
6306 };
6307 
6308 struct jit_descriptor {
6309     uint32_t version;
6310     uint32_t action_flag;
6311     struct jit_code_entry *relevant_entry;
6312     struct jit_code_entry *first_entry;
6313 };
6314 
6315 void __jit_debug_register_code(void) __attribute__((noinline));
6316 void __jit_debug_register_code(void)
6317 {
6318     asm("");
6319 }
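/*
 * Per the GDB JIT interface documentation, GDB plants a breakpoint
 * inside __jit_debug_register_code(); calling the function after
 * updating __jit_debug_descriptor (as tcg_register_jit_int() does
 * below) is what actually notifies the debugger.
 */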
6320 
6321 /* Must statically initialize the version, because GDB may check
6322    the version before we can set it.  */
6323 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6324 
6325 /* End GDB interface.  */
6326 
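/*
 * Note that find_string() assumes @str is present in @strtab, as every
 * caller below guarantees; a missing string would walk past the end of
 * the table.
 */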
6327 static int find_string(const char *strtab, const char *str)
6328 {
6329     const char *p = strtab + 1;
6330 
6331     while (1) {
6332         if (strcmp(p, str) == 0) {
6333             return p - strtab;
6334         }
6335         p += strlen(p) + 1;
6336     }
6337 }
6338 
6339 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6340                                  const void *debug_frame,
6341                                  size_t debug_frame_size)
6342 {
6343     struct __attribute__((packed)) DebugInfo {
6344         uint32_t  len;
6345         uint16_t  version;
6346         uint32_t  abbrev;
6347         uint8_t   ptr_size;
6348         uint8_t   cu_die;
6349         uint16_t  cu_lang;
6350         uintptr_t cu_low_pc;
6351         uintptr_t cu_high_pc;
6352         uint8_t   fn_die;
6353         char      fn_name[16];
6354         uintptr_t fn_low_pc;
6355         uintptr_t fn_high_pc;
6356         uint8_t   cu_eoc;
6357     };
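    /*
     * The layout of DebugInfo above must stay in sync with the
     * abbreviation table in .da below: abbrev 1 (the CU DIE) describes
     * cu_lang/cu_low_pc/cu_high_pc, and abbrev 2 (the fn DIE) describes
     * fn_name/fn_low_pc/fn_high_pc.
     */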
6358 
6359     struct ElfImage {
6360         ElfW(Ehdr) ehdr;
6361         ElfW(Phdr) phdr;
6362         ElfW(Shdr) shdr[7];
6363         ElfW(Sym)  sym[2];
6364         struct DebugInfo di;
6365         uint8_t    da[24];
6366         char       str[80];
6367     };
6368 
6369     struct ElfImage *img;
6370 
6371     static const struct ElfImage img_template = {
6372         .ehdr = {
6373             .e_ident[EI_MAG0] = ELFMAG0,
6374             .e_ident[EI_MAG1] = ELFMAG1,
6375             .e_ident[EI_MAG2] = ELFMAG2,
6376             .e_ident[EI_MAG3] = ELFMAG3,
6377             .e_ident[EI_CLASS] = ELF_CLASS,
6378             .e_ident[EI_DATA] = ELF_DATA,
6379             .e_ident[EI_VERSION] = EV_CURRENT,
6380             .e_type = ET_EXEC,
6381             .e_machine = ELF_HOST_MACHINE,
6382             .e_version = EV_CURRENT,
6383             .e_phoff = offsetof(struct ElfImage, phdr),
6384             .e_shoff = offsetof(struct ElfImage, shdr),
6385             .e_ehsize = sizeof(ElfW(Ehdr)),
6386             .e_phentsize = sizeof(ElfW(Phdr)),
6387             .e_phnum = 1,
6388             .e_shentsize = sizeof(ElfW(Shdr)),
6389             .e_shnum = ARRAY_SIZE(img->shdr),
6390             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6391 #ifdef ELF_HOST_FLAGS
6392             .e_flags = ELF_HOST_FLAGS,
6393 #endif
6394 #ifdef ELF_OSABI
6395             .e_ident[EI_OSABI] = ELF_OSABI,
6396 #endif
6397         },
6398         .phdr = {
6399             .p_type = PT_LOAD,
6400             .p_flags = PF_X,
6401         },
6402         .shdr = {
6403             [0] = { .sh_type = SHT_NULL },
6404             /* Trick: The contents of code_gen_buffer are not present in
6405                this fake ELF file; that got allocated elsewhere.  Therefore
6406                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6407                will not look for contents.  We can record any address.  */
6408             [1] = { /* .text */
6409                 .sh_type = SHT_NOBITS,
6410                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6411             },
6412             [2] = { /* .debug_info */
6413                 .sh_type = SHT_PROGBITS,
6414                 .sh_offset = offsetof(struct ElfImage, di),
6415                 .sh_size = sizeof(struct DebugInfo),
6416             },
6417             [3] = { /* .debug_abbrev */
6418                 .sh_type = SHT_PROGBITS,
6419                 .sh_offset = offsetof(struct ElfImage, da),
6420                 .sh_size = sizeof(img->da),
6421             },
6422             [4] = { /* .debug_frame */
6423                 .sh_type = SHT_PROGBITS,
6424                 .sh_offset = sizeof(struct ElfImage),
6425             },
6426             [5] = { /* .symtab */
6427                 .sh_type = SHT_SYMTAB,
6428                 .sh_offset = offsetof(struct ElfImage, sym),
6429                 .sh_size = sizeof(img->sym),
6430                 .sh_info = 1,
6431                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6432                 .sh_entsize = sizeof(ElfW(Sym)),
6433             },
6434             [6] = { /* .strtab */
6435                 .sh_type = SHT_STRTAB,
6436                 .sh_offset = offsetof(struct ElfImage, str),
6437                 .sh_size = sizeof(img->str),
6438             }
6439         },
6440         .sym = {
6441             [1] = { /* code_gen_buffer */
6442                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6443                 .st_shndx = 1,
6444             }
6445         },
6446         .di = {
6447             .len = sizeof(struct DebugInfo) - 4,
6448             .version = 2,
6449             .ptr_size = sizeof(void *),
6450             .cu_die = 1,
6451             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6452             .fn_die = 2,
6453             .fn_name = "code_gen_buffer"
6454         },
6455         .da = {
6456             1,          /* abbrev number (the cu) */
6457             0x11, 1,    /* DW_TAG_compile_unit, has children */
6458             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6459             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6460             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6461             0, 0,       /* end of abbrev */
6462             2,          /* abbrev number (the fn) */
6463             0x2e, 0,    /* DW_TAG_subprogram, no children */
6464             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6465             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6466             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6467             0, 0,       /* end of abbrev */
6468             0           /* no more abbrev */
6469         },
6470         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6471                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6472     };
6473 
6474     /* We only need a single jit entry; statically allocate it.  */
6475     static struct jit_code_entry one_entry;
6476 
6477     uintptr_t buf = (uintptr_t)buf_ptr;
6478     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6479     DebugFrameHeader *dfh;
6480 
6481     img = g_malloc(img_size);
6482     *img = img_template;
6483 
6484     img->phdr.p_vaddr = buf;
6485     img->phdr.p_paddr = buf;
6486     img->phdr.p_memsz = buf_size;
6487 
6488     img->shdr[1].sh_name = find_string(img->str, ".text");
6489     img->shdr[1].sh_addr = buf;
6490     img->shdr[1].sh_size = buf_size;
6491 
6492     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6493     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6494 
6495     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6496     img->shdr[4].sh_size = debug_frame_size;
6497 
6498     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6499     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6500 
6501     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6502     img->sym[1].st_value = buf;
6503     img->sym[1].st_size = buf_size;
6504 
6505     img->di.cu_low_pc = buf;
6506     img->di.cu_high_pc = buf + buf_size;
6507     img->di.fn_low_pc = buf;
6508     img->di.fn_high_pc = buf + buf_size;
6509 
6510     dfh = (DebugFrameHeader *)(img + 1);
6511     memcpy(dfh, debug_frame, debug_frame_size);
6512     dfh->fde.func_start = buf;
6513     dfh->fde.func_len = buf_size;
6514 
6515 #ifdef DEBUG_JIT
6516     /* Enable this block to debug the ELF image file creation.
6517        The image can be inspected with readelf, objdump, etc.  */
6518     {
6519         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6520         FILE *f = fopen(jit, "w+b");
6521         if (f) {
6522             if (fwrite(img, img_size, 1, f) != 1) {
6523                 /* Avoid the unused-result warning; nmemb == 1, so success is 1.  */
6524             }
6525             fclose(f);
6526         }
6527     }
6528 #endif
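/*
 * With DEBUG_JIT enabled, the dumped image can be checked with the
 * usual binutils, e.g. (assuming g_get_tmp_dir() resolved to /tmp):
 *
 *     readelf --sections --syms /tmp/qemu.jit
 *     objdump --dwarf=frames /tmp/qemu.jit
 */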
6529 
6530     one_entry.symfile_addr = img;
6531     one_entry.symfile_size = img_size;
6532 
6533     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6534     __jit_debug_descriptor.relevant_entry = &one_entry;
6535     __jit_debug_descriptor.first_entry = &one_entry;
6536     __jit_debug_register_code();
6537 }
6538 #else
6539 /* No support for the feature.  Provide the entry point expected by exec.c,
6540    and implement the internal function we declared earlier.  */
6541 
6542 static void tcg_register_jit_int(const void *buf, size_t size,
6543                                  const void *debug_frame,
6544                                  size_t debug_frame_size)
6545 {
6546 }
6547 
6548 void tcg_register_jit(const void *buf, size_t buf_size)
6549 {
6550 }
6551 #endif /* ELF_HOST_MACHINE */
6552 
6553 #if !TCG_TARGET_MAYBE_vec
6554 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6555 {
6556     g_assert_not_reached();
6557 }
6558 #endif
6559