xref: /openbmc/qemu/tcg/tcg.c (revision 33aba058c8fcc9b1581b03a1fbac45d8d91baac6)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/* DWARF .debug_frame Common Information Entry. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));  /* length of CIE, excluding this field */
    uint32_t id;                /* distinguishes a CIE from an FDE */
    uint8_t version;
    char augmentation[1];       /* NUL-terminated augmentation string */
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/* Fixed-size prefix of a DWARF Frame Description Entry. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));  /* length of FDE, excluding this field */
    uint32_t cie_offset;        /* offset of the owning CIE */
    uintptr_t func_start;       /* host address of the described code */
    uintptr_t func_len;         /* size in bytes of the described code */
} DebugFrameFDEHeader;
93 
/* The CIE immediately followed by one FDE header, emitted as a unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/* Deferred out-of-line slow path for one qemu_ld/qemu_st operation. */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_goto_ptr(TCGContext *s, TCGReg dest);
137 static void tcg_out_mb(TCGContext *s, unsigned bar);
138 static void tcg_out_br(TCGContext *s, TCGLabel *l);
139 static void tcg_out_set_carry(TCGContext *s);
140 static void tcg_out_set_borrow(TCGContext *s);
141 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
142                        const TCGArg args[TCG_MAX_OP_ARGS],
143                        const int const_args[TCG_MAX_OP_ARGS]);
144 #if TCG_TARGET_MAYBE_vec
145 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
146                             TCGReg dst, TCGReg src);
147 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
148                              TCGReg dst, TCGReg base, intptr_t offset);
149 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
150                              TCGReg dst, int64_t arg);
151 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
152                            unsigned vecl, unsigned vece,
153                            const TCGArg args[TCG_MAX_OP_ARGS],
154                            const int const_args[TCG_MAX_OP_ARGS]);
155 #else
/*
 * Stubs for hosts without vector support.  These must never be
 * reached; tcg_can_emit_vec_op reporting 0 for every op keeps the
 * front end from emitting vector opcodes.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* No vector operation is supported on this host. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
182 #endif
183 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
184                        intptr_t arg2);
185 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
186                         TCGReg base, intptr_t ofs);
187 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
188                          const TCGHelperInfo *info);
189 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
190 static bool tcg_target_const_match(int64_t val, int ct,
191                                    TCGType type, TCGCond cond, int vece);
192 
193 #ifndef CONFIG_USER_ONLY
194 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
195 #endif
196 
197 typedef struct TCGLdstHelperParam {
198     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
199     unsigned ntmp;
200     int tmp[3];
201 } TCGLdstHelperParam;
202 
203 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
204                                    const TCGLdstHelperParam *p)
205     __attribute__((unused));
206 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
207                                   bool load_sign, const TCGLdstHelperParam *p)
208     __attribute__((unused));
209 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
210                                    const TCGLdstHelperParam *p)
211     __attribute__((unused));
212 
/*
 * Slow-path load helpers, indexed by the MemOp size-and-sign bits.
 * The MO_SL and MO_128 entries exist only on 64-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
225 
/*
 * Slow-path store helpers, indexed by the MemOp size bits.
 * The MO_128 entry exists only on 64-bit hosts.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
235 
/* Result of atom_and_align_for_opc: required atomicity and alignment. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
240 
241 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
242                                            MemOp host_atom, bool allow_two_ops)
243     __attribute__((unused));
244 
245 #ifdef CONFIG_USER_ONLY
246 bool tcg_use_softmmu;
247 #endif
248 
249 TCGContext tcg_init_ctx;
250 __thread TCGContext *tcg_ctx;
251 
252 TCGContext **tcg_ctxs;
253 unsigned int tcg_cur_ctxs;
254 unsigned int tcg_max_ctxs;
255 TCGv_env tcg_env;
256 const void *tcg_code_gen_epilogue;
257 uintptr_t tcg_splitwx_diff;
258 
259 #ifndef CONFIG_TCG_INTERPRETER
260 tcg_prologue_fn *tcg_qemu_tb_exec;
261 #endif
262 
263 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
264 static TCGRegSet tcg_target_call_clobber_regs;
265 
266 #if TCG_TARGET_INSN_UNIT_SIZE == 1
267 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
268 {
269     *s->code_ptr++ = v;
270 }
271 
272 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
273                                                       uint8_t v)
274 {
275     *p = v;
276 }
277 #endif
278 
279 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
280 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
281 {
282     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
283         *s->code_ptr++ = v;
284     } else {
285         tcg_insn_unit *p = s->code_ptr;
286         memcpy(p, &v, sizeof(v));
287         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
288     }
289 }
290 
291 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
292                                                        uint16_t v)
293 {
294     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
295         *p = v;
296     } else {
297         memcpy(p, &v, sizeof(v));
298     }
299 }
300 #endif
301 
302 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
303 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
304 {
305     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
306         *s->code_ptr++ = v;
307     } else {
308         tcg_insn_unit *p = s->code_ptr;
309         memcpy(p, &v, sizeof(v));
310         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
311     }
312 }
313 
314 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
315                                                        uint32_t v)
316 {
317     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
318         *p = v;
319     } else {
320         memcpy(p, &v, sizeof(v));
321     }
322 }
323 #endif
324 
325 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
326 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
327 {
328     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
329         *s->code_ptr++ = v;
330     } else {
331         tcg_insn_unit *p = s->code_ptr;
332         memcpy(p, &v, sizeof(v));
333         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
334     }
335 }
336 
337 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
338                                                        uint64_t v)
339 {
340     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
341         *p = v;
342     } else {
343         memcpy(p, &v, sizeof(v));
344     }
345 }
346 #endif
347 
348 /* label relocation processing */
349 
350 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
351                           TCGLabel *l, intptr_t addend)
352 {
353     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
354 
355     r->type = type;
356     r->ptr = code_ptr;
357     r->addend = addend;
358     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
359 }
360 
/* Bind label @l to the current output position (read-execute address). */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
367 
368 TCGLabel *gen_new_label(void)
369 {
370     TCGContext *s = tcg_ctx;
371     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
372 
373     memset(l, 0, sizeof(TCGLabel));
374     l->id = s->nb_labels++;
375     QSIMPLEQ_INIT(&l->branches);
376     QSIMPLEQ_INIT(&l->relocs);
377 
378     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
379 
380     return l;
381 }
382 
383 static bool tcg_resolve_relocs(TCGContext *s)
384 {
385     TCGLabel *l;
386 
387     QSIMPLEQ_FOREACH(l, &s->labels, next) {
388         TCGRelocation *r;
389         uintptr_t value = l->u.value;
390 
391         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
392             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
393                 return false;
394             }
395         }
396     }
397     return true;
398 }
399 
/* Record the current code offset as jump-reset point @which of the TB. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
408 
/* Record the current code offset as the goto_tb jump insn @which of the TB. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
417 
/* Address of the TB's jmp_target_addr[@which] slot, as executed code sees it. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
426 
/*
 * Byte offset of tlb.f[@which] measured from the end of
 * CPUNegativeOffsetState; the result is always negative.
 */
static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
433 
434 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the matching sigsetjmp; status -2 marks the overflow case. */
    siglongjmp(s->jmp_trans, -2);
}
440 
441 /*
442  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
443  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
444  *
445  * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
447  * argument stack slot), converting to TCGReg once all arguments that
448  * are destined for the stack are processed.
449  */
typedef struct TCGMovExtend {
    unsigned dst;           /* TCGReg, or an argument slot (see above) */
    TCGReg src;             /* source register */
    TCGType dst_type;       /* integral type of the destination */
    TCGType src_type;       /* integral type of the source */
    MemOp src_ext;          /* extension to apply while moving */
} TCGMovExtend;
457 
458 /**
459  * tcg_out_movext -- move and extend
460  * @s: tcg context
461  * @dst_type: integral type for destination
462  * @dst: destination register
463  * @src_type: integral type for source
464  * @src_ext: extension to apply to source
465  * @src: source register
466  *
467  * Move or extend @src into @dst, depending on @src_ext and the types.
468  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    /* Dispatch on the extension kind; sub-word cases are type-directed. */
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: plain move, or truncate a 64-bit source. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen a 32-bit source into a 64-bit destination. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* 64-bit source and destination: (re-)extend the low 32 bits. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* Full 64-bit data requires a 64-bit host. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
519 
520 /* Minor variations on a theme, using a structure. */
/* As tcg_out_movext, with the description in @i but the source from @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
526 
/* As tcg_out_movext, with the entire description taken from @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
531 
532 /**
 * tcg_out_movext2 -- move and extend two pairs
534  * @s: tcg context
535  * @i1: first move description
536  * @i2: second move description
537  * @scratch: temporary register, or -1 for none
538  *
539  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
540  * between the sources and destinations.
541  */
542 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* i1's destination does not clobber i2's source: any order works. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full two-node cycle: exchange in place, or break via scratch. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 first: i1's destination overlapped i2's original source. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
571 
572 /**
 * tcg_out_movext3 -- move and extend three pairs
574  * @s: tcg context
575  * @i1: first move description
576  * @i2: second move description
577  * @i3: third move description
578  * @scratch: temporary register, or -1 for none
579  *
580  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
581  * between the sources and destinations.
582  */
583 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If some destination clobbers no remaining source, emit it first
       and reduce to the two-element problem. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg available: park src1 in scratch and unwind the chain. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No xchg available: park src1 in scratch and unwind the chain. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
647 
648 /*
649  * Allocate a new TCGLabelQemuLdst entry.
650  */
651 
652 __attribute__((unused))
653 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
654 {
655     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
656 
657     memset(l, 0, sizeof(*l));
658     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
659 
660     return l;
661 }
662 
663 /*
664  * Allocate new constant pool entries.
665  */
666 
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;  /* singly-linked, sorted list */
    tcg_insn_unit *label;           /* instruction to patch */
    intptr_t addend;                /* relocation addend */
    int rtype;                      /* relocation type */
    unsigned nlong;                 /* number of data words */
    tcg_target_ulong data[];        /* the constant itself, nlong words */
} TCGLabelPoolData;
675 
676 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
677                                         tcg_insn_unit *label, intptr_t addend)
678 {
679     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
680                                      + sizeof(tcg_target_ulong) * nlong);
681 
682     n->label = label;
683     n->addend = addend;
684     n->rtype = rtype;
685     n->nlong = nlong;
686     return n;
687 }
688 
/*
 * Insert @n into the pool list, kept sorted by descending nlong and
 * then by descending data contents, so that identical constants end
 * up adjacent and can be deduplicated by tcg_out_pool_finalize.
 */
static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        /* Same size: order by data, descending; ties insert here too. */
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
709 
710 /* The "usual" for generic integer code.  */
711 __attribute__((unused))
712 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
713                            tcg_insn_unit *label, intptr_t addend)
714 {
715     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
716     n->data[0] = d;
717     new_pool_insert(s, n);
718 }
719 
720 /* For v64 or v128, depending on the host.  */
721 __attribute__((unused))
722 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
723                         intptr_t addend, tcg_target_ulong d0,
724                         tcg_target_ulong d1)
725 {
726     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
727     n->data[0] = d0;
728     n->data[1] = d1;
729     new_pool_insert(s, n);
730 }
731 
732 /* For v128 or v256, depending on the host.  */
733 __attribute__((unused))
734 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
735                         intptr_t addend, tcg_target_ulong d0,
736                         tcg_target_ulong d1, tcg_target_ulong d2,
737                         tcg_target_ulong d3)
738 {
739     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
740     n->data[0] = d0;
741     n->data[1] = d1;
742     n->data[2] = d2;
743     n->data[3] = d3;
744     new_pool_insert(s, n);
745 }
746 
747 /* For v256, for 32-bit host.  */
748 __attribute__((unused))
749 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
750                         intptr_t addend, tcg_target_ulong d0,
751                         tcg_target_ulong d1, tcg_target_ulong d2,
752                         tcg_target_ulong d3, tcg_target_ulong d4,
753                         tcg_target_ulong d5, tcg_target_ulong d6,
754                         tcg_target_ulong d7)
755 {
756     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
757     n->data[0] = d0;
758     n->data[1] = d1;
759     n->data[2] = d2;
760     n->data[3] = d3;
761     n->data[4] = d4;
762     n->data[5] = d5;
763     n->data[6] = d6;
764     n->data[7] = d7;
765     new_pool_insert(s, n);
766 }
767 
768 /*
769  * Generate TB finalization at the end of block
770  */
771 
/*
 * Emit the out-of-line slow paths for all queued qemu_ld/qemu_st ops.
 * Returns 0 on success, -2 if a slow path could not be emitted,
 * -1 on (pending) code buffer overflow.
 */
static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}
796 
/*
 * Emit the accumulated constant pool after the generated code and
 * patch each recorded instruction to reference its constant.
 * Returns 0 on success, -1 on buffer overflow, -2 if a relocation
 * could not be applied.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /*
         * The list is sorted so duplicates are adjacent; @l is the last
         * entry emitted, letting equal constants share one pool slot.
         */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;   /* void * arithmetic: GCC extension, used file-wide */
            l = p;
        }

        /* Point the reloc at the (read-execute) start of this constant. */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
838 
839 #define C_PFX1(P, A)                    P##A
840 #define C_PFX2(P, A, B)                 P##A##_##B
841 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
842 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
843 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
844 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
845 
846 /* Define an enumeration for the various combinations. */
847 
848 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
849 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
850 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
851 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
852 
853 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
854 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
855 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
856 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
857 
858 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
859 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
860 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
861 
862 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
863 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
864 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
865 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
866 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
867 
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
    /* One enumerator per constraint set, generated by the C_* macros above. */
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
873 
874 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
875 
876 #undef C_O0_I1
877 #undef C_O0_I2
878 #undef C_O0_I3
879 #undef C_O0_I4
880 #undef C_O1_I1
881 #undef C_O1_I2
882 #undef C_O1_I3
883 #undef C_O1_I4
884 #undef C_N1_I2
885 #undef C_N1O1_I1
886 #undef C_N2_I1
887 #undef C_O2_I1
888 #undef C_O2_I2
889 #undef C_O2_I3
890 #undef C_O2_I4
891 #undef C_N1_O1_I4
892 
893 /* Put all of the constraint sets into an array, indexed by the enum. */
894 
/* Textual form of one constraint set, built by stringifying C_Ox_Iy args. */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;   /* number of output / input operands */
    const char *args_ct_str[TCG_MAX_OP_ARGS];   /* one string per operand */
} TCGConstraintSet;
899 
900 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
901 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
902 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
903 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
904 
905 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
906 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
907 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
908 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
909 
910 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
911 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
912 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
913 
914 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
915 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
916 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
917 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
918 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
919 
920 static const TCGConstraintSet constraint_sets[] = {
921 #include "tcg-target-con-set.h"
922 };
923 
924 #undef C_O0_I1
925 #undef C_O0_I2
926 #undef C_O0_I3
927 #undef C_O0_I4
928 #undef C_O1_I1
929 #undef C_O1_I2
930 #undef C_O1_I3
931 #undef C_O1_I4
932 #undef C_N1_I2
933 #undef C_N1O1_I1
934 #undef C_N2_I1
935 #undef C_O2_I1
936 #undef C_O2_I2
937 #undef C_O2_I3
938 #undef C_O2_I4
939 #undef C_N1_O1_I4
940 
941 /* Expand the enumerator to be returned from tcg_target_op_def(). */
942 
943 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
944 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
945 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
946 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
947 
948 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
949 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
950 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
951 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
952 
953 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
954 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
955 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
956 
957 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
958 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
959 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
960 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
961 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
962 
963 /*
964  * TCGOutOp is the base class for a set of structures that describe how
965  * to generate code for a given TCGOpcode.
966  *
967  * @static_constraint:
968  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
969  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
970  *                     based on any of @type, @flags, or host isa.
971  *   Otherwise:        The register allocation constrains for the TCGOpcode.
972  *
973  * Subclasses of TCGOutOp will define a set of output routines that may
974  * be used.  Such routines will often be selected by the set of registers
975  * and constants that come out of register allocation.  The set of
976  * routines that are provided will guide the set of constraints that are
977  * legal.  In particular, assume that tcg_optimize() has done its job in
978  * swapping commutative operands and folding operations for which all
979  * operands are constant.
980  */
981 typedef struct TCGOutOp {
982     TCGConstraintSetIndex static_constraint;
983     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
984 } TCGOutOp;
985 
/*
 * Emitters for add/sub with carry/borrow.  Function-pointer suffixes
 * encode operand kinds: r = register, i = immediate constant; e.g.
 * out_rir takes (reg, imm, reg).
 */
typedef struct TCGOutOpAddSubCarry {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
    void (*out_rii)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, tcg_target_long a2);
} TCGOutOpAddSubCarry;

/* Emitters for two-input arithmetic/logical ops: a0 = op(a1, a2). */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/* Emitters for compare-and-branch: branch to @label if (a1 cond a2). */
typedef struct TCGOutOpBrcond {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, TCGReg a2, TCGLabel *label);
    void (*out_ri)(TCGContext *s, TCGType type, TCGCond cond,
                   TCGReg a1, tcg_target_long a2, TCGLabel *label);
} TCGOutOpBrcond;

/*
 * Double-word compare-and-branch: operands arrive as lo/hi register
 * pairs (al/ah), with each rhs word either register or constant.
 */
typedef struct TCGOutOpBrcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl,
                TCGArg bh, bool const_bh, TCGLabel *l);
} TCGOutOpBrcond2;

/* Byte-swap emitter; @flags modifies the swap (see TCG_BSWAP_* users). */
typedef struct TCGOutOpBswap {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type,
                   TCGReg a0, TCGReg a1, unsigned flags);
} TCGOutOpBswap;

/*
 * Deposit a2 into a1 at [ofs, ofs+len).  For out_rzr the a1 operand is
 * omitted -- presumably a constant-zero source; confirm against backends.
 */
typedef struct TCGOutOpDeposit {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned ofs, unsigned len);
    void (*out_rri)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    tcg_target_long a2, unsigned ofs, unsigned len);
    void (*out_rzr)(TCGContext *s, TCGType type, TCGReg a0,
                    TCGReg a2, unsigned ofs, unsigned len);
} TCGOutOpDeposit;

/*
 * Combined div/rem emitter.  NOTE(review): operands a2/a3 are absent
 * from the signature -- apparently fixed by the op's register
 * constraints; verify against the backend implementations.
 */
typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

/* Extract a bitfield [ofs, ofs+len) from a1 into a0. */
typedef struct TCGOutOpExtract {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                   unsigned ofs, unsigned len);
} TCGOutOpExtract;

/* Extract from the concatenation of a1:a2, right-shifted by @shr. */
typedef struct TCGOutOpExtract2 {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1,
                    TCGReg a2, unsigned shr);
} TCGOutOpExtract2;

/* Load @dest from memory at @base + @offset. */
typedef struct TCGOutOpLoad {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGType type, TCGReg dest,
                TCGReg base, intptr_t offset);
} TCGOutOpLoad;
1060 
1061 typedef struct TCGOutOpMovcond {
1062     TCGOutOp base;
1063     void (*out)(TCGContext *s, TCGType type, TCGCond cond,
1064                 TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2,
1065                 TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf);
1066 } TCGOutOpMovcond;
1067 
/* Double-word multiply producing a register pair: (a0, a1) = a2 * a3. */
typedef struct TCGOutOpMul2 {
    TCGOutOp base;
    void (*out_rrrr)(TCGContext *s, TCGType type,
                     TCGReg a0, TCGReg a1, TCGReg a2, TCGReg a3);
} TCGOutOpMul2;

/* Single-input ops: a0 = op(a1). */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/* Set @ret from the comparison (a1 cond a2). */
typedef struct TCGOutOpSetcond {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type, TCGCond cond,
                    TCGReg ret, TCGReg a1, tcg_target_long a2);
} TCGOutOpSetcond;

/* Double-word setcond: lhs is the register pair al/ah. */
typedef struct TCGOutOpSetcond2 {
    TCGOutOp base;
    void (*out)(TCGContext *s, TCGCond cond, TCGReg ret, TCGReg al, TCGReg ah,
                TCGArg bl, bool const_bl, TCGArg bh, bool const_bh);
} TCGOutOpSetcond2;

/* Store register or immediate @data to memory at @base + @offset. */
typedef struct TCGOutOpStore {
    TCGOutOp base;
    void (*out_r)(TCGContext *s, TCGType type, TCGReg data,
                  TCGReg base, intptr_t offset);
    void (*out_i)(TCGContext *s, TCGType type, tcg_target_long data,
                  TCGReg base, intptr_t offset);
} TCGOutOpStore;

/* Subtraction; out_rir covers the reversed-immediate form a0 = imm - a2. */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1108 
1109 #include "tcg-target.c.inc"
1110 
1111 #ifndef CONFIG_TCG_INTERPRETER
1112 /* Validate CPUTLBDescFast placement. */
1113 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
1114                         sizeof(CPUNegativeOffsetState))
1115                   < MIN_TLB_MASK_TABLE_OFS);
1116 #endif
1117 
1118 #if TCG_TARGET_REG_BITS == 64
1119 /*
1120  * We require these functions for slow-path function calls.
1121  * Adapt them generically for opcode output.
1122  */
1123 
/* Adapt tcg_out_exts_i32_i64 (sign-extend i32 -> i64) to TCGOutOpUnary. */
static void tgen_exts_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_exts_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_exts_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_exts_i32_i64,
};

/* Adapt tcg_out_extu_i32_i64 (zero-extend i32 -> i64) to TCGOutOpUnary. */
static void tgen_extu_i32_i64(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extu_i32_i64(s, a0, a1);
}

static const TCGOutOpUnary outop_extu_i32_i64 = {
    .base.static_constraint = C_O1_I1(r, r),
    .out_rr = tgen_extu_i32_i64,
};

/* Adapt tcg_out_extrl_i64_i32 (truncate i64 -> low i32) to TCGOutOpUnary. */
static void tgen_extrl_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1)
{
    tcg_out_extrl_i64_i32(s, a0, a1);
}

static const TCGOutOpUnary outop_extrl_i64_i32 = {
    .base.static_constraint = C_O1_I1(r, r),
    /* No emitter when the target lacks extr_i64_i32. */
    .out_rr = TCG_TARGET_HAS_extr_i64_i32 ? tgen_extrl_i64_i32 : NULL,
};
1153 #endif
1154 
/* goto_ptr carries only a constraint; no emitter callback is needed here. */
static const TCGOutOp outop_goto_ptr = {
    .static_constraint = C_O0_I1(r),
};

/* Full-width register load, reusing the backend's tcg_out_ld directly. */
static const TCGOutOpLoad outop_ld = {
    .base.static_constraint = C_O1_I1(r, r),
    .out = tcg_out_ld,
};
1163 
1164 /*
1165  * Register V as the TCGOutOp for O.
1166  * This verifies that V is of type T, otherwise give a nice compiler error.
1167  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1168  */
1169 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1170 
/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_addci, TCGOutOpAddSubCarry, outop_addci),
    OUTOP(INDEX_op_addcio, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_addco, TCGOutOpBinary, outop_addco),
    /* addc1o is implemented with set_carry + addcio */
    OUTOP(INDEX_op_addc1o, TCGOutOpBinary, outop_addcio),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_brcond, TCGOutOpBrcond, outop_brcond),
    OUTOP(INDEX_op_bswap16, TCGOutOpBswap, outop_bswap16),
    OUTOP(INDEX_op_bswap32, TCGOutOpBswap, outop_bswap32),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_deposit, TCGOutOpDeposit, outop_deposit),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_extract, TCGOutOpExtract, outop_extract),
    OUTOP(INDEX_op_extract2, TCGOutOpExtract2, outop_extract2),
    OUTOP(INDEX_op_ld8u, TCGOutOpLoad, outop_ld8u),
    OUTOP(INDEX_op_ld8s, TCGOutOpLoad, outop_ld8s),
    OUTOP(INDEX_op_ld16u, TCGOutOpLoad, outop_ld16u),
    OUTOP(INDEX_op_ld16s, TCGOutOpLoad, outop_ld16s),
    OUTOP(INDEX_op_ld, TCGOutOpLoad, outop_ld),
    OUTOP(INDEX_op_movcond, TCGOutOpMovcond, outop_movcond),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_muls2, TCGOutOpMul2, outop_muls2),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_mulu2, TCGOutOpMul2, outop_mulu2),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_negsetcond, TCGOutOpSetcond, outop_negsetcond),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_setcond, TCGOutOpSetcond, outop_setcond),
    OUTOP(INDEX_op_sextract, TCGOutOpExtract, outop_sextract),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_st, TCGOutOpStore, outop_st),
    OUTOP(INDEX_op_st8, TCGOutOpStore, outop_st8),
    OUTOP(INDEX_op_st16, TCGOutOpStore, outop_st16),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_subbi, TCGOutOpAddSubCarry, outop_subbi),
    OUTOP(INDEX_op_subbio, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_subbo, TCGOutOpAddSubCarry, outop_subbo),
    /* subb1o is implemented with set_borrow + subbio */
    OUTOP(INDEX_op_subb1o, TCGOutOpAddSubCarry, outop_subbio),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),

    /* goto_ptr is a plain TCGOutOp, registered without the OUTOP check. */
    [INDEX_op_goto_ptr] = &outop_goto_ptr,

    /* Host-register-width specific opcodes. */
#if TCG_TARGET_REG_BITS == 32
    OUTOP(INDEX_op_brcond2_i32, TCGOutOpBrcond2, outop_brcond2),
    OUTOP(INDEX_op_setcond2_i32, TCGOutOpSetcond2, outop_setcond2),
#else
    OUTOP(INDEX_op_bswap64, TCGOutOpUnary, outop_bswap64),
    OUTOP(INDEX_op_ext_i32_i64, TCGOutOpUnary, outop_exts_i32_i64),
    OUTOP(INDEX_op_extu_i32_i64, TCGOutOpUnary, outop_extu_i32_i64),
    OUTOP(INDEX_op_extrl_i64_i32, TCGOutOpUnary, outop_extrl_i64_i32),
    OUTOP(INDEX_op_extrh_i64_i32, TCGOutOpUnary, outop_extrh_i64_i32),
    OUTOP(INDEX_op_ld32u, TCGOutOpLoad, outop_ld32u),
    OUTOP(INDEX_op_ld32s, TCGOutOpLoad, outop_ld32s),
    /* st32 reuses the plain store descriptor. */
    OUTOP(INDEX_op_st32, TCGOutOpStore, outop_st),
#endif
};
1249 
1250 #undef OUTOP
1251 
1252 /*
1253  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1254  * and registered the target's TCG globals) must register with this function
1255  * before initiating translation.
1256  *
1257  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1258  * of tcg_region_init() for the reasoning behind this.
1259  *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
1267  */
1268 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
1273 #else
1274 void tcg_register_thread(void)
1275 {
1276     TCGContext *s = g_malloc(sizeof(*s));
1277     unsigned int i, n;
1278 
1279     *s = tcg_init_ctx;
1280 
1281     /* Relink mem_base.  */
1282     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1283         if (tcg_init_ctx.temps[i].mem_base) {
1284             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1285             tcg_debug_assert(b >= 0 && b < n);
1286             s->temps[i].mem_base = &s->temps[b];
1287         }
1288     }
1289 
1290     /* Claim an entry in tcg_ctxs */
1291     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1292     g_assert(n < tcg_max_ctxs);
1293     qatomic_set(&tcg_ctxs[n], s);
1294 
1295     if (n > 0) {
1296         tcg_region_initial_alloc(s);
1297     }
1298 
1299     tcg_ctx = s;
1300 }
1301 #endif /* !CONFIG_USER_ONLY */
1302 
1303 /* pool based memory allocation */
1304 void *tcg_malloc_internal(TCGContext *s, int size)
1305 {
1306     TCGPool *p;
1307     int pool_size;
1308 
1309     if (size > TCG_POOL_CHUNK_SIZE) {
1310         /* big malloc: insert a new pool (XXX: could optimize) */
1311         p = g_malloc(sizeof(TCGPool) + size);
1312         p->size = size;
1313         p->next = s->pool_first_large;
1314         s->pool_first_large = p;
1315         return p->data;
1316     } else {
1317         p = s->pool_current;
1318         if (!p) {
1319             p = s->pool_first;
1320             if (!p)
1321                 goto new_pool;
1322         } else {
1323             if (!p->next) {
1324             new_pool:
1325                 pool_size = TCG_POOL_CHUNK_SIZE;
1326                 p = g_malloc(sizeof(TCGPool) + pool_size);
1327                 p->size = pool_size;
1328                 p->next = NULL;
1329                 if (s->pool_current) {
1330                     s->pool_current->next = p;
1331                 } else {
1332                     s->pool_first = p;
1333                 }
1334             } else {
1335                 p = p->next;
1336             }
1337         }
1338     }
1339     s->pool_current = p;
1340     s->pool_cur = p->data + size;
1341     s->pool_end = p->data + p->size;
1342     return p->data;
1343 }
1344 
1345 void tcg_pool_reset(TCGContext *s)
1346 {
1347     TCGPool *p, *t;
1348     for (p = s->pool_first_large; p; p = t) {
1349         t = p->next;
1350         g_free(p);
1351     }
1352     s->pool_first_large = NULL;
1353     s->pool_cur = s->pool_end = NULL;
1354     s->pool_current = NULL;
1355 }
1356 
1357 /*
1358  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1359  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1360  * We only use these for layout in tcg_out_ld_helper_ret and
1361  * tcg_out_st_helper_args, and share them between several of
1362  * the helpers, with the end result that it's easier to build manually.
1363  */
1364 
1365 #if TCG_TARGET_REG_BITS == 32
1366 # define dh_typecode_ttl  dh_typecode_i32
1367 #else
1368 # define dh_typecode_ttl  dh_typecode_i64
1369 #endif
1370 
1371 static TCGHelperInfo info_helper_ld32_mmu = {
1372     .flags = TCG_CALL_NO_WG,
1373     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1374               | dh_typemask(env, 1)
1375               | dh_typemask(i64, 2)  /* uint64_t addr */
1376               | dh_typemask(i32, 3)  /* unsigned oi */
1377               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1378 };
1379 
1380 static TCGHelperInfo info_helper_ld64_mmu = {
1381     .flags = TCG_CALL_NO_WG,
1382     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1383               | dh_typemask(env, 1)
1384               | dh_typemask(i64, 2)  /* uint64_t addr */
1385               | dh_typemask(i32, 3)  /* unsigned oi */
1386               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1387 };
1388 
1389 static TCGHelperInfo info_helper_ld128_mmu = {
1390     .flags = TCG_CALL_NO_WG,
1391     .typemask = dh_typemask(i128, 0) /* return Int128 */
1392               | dh_typemask(env, 1)
1393               | dh_typemask(i64, 2)  /* uint64_t addr */
1394               | dh_typemask(i32, 3)  /* unsigned oi */
1395               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1396 };
1397 
1398 static TCGHelperInfo info_helper_st32_mmu = {
1399     .flags = TCG_CALL_NO_WG,
1400     .typemask = dh_typemask(void, 0)
1401               | dh_typemask(env, 1)
1402               | dh_typemask(i64, 2)  /* uint64_t addr */
1403               | dh_typemask(i32, 3)  /* uint32_t data */
1404               | dh_typemask(i32, 4)  /* unsigned oi */
1405               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1406 };
1407 
1408 static TCGHelperInfo info_helper_st64_mmu = {
1409     .flags = TCG_CALL_NO_WG,
1410     .typemask = dh_typemask(void, 0)
1411               | dh_typemask(env, 1)
1412               | dh_typemask(i64, 2)  /* uint64_t addr */
1413               | dh_typemask(i64, 3)  /* uint64_t data */
1414               | dh_typemask(i32, 4)  /* unsigned oi */
1415               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1416 };
1417 
1418 static TCGHelperInfo info_helper_st128_mmu = {
1419     .flags = TCG_CALL_NO_WG,
1420     .typemask = dh_typemask(void, 0)
1421               | dh_typemask(env, 1)
1422               | dh_typemask(i64, 2)  /* uint64_t addr */
1423               | dh_typemask(i128, 3) /* Int128 data */
1424               | dh_typemask(i32, 4)  /* unsigned oi */
1425               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1426 };
1427 
1428 #ifdef CONFIG_TCG_INTERPRETER
/*
 * Map one 3-bit dh_typecode_* value to the matching libffi type
 * descriptor.  Aborts on a typecode with no mapping.
 */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
1465 
/*
 * Build the libffi call descriptor (ffi_cif) for a helper from its
 * packed typemask.  The typemask holds one 3-bit typecode per slot:
 * bits [2:0] are the return type, bits [3(j+1)+2 : 3(j+1)] argument j.
 * The allocation backing the cif (and its arg array) is never freed;
 * it lives for the life of the program.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1499 
1500 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1501 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1502 #else
1503 #define HELPER_INFO_INIT(I)      (&(I)->init)
1504 #define HELPER_INFO_INIT_VAL(I)  1
1505 #endif /* CONFIG_TCG_INTERPRETER */
1506 
1507 static inline bool arg_slot_reg_p(unsigned arg_slot)
1508 {
1509     /*
1510      * Split the sizeof away from the comparison to avoid Werror from
1511      * "unsigned < 0 is always false", when iarg_regs is empty.
1512      */
1513     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1514     return arg_slot < nreg;
1515 }
1516 
1517 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1518 {
1519     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1520     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1521 
1522     tcg_debug_assert(stk_slot < max);
1523     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1524 }
1525 
/* Running cursors used while laying out a helper's call arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1532 
1533 static void layout_arg_even(TCGCumulativeArgs *cum)
1534 {
1535     cum->arg_slot += cum->arg_slot & 1;
1536 }
1537 
1538 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1539                          TCGCallArgumentKind kind)
1540 {
1541     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1542 
1543     *loc = (TCGCallArgumentLoc){
1544         .kind = kind,
1545         .arg_idx = cum->arg_idx,
1546         .arg_slot = cum->arg_slot,
1547     };
1548     cum->info_in_idx++;
1549     cum->arg_slot++;
1550 }
1551 
1552 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1553                                 TCGHelperInfo *info, int n)
1554 {
1555     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1556 
1557     for (int i = 0; i < n; ++i) {
1558         /* Layout all using the same arg_idx, adjusting the subindex. */
1559         loc[i] = (TCGCallArgumentLoc){
1560             .kind = TCG_CALL_ARG_NORMAL,
1561             .arg_idx = cum->arg_idx,
1562             .tmp_subindex = i,
1563             .arg_slot = cum->arg_slot + i,
1564         };
1565     }
1566     cum->info_in_idx += n;
1567     cum->arg_slot += n;
1568 }
1569 
/*
 * Lay out an I128 argument passed by reference: one pointer slot in
 * the normal argument area, plus @n words of copy space in "ref_slot".
 */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    /* NB: capture @loc before layout_arg_1 advances cum->info_in_idx. */
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1601 
1602 static void init_call_layout(TCGHelperInfo *info)
1603 {
1604     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1605     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1606     unsigned typemask = info->typemask;
1607     unsigned typecode;
1608     TCGCumulativeArgs cum = { };
1609 
1610     /*
1611      * Parse and place any function return value.
1612      */
1613     typecode = typemask & 7;
1614     switch (typecode) {
1615     case dh_typecode_void:
1616         info->nr_out = 0;
1617         break;
1618     case dh_typecode_i32:
1619     case dh_typecode_s32:
1620     case dh_typecode_ptr:
1621         info->nr_out = 1;
1622         info->out_kind = TCG_CALL_RET_NORMAL;
1623         break;
1624     case dh_typecode_i64:
1625     case dh_typecode_s64:
1626         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1627         info->out_kind = TCG_CALL_RET_NORMAL;
1628         /* Query the last register now to trigger any assert early. */
1629         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1630         break;
1631     case dh_typecode_i128:
1632         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1633         info->out_kind = TCG_TARGET_CALL_RET_I128;
1634         switch (TCG_TARGET_CALL_RET_I128) {
1635         case TCG_CALL_RET_NORMAL:
1636             /* Query the last register now to trigger any assert early. */
1637             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1638             break;
1639         case TCG_CALL_RET_BY_VEC:
1640             /* Query the single register now to trigger any assert early. */
1641             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1642             break;
1643         case TCG_CALL_RET_BY_REF:
1644             /*
1645              * Allocate the first argument to the output.
1646              * We don't need to store this anywhere, just make it
1647              * unavailable for use in the input loop below.
1648              */
1649             cum.arg_slot = 1;
1650             break;
1651         default:
1652             qemu_build_not_reached();
1653         }
1654         break;
1655     default:
1656         g_assert_not_reached();
1657     }
1658 
1659     /*
1660      * Parse and place function arguments.
1661      */
1662     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1663         TCGCallArgumentKind kind;
1664         TCGType type;
1665 
1666         typecode = typemask & 7;
1667         switch (typecode) {
1668         case dh_typecode_i32:
1669         case dh_typecode_s32:
1670             type = TCG_TYPE_I32;
1671             break;
1672         case dh_typecode_i64:
1673         case dh_typecode_s64:
1674             type = TCG_TYPE_I64;
1675             break;
1676         case dh_typecode_ptr:
1677             type = TCG_TYPE_PTR;
1678             break;
1679         case dh_typecode_i128:
1680             type = TCG_TYPE_I128;
1681             break;
1682         default:
1683             g_assert_not_reached();
1684         }
1685 
1686         switch (type) {
1687         case TCG_TYPE_I32:
1688             switch (TCG_TARGET_CALL_ARG_I32) {
1689             case TCG_CALL_ARG_EVEN:
1690                 layout_arg_even(&cum);
1691                 /* fall through */
1692             case TCG_CALL_ARG_NORMAL:
1693                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1694                 break;
1695             case TCG_CALL_ARG_EXTEND:
1696                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1697                 layout_arg_1(&cum, info, kind);
1698                 break;
1699             default:
1700                 qemu_build_not_reached();
1701             }
1702             break;
1703 
1704         case TCG_TYPE_I64:
1705             switch (TCG_TARGET_CALL_ARG_I64) {
1706             case TCG_CALL_ARG_EVEN:
1707                 layout_arg_even(&cum);
1708                 /* fall through */
1709             case TCG_CALL_ARG_NORMAL:
1710                 if (TCG_TARGET_REG_BITS == 32) {
1711                     layout_arg_normal_n(&cum, info, 2);
1712                 } else {
1713                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1714                 }
1715                 break;
1716             default:
1717                 qemu_build_not_reached();
1718             }
1719             break;
1720 
1721         case TCG_TYPE_I128:
1722             switch (TCG_TARGET_CALL_ARG_I128) {
1723             case TCG_CALL_ARG_EVEN:
1724                 layout_arg_even(&cum);
1725                 /* fall through */
1726             case TCG_CALL_ARG_NORMAL:
1727                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1728                 break;
1729             case TCG_CALL_ARG_BY_REF:
1730                 layout_arg_by_ref(&cum, info);
1731                 break;
1732             default:
1733                 qemu_build_not_reached();
1734             }
1735             break;
1736 
1737         default:
1738             g_assert_not_reached();
1739         }
1740     }
1741     info->nr_in = cum.info_in_idx;
1742 
1743     /* Validate that we didn't overrun the input array. */
1744     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1745     /* Validate the backend has enough argument space. */
1746     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1747 
1748     /*
1749      * Relocate the "ref_slot" area to the end of the parameters.
1750      * Minimizing this stack offset helps code size for x86,
1751      * which has a signed 8-bit offset encoding.
1752      */
1753     if (cum.ref_slot != 0) {
1754         int ref_base = 0;
1755 
1756         if (cum.arg_slot > max_reg_slots) {
1757             int align = __alignof(Int128) / sizeof(tcg_target_long);
1758 
1759             ref_base = cum.arg_slot - max_reg_slots;
1760             if (align > 1) {
1761                 ref_base = ROUND_UP(ref_base, align);
1762             }
1763         }
1764         assert(ref_base + cum.ref_slot <= max_stk_slots);
1765         ref_base += max_reg_slots;
1766 
1767         if (ref_base != 0) {
1768             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1769                 TCGCallArgumentLoc *loc = &info->in[i];
1770                 switch (loc->kind) {
1771                 case TCG_CALL_ARG_BY_REF:
1772                 case TCG_CALL_ARG_BY_REF_N:
1773                     loc->ref_slot += ref_base;
1774                     break;
1775                 default:
1776                     break;
1777                 }
1778             }
1779         }
1780     }
1781 }
1782 
1783 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1784 static void process_constraint_sets(void);
1785 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1786                                             TCGReg reg, const char *name);
1787 
/*
 * One-time initialization of the shared TCG state: zero the init
 * context, lay out the MMU helper calls, let the backend register its
 * registers/constraints, and create the fixed "env" global in AREG0.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Precompute the argument/return layouts for the MMU load/store helpers. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers; reverse them. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    /* The remaining (call-clobbered) registers keep their order. */
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    /* The backend must not have reserved AREG0; it is claimed for "env". */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1842 
/*
 * Public TCG entry point: initialize the context state, then set up the
 * code_gen buffer of tb_size bytes (context init must come first, as
 * region setup uses the initialized context).
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1848 
1849 /*
1850  * Allocate TBs right before their corresponding translated code, making
1851  * sure that TBs and code are on different cache lines.
1852  */
1853 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1854 {
1855     uintptr_t align = qemu_icache_linesize;
1856     TranslationBlock *tb;
1857     void *next;
1858 
1859  retry:
1860     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1861     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1862 
1863     if (unlikely(next > s->code_gen_highwater)) {
1864         if (tcg_region_alloc(s)) {
1865             return NULL;
1866         }
1867         goto retry;
1868     }
1869     qatomic_set(&s->code_gen_ptr, next);
1870     return tb;
1871 }
1872 
/*
 * Generate the host prologue/epilogue at the start of the code buffer,
 * flush it to the instruction cache, optionally disassemble it to the
 * log, and mark the region as containing the prologue.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    /* The prologue is emitted at the very start of the region. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The executable mapping of the prologue is the TB entry point. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the generated code visible to the execution mapping. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Code is followed by a constant-pool data section;
                   disassemble the former, hex-dump the latter. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1949 
/*
 * Reset the per-translation state of S in preparation for generating
 * a new translation block.  Globals persist; all temps, labels, ops,
 * and interned constants from the previous TB are discarded.
 */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all non-global temps from the previous translation. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    /* Spill slots are re-allocated from the start of the frame. */
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    /* Sanity-check fields that the frontend must have configured. */
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}
1981 
1982 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1983 {
1984     int n = s->nb_temps++;
1985 
1986     if (n >= TCG_MAX_TEMPS) {
1987         tcg_raise_tb_overflow(s);
1988     }
1989     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1990 }
1991 
1992 static TCGTemp *tcg_global_alloc(TCGContext *s)
1993 {
1994     TCGTemp *ts;
1995 
1996     tcg_debug_assert(s->nb_globals == s->nb_temps);
1997     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1998     s->nb_globals++;
1999     ts = tcg_temp_alloc(s);
2000     ts->kind = TEMP_GLOBAL;
2001 
2002     return ts;
2003 }
2004 
2005 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
2006                                             TCGReg reg, const char *name)
2007 {
2008     TCGTemp *ts;
2009 
2010     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
2011 
2012     ts = tcg_global_alloc(s);
2013     ts->base_type = type;
2014     ts->type = type;
2015     ts->kind = TEMP_FIXED;
2016     ts->reg = reg;
2017     ts->name = name;
2018     tcg_regset_set_reg(s->reserved_regs, reg);
2019 
2020     return ts;
2021 }
2022 
2023 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
2024 {
2025     s->frame_start = start;
2026     s->frame_end = start + size;
2027     s->frame_temp
2028         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
2029 }
2030 
/*
 * Create a global temp backed by memory at BASE + OFFSET.
 * On a 32-bit host, a 64-bit global is split into two consecutive
 * 32-bit halves named "<name>_0"/"<name>_1" (low half at the lower
 * subindex, offsets 0 and +4 from OFFSET).
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host counts as two indirects. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        /* Split into a pair of adjacent I32 temps. */
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
2090 
2091 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
2092 {
2093     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
2094     return temp_tcgv_i32(ts);
2095 }
2096 
2097 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
2098 {
2099     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
2100     return temp_tcgv_i64(ts);
2101 }
2102 
2103 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
2104 {
2105     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
2106     return temp_tcgv_ptr(ts);
2107 }
2108 
/*
 * Allocate a temporary of the given TYPE and lifetime KIND (TEMP_EBB or
 * TEMP_TB).  EBB temps are recycled through a per-type free list; types
 * wider than the host register (I64 on 32-bit hosts, I128) are allocated
 * as a run of consecutive register-sized sub-temps.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* n = number of host registers needed to hold one value of TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Multi-word value: each piece is one host register wide. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
2172 
2173 TCGv_i32 tcg_temp_new_i32(void)
2174 {
2175     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
2176 }
2177 
2178 TCGv_i32 tcg_temp_ebb_new_i32(void)
2179 {
2180     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
2181 }
2182 
2183 TCGv_i64 tcg_temp_new_i64(void)
2184 {
2185     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
2186 }
2187 
2188 TCGv_i64 tcg_temp_ebb_new_i64(void)
2189 {
2190     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
2191 }
2192 
2193 TCGv_ptr tcg_temp_new_ptr(void)
2194 {
2195     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2196 }
2197 
2198 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2199 {
2200     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2201 }
2202 
2203 TCGv_i128 tcg_temp_new_i128(void)
2204 {
2205     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2206 }
2207 
2208 TCGv_i128 tcg_temp_ebb_new_i128(void)
2209 {
2210     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2211 }
2212 
2213 TCGv_vec tcg_temp_new_vec(TCGType type)
2214 {
2215     TCGTemp *t;
2216 
2217 #ifdef CONFIG_DEBUG_TCG
2218     switch (type) {
2219     case TCG_TYPE_V64:
2220         assert(TCG_TARGET_HAS_v64);
2221         break;
2222     case TCG_TYPE_V128:
2223         assert(TCG_TARGET_HAS_v128);
2224         break;
2225     case TCG_TYPE_V256:
2226         assert(TCG_TARGET_HAS_v256);
2227         break;
2228     default:
2229         g_assert_not_reached();
2230     }
2231 #endif
2232 
2233     t = tcg_temp_new_internal(type, TEMP_EBB);
2234     return temp_tcgv_vec(t);
2235 }
2236 
2237 /* Create a new temp of the same type as an existing temp.  */
2238 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2239 {
2240     TCGTemp *t = tcgv_vec_temp(match);
2241 
2242     tcg_debug_assert(t->temp_allocated != 0);
2243 
2244     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2245     return temp_tcgv_vec(t);
2246 }
2247 
2248 void tcg_temp_free_internal(TCGTemp *ts)
2249 {
2250     TCGContext *s = tcg_ctx;
2251 
2252     switch (ts->kind) {
2253     case TEMP_CONST:
2254     case TEMP_TB:
2255         /* Silently ignore free. */
2256         break;
2257     case TEMP_EBB:
2258         tcg_debug_assert(ts->temp_allocated != 0);
2259         ts->temp_allocated = 0;
2260         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2261         break;
2262     default:
2263         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2264         g_assert_not_reached();
2265     }
2266 }
2267 
2268 void tcg_temp_free_i32(TCGv_i32 arg)
2269 {
2270     tcg_temp_free_internal(tcgv_i32_temp(arg));
2271 }
2272 
2273 void tcg_temp_free_i64(TCGv_i64 arg)
2274 {
2275     tcg_temp_free_internal(tcgv_i64_temp(arg));
2276 }
2277 
2278 void tcg_temp_free_i128(TCGv_i128 arg)
2279 {
2280     tcg_temp_free_internal(tcgv_i128_temp(arg));
2281 }
2282 
2283 void tcg_temp_free_ptr(TCGv_ptr arg)
2284 {
2285     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2286 }
2287 
2288 void tcg_temp_free_vec(TCGv_vec arg)
2289 {
2290     tcg_temp_free_internal(tcgv_vec_temp(arg));
2291 }
2292 
/*
 * Return the interned TEMP_CONST temp for (TYPE, VAL), creating it on
 * first use.  Constants are deduplicated per type in a hash table keyed
 * by the 64-bit value; on 32-bit hosts an I64 constant occupies a pair
 * of I32 sub-temps.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type interning table. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* Key points into the temp itself, so it lives as long as the map. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2347 
2348 TCGv_i32 tcg_constant_i32(int32_t val)
2349 {
2350     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2351 }
2352 
2353 TCGv_i64 tcg_constant_i64(int64_t val)
2354 {
2355     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2356 }
2357 
2358 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2359 {
2360     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2361 }
2362 
2363 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2364 {
2365     val = dup_const(vece, val);
2366     return temp_tcgv_vec(tcg_constant_internal(type, val));
2367 }
2368 
2369 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2370 {
2371     TCGTemp *t = tcgv_vec_temp(match);
2372 
2373     tcg_debug_assert(t->temp_allocated != 0);
2374     return tcg_constant_vec(t->base_type, vece, val);
2375 }
2376 
2377 #ifdef CONFIG_DEBUG_TCG
2378 size_t temp_idx(TCGTemp *ts)
2379 {
2380     ptrdiff_t n = ts - tcg_ctx->temps;
2381     assert(n >= 0 && n < tcg_ctx->nb_temps);
2382     return n;
2383 }
2384 
/*
 * Debug build: decode a TCGv handle back into its TCGTemp.  The handle
 * value is the byte offset of the temp from the start of TCGContext,
 * so decoding is a checked pointer addition (GNU C void* arithmetic).
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    /* The offset must land on an allocated temp, exactly on a boundary. */
    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2394 #endif /* CONFIG_DEBUG_TCG */
2395 
2396 /*
2397  * Return true if OP may appear in the opcode stream with TYPE.
2398  * Test the runtime variable that controls each opcode.
2399  */
2400 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2401 {
2402     bool has_type;
2403 
2404     switch (type) {
2405     case TCG_TYPE_I32:
2406         has_type = true;
2407         break;
2408     case TCG_TYPE_I64:
2409         has_type = TCG_TARGET_REG_BITS == 64;
2410         break;
2411     case TCG_TYPE_V64:
2412         has_type = TCG_TARGET_HAS_v64;
2413         break;
2414     case TCG_TYPE_V128:
2415         has_type = TCG_TARGET_HAS_v128;
2416         break;
2417     case TCG_TYPE_V256:
2418         has_type = TCG_TARGET_HAS_v256;
2419         break;
2420     default:
2421         has_type = false;
2422         break;
2423     }
2424 
2425     switch (op) {
2426     case INDEX_op_discard:
2427     case INDEX_op_set_label:
2428     case INDEX_op_call:
2429     case INDEX_op_br:
2430     case INDEX_op_mb:
2431     case INDEX_op_insn_start:
2432     case INDEX_op_exit_tb:
2433     case INDEX_op_goto_tb:
2434     case INDEX_op_goto_ptr:
2435     case INDEX_op_qemu_ld_i32:
2436     case INDEX_op_qemu_st_i32:
2437     case INDEX_op_qemu_ld_i64:
2438     case INDEX_op_qemu_st_i64:
2439         return true;
2440 
2441     case INDEX_op_qemu_ld_i128:
2442     case INDEX_op_qemu_st_i128:
2443         return TCG_TARGET_HAS_qemu_ldst_i128;
2444 
2445     case INDEX_op_add:
2446     case INDEX_op_and:
2447     case INDEX_op_brcond:
2448     case INDEX_op_deposit:
2449     case INDEX_op_extract:
2450     case INDEX_op_ld8u:
2451     case INDEX_op_ld8s:
2452     case INDEX_op_ld16u:
2453     case INDEX_op_ld16s:
2454     case INDEX_op_ld:
2455     case INDEX_op_mov:
2456     case INDEX_op_movcond:
2457     case INDEX_op_negsetcond:
2458     case INDEX_op_or:
2459     case INDEX_op_setcond:
2460     case INDEX_op_sextract:
2461     case INDEX_op_st8:
2462     case INDEX_op_st16:
2463     case INDEX_op_st:
2464     case INDEX_op_xor:
2465         return has_type;
2466 
2467     case INDEX_op_brcond2_i32:
2468     case INDEX_op_setcond2_i32:
2469         return TCG_TARGET_REG_BITS == 32;
2470 
2471     case INDEX_op_ld32u:
2472     case INDEX_op_ld32s:
2473     case INDEX_op_st32:
2474     case INDEX_op_ext_i32_i64:
2475     case INDEX_op_extu_i32_i64:
2476     case INDEX_op_extrl_i64_i32:
2477     case INDEX_op_extrh_i64_i32:
2478         return TCG_TARGET_REG_BITS == 64;
2479 
2480     case INDEX_op_mov_vec:
2481     case INDEX_op_dup_vec:
2482     case INDEX_op_dupm_vec:
2483     case INDEX_op_ld_vec:
2484     case INDEX_op_st_vec:
2485     case INDEX_op_add_vec:
2486     case INDEX_op_sub_vec:
2487     case INDEX_op_and_vec:
2488     case INDEX_op_or_vec:
2489     case INDEX_op_xor_vec:
2490     case INDEX_op_cmp_vec:
2491         return has_type;
2492     case INDEX_op_dup2_vec:
2493         return has_type && TCG_TARGET_REG_BITS == 32;
2494     case INDEX_op_not_vec:
2495         return has_type && TCG_TARGET_HAS_not_vec;
2496     case INDEX_op_neg_vec:
2497         return has_type && TCG_TARGET_HAS_neg_vec;
2498     case INDEX_op_abs_vec:
2499         return has_type && TCG_TARGET_HAS_abs_vec;
2500     case INDEX_op_andc_vec:
2501         return has_type && TCG_TARGET_HAS_andc_vec;
2502     case INDEX_op_orc_vec:
2503         return has_type && TCG_TARGET_HAS_orc_vec;
2504     case INDEX_op_nand_vec:
2505         return has_type && TCG_TARGET_HAS_nand_vec;
2506     case INDEX_op_nor_vec:
2507         return has_type && TCG_TARGET_HAS_nor_vec;
2508     case INDEX_op_eqv_vec:
2509         return has_type && TCG_TARGET_HAS_eqv_vec;
2510     case INDEX_op_mul_vec:
2511         return has_type && TCG_TARGET_HAS_mul_vec;
2512     case INDEX_op_shli_vec:
2513     case INDEX_op_shri_vec:
2514     case INDEX_op_sari_vec:
2515         return has_type && TCG_TARGET_HAS_shi_vec;
2516     case INDEX_op_shls_vec:
2517     case INDEX_op_shrs_vec:
2518     case INDEX_op_sars_vec:
2519         return has_type && TCG_TARGET_HAS_shs_vec;
2520     case INDEX_op_shlv_vec:
2521     case INDEX_op_shrv_vec:
2522     case INDEX_op_sarv_vec:
2523         return has_type && TCG_TARGET_HAS_shv_vec;
2524     case INDEX_op_rotli_vec:
2525         return has_type && TCG_TARGET_HAS_roti_vec;
2526     case INDEX_op_rotls_vec:
2527         return has_type && TCG_TARGET_HAS_rots_vec;
2528     case INDEX_op_rotlv_vec:
2529     case INDEX_op_rotrv_vec:
2530         return has_type && TCG_TARGET_HAS_rotv_vec;
2531     case INDEX_op_ssadd_vec:
2532     case INDEX_op_usadd_vec:
2533     case INDEX_op_sssub_vec:
2534     case INDEX_op_ussub_vec:
2535         return has_type && TCG_TARGET_HAS_sat_vec;
2536     case INDEX_op_smin_vec:
2537     case INDEX_op_umin_vec:
2538     case INDEX_op_smax_vec:
2539     case INDEX_op_umax_vec:
2540         return has_type && TCG_TARGET_HAS_minmax_vec;
2541     case INDEX_op_bitsel_vec:
2542         return has_type && TCG_TARGET_HAS_bitsel_vec;
2543     case INDEX_op_cmpsel_vec:
2544         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2545 
2546     default:
2547         if (op < INDEX_op_last_generic) {
2548             const TCGOutOp *outop;
2549             TCGConstraintSetIndex con_set;
2550 
2551             if (!has_type) {
2552                 return false;
2553             }
2554 
2555             outop = all_outop[op];
2556             tcg_debug_assert(outop != NULL);
2557 
2558             con_set = outop->static_constraint;
2559             if (con_set == C_Dynamic) {
2560                 con_set = outop->dynamic_constraint(type, flags);
2561             }
2562             if (con_set >= 0) {
2563                 return true;
2564             }
2565             tcg_debug_assert(con_set == C_NotImplemented);
2566             return false;
2567         }
2568         tcg_debug_assert(op < NB_OPS);
2569         return true;
2570 
2571     case INDEX_op_last_generic:
2572         g_assert_not_reached();
2573     }
2574 }
2575 
2576 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2577 {
2578     unsigned width;
2579 
2580     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2581     width = (type == TCG_TYPE_I32 ? 32 : 64);
2582 
2583     tcg_debug_assert(ofs < width);
2584     tcg_debug_assert(len > 0);
2585     tcg_debug_assert(len <= width - ofs);
2586 
2587     return TCG_TARGET_deposit_valid(type, ofs, len);
2588 }
2589 
2590 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2591 
/*
 * Emit an INDEX_op_call op invoking helper FUNC described by INFO,
 * with output RET (NULL if none) and inputs ARGS, laid out per the
 * call layout computed by init_call_layout().
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Lazily compute the call layout, exactly once per helper. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Args: outputs, inputs, plus func pointer and info pointer. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word return: RET must be the base of n sub-temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            /* ABI requires the i32 argument widened to i64; emit the
               extension now and free the temp after the call op. */
            {
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    /* The extension temps are dead after the call op itself. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2684 
/*
 * Emit a call to helper FUNC taking no TCG temp arguments.
 * RET receives the result; it is NULL for a void helper (tcg_gen_callN
 * asserts ret == NULL when info->nr_out == 0).
 */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2689 
2690 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2691 {
2692     tcg_gen_callN(func, info, ret, &t1);
2693 }
2694 
2695 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2696                    TCGTemp *t1, TCGTemp *t2)
2697 {
2698     TCGTemp *args[2] = { t1, t2 };
2699     tcg_gen_callN(func, info, ret, args);
2700 }
2701 
2702 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2703                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2704 {
2705     TCGTemp *args[3] = { t1, t2, t3 };
2706     tcg_gen_callN(func, info, ret, args);
2707 }
2708 
2709 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2710                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2711 {
2712     TCGTemp *args[4] = { t1, t2, t3, t4 };
2713     tcg_gen_callN(func, info, ret, args);
2714 }
2715 
2716 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2717                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2718 {
2719     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2720     tcg_gen_callN(func, info, ret, args);
2721 }
2722 
2723 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2724                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2725                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2726 {
2727     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2728     tcg_gen_callN(func, info, ret, args);
2729 }
2730 
2731 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2732                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2733                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2734 {
2735     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2736     tcg_gen_callN(func, info, ret, args);
2737 }
2738 
2739 static void tcg_reg_alloc_start(TCGContext *s)
2740 {
2741     int i, n;
2742 
2743     for (i = 0, n = s->nb_temps; i < n; i++) {
2744         TCGTemp *ts = &s->temps[i];
2745         TCGTempVal val = TEMP_VAL_MEM;
2746 
2747         switch (ts->kind) {
2748         case TEMP_CONST:
2749             val = TEMP_VAL_CONST;
2750             break;
2751         case TEMP_FIXED:
2752             val = TEMP_VAL_REG;
2753             break;
2754         case TEMP_GLOBAL:
2755             break;
2756         case TEMP_EBB:
2757             val = TEMP_VAL_DEAD;
2758             /* fall through */
2759         case TEMP_TB:
2760             ts->mem_allocated = 0;
2761             break;
2762         default:
2763             g_assert_not_reached();
2764         }
2765         ts->val_type = val;
2766     }
2767 
2768     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2769 }
2770 
2771 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2772                                  TCGTemp *ts)
2773 {
2774     int idx = temp_idx(ts);
2775 
2776     switch (ts->kind) {
2777     case TEMP_FIXED:
2778     case TEMP_GLOBAL:
2779         pstrcpy(buf, buf_size, ts->name);
2780         break;
2781     case TEMP_TB:
2782         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2783         break;
2784     case TEMP_EBB:
2785         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2786         break;
2787     case TEMP_CONST:
2788         switch (ts->type) {
2789         case TCG_TYPE_I32:
2790             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2791             break;
2792 #if TCG_TARGET_REG_BITS > 32
2793         case TCG_TYPE_I64:
2794             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2795             break;
2796 #endif
2797         case TCG_TYPE_V64:
2798         case TCG_TYPE_V128:
2799         case TCG_TYPE_V256:
2800             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2801                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2802             break;
2803         default:
2804             g_assert_not_reached();
2805         }
2806         break;
2807     }
2808     return buf;
2809 }
2810 
2811 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2812                              int buf_size, TCGArg arg)
2813 {
2814     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2815 }
2816 
/*
 * Printable names for TCGCond values, indexed by condition code.
 * Used by tcg_dump_ops for comparison/branch opcodes; indices without
 * an entry are NULL and cause the raw value to be printed instead.
 */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2834 
/*
 * Printable names for the size/sign/byte-swap portion of a MemOp,
 * indexed by (mop & (MO_BSWAP | MO_SSIZE)).  Combinations without an
 * entry are NULL; tcg_dump_ops then falls back to a numeric dump.
 */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2852 
/*
 * Printable names for the alignment portion of a MemOp, indexed by
 * (mop & MO_AMASK) >> MO_ASHIFT.  Used by tcg_dump_ops.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2863 
/*
 * Printable names for the atomicity portion of a MemOp, indexed by
 * (mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT.  Used by tcg_dump_ops.
 */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2872 
/*
 * Printable names for bswap opcode flag combinations, indexed by the
 * TCG_BSWAP_* flags argument.  Stored as fixed-size char arrays, so
 * unset combinations read as empty strings.  Used by tcg_dump_ops.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2880 
#ifdef CONFIG_PLUGIN
/*
 * Printable names for the "from" argument of INDEX_op_plugin_cb,
 * indexed by its value.  Used by tcg_dump_ops.
 */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2889 
2890 static inline bool tcg_regset_single(TCGRegSet d)
2891 {
2892     return (d & (d - 1)) == 0;
2893 }
2894 
2895 static inline TCGReg tcg_regset_first(TCGRegSet d)
2896 {
2897     if (TCG_TARGET_NB_REGS <= 32) {
2898         return ctz32(d);
2899     } else {
2900         return ctz64(d);
2901     }
2902 }
2903 
/*
 * Return only the number of characters output -- no error return.
 * fprintf returns a negative value on error; clamping that to zero
 * keeps the column counter in tcg_dump_ops from going backwards.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2907 
/*
 * Dump the current opcode stream of S to stream F, one op per line.
 * When HAVE_PREFS is set, also print the output-register preferences
 * for each op; sync/dead liveness annotations are printed whenever
 * op->life is non-zero.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* current output column, for later padding */

        c = op->opc;
        def = &tcg_op_defs[c];

        /* insn_start: print its words on a fresh "----" separator line. */
        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Ordinary op: name (with int width or vector shape suffix). */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            /* k walks op->args; i counts constant args already printed. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* First constant argument, printed symbolically when known. */
            switch (c) {
            case INDEX_op_brcond:
            case INDEX_op_setcond:
            case INDEX_op_negsetcond:
            case INDEX_op_movcond:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16:
            case INDEX_op_bswap32:
            case INDEX_op_bswap64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Label and memory-barrier arguments, printed symbolically. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Which load/store orderings are enforced. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Any remaining constant arguments, printed numerically. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to column 40 before appending liveness/preference notes. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        /* Liveness annotations: which args must sync, which are dead. */
        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        /* Output-register preference sets, one per output argument. */
        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3232 
3233 /* we give more priority to constraints with less registers */
3234 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3235 {
3236     int n;
3237 
3238     arg_ct += k;
3239     n = ctpop64(arg_ct->regs);
3240 
3241     /*
3242      * Sort constraints of a single register first, which includes output
3243      * aliases (which must exactly match the input already allocated).
3244      */
3245     if (n == 1 || arg_ct->oalias) {
3246         return INT_MAX;
3247     }
3248 
3249     /*
3250      * Sort register pairs next, first then second immediately after.
3251      * Arbitrarily sort multiple pairs by the index of the first reg;
3252      * there shouldn't be many pairs.
3253      */
3254     switch (arg_ct->pair) {
3255     case 1:
3256     case 3:
3257         return (k + 1) * 2;
3258     case 2:
3259         return (arg_ct->pair_index + 1) * 2 - 1;
3260     }
3261 
3262     /* Finally, sort by decreasing register count. */
3263     assert(n > 1);
3264     return -n;
3265 }
3266 
3267 /* sort from highest priority to lowest */
3268 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3269 {
3270     int i, j;
3271 
3272     for (i = 0; i < n; i++) {
3273         a[start + i].sort_index = start + i;
3274     }
3275     if (n <= 1) {
3276         return;
3277     }
3278     for (i = 0; i < n - 1; i++) {
3279         for (j = i + 1; j < n; j++) {
3280             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3281             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3282             if (p1 < p2) {
3283                 int tmp = a[start + i].sort_index;
3284                 a[start + i].sort_index = a[start + j].sort_index;
3285                 a[start + j].sort_index = tmp;
3286             }
3287         }
3288     }
3289 }
3290 
/* All-zero constraint set, returned for TCG_OPF_NOT_PRESENT opcodes. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Expanded form of constraint_sets[], filled by process_constraint_sets(). */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3293 
/*
 * Expand every textual constraint set in constraint_sets[] into its
 * parsed TCGArgConstraint form in all_cts[].  Handles output aliases
 * ("0".."9"), new-register outputs ("&"), register pairs ("p"/"m"),
 * and the single-letter register/constant constraints (including the
 * target-specific letters pulled in from tcg-target-con-str.h).
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output O: copy its constraint. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Parse the remaining single-letter constraints. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3484 
3485 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3486 {
3487     TCGOpcode opc = op->opc;
3488     TCGType type = TCGOP_TYPE(op);
3489     unsigned flags = TCGOP_FLAGS(op);
3490     const TCGOpDef *def = &tcg_op_defs[opc];
3491     const TCGOutOp *outop = all_outop[opc];
3492     TCGConstraintSetIndex con_set;
3493 
3494     if (def->flags & TCG_OPF_NOT_PRESENT) {
3495         return empty_cts;
3496     }
3497 
3498     if (outop) {
3499         con_set = outop->static_constraint;
3500         if (con_set == C_Dynamic) {
3501             con_set = outop->dynamic_constraint(type, flags);
3502         }
3503     } else {
3504         con_set = tcg_target_op_def(opc, type, flags);
3505     }
3506     tcg_debug_assert(con_set >= 0);
3507     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3508 
3509     /* The constraint arguments must match TCGOpcode arguments. */
3510     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3511     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3512 
3513     return all_cts[con_set];
3514 }
3515 
3516 static void remove_label_use(TCGOp *op, int idx)
3517 {
3518     TCGLabel *label = arg_label(op->args[idx]);
3519     TCGLabelUse *use;
3520 
3521     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3522         if (use->op == op) {
3523             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3524             return;
3525         }
3526     }
3527     g_assert_not_reached();
3528 }
3529 
3530 void tcg_op_remove(TCGContext *s, TCGOp *op)
3531 {
3532     switch (op->opc) {
3533     case INDEX_op_br:
3534         remove_label_use(op, 0);
3535         break;
3536     case INDEX_op_brcond:
3537         remove_label_use(op, 3);
3538         break;
3539     case INDEX_op_brcond2_i32:
3540         remove_label_use(op, 5);
3541         break;
3542     default:
3543         break;
3544     }
3545 
3546     QTAILQ_REMOVE(&s->ops, op, link);
3547     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3548     s->nb_ops--;
3549 }
3550 
3551 void tcg_remove_ops_after(TCGOp *op)
3552 {
3553     TCGContext *s = tcg_ctx;
3554 
3555     while (true) {
3556         TCGOp *last = tcg_last_op();
3557         if (last == op) {
3558             return;
3559         }
3560         tcg_op_remove(s, last);
3561     }
3562 }
3563 
3564 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3565 {
3566     TCGContext *s = tcg_ctx;
3567     TCGOp *op = NULL;
3568 
3569     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3570         QTAILQ_FOREACH(op, &s->free_ops, link) {
3571             if (nargs <= op->nargs) {
3572                 QTAILQ_REMOVE(&s->free_ops, op, link);
3573                 nargs = op->nargs;
3574                 goto found;
3575             }
3576         }
3577     }
3578 
3579     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3580     nargs = MAX(4, nargs);
3581     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3582 
3583  found:
3584     memset(op, 0, offsetof(TCGOp, link));
3585     op->opc = opc;
3586     op->nargs = nargs;
3587 
3588     /* Check for bitfield overflow. */
3589     tcg_debug_assert(op->nargs == nargs);
3590 
3591     s->nb_ops++;
3592     return op;
3593 }
3594 
3595 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3596 {
3597     TCGOp *op = tcg_op_alloc(opc, nargs);
3598 
3599     if (tcg_ctx->emit_before_op) {
3600         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3601     } else {
3602         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3603     }
3604     return op;
3605 }
3606 
3607 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3608                             TCGOpcode opc, TCGType type, unsigned nargs)
3609 {
3610     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3611 
3612     TCGOP_TYPE(new_op) = type;
3613     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3614     return new_op;
3615 }
3616 
3617 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3618                            TCGOpcode opc, TCGType type, unsigned nargs)
3619 {
3620     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3621 
3622     TCGOP_TYPE(new_op) = type;
3623     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3624     return new_op;
3625 }
3626 
3627 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3628 {
3629     TCGLabelUse *u;
3630 
3631     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3632         TCGOp *op = u->op;
3633         switch (op->opc) {
3634         case INDEX_op_br:
3635             op->args[0] = label_arg(to);
3636             break;
3637         case INDEX_op_brcond:
3638             op->args[3] = label_arg(to);
3639             break;
3640         case INDEX_op_brcond2_i32:
3641             op->args[5] = label_arg(to);
3642             break;
3643         default:
3644             g_assert_not_reached();
3645         }
3646     }
3647 
3648     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3649 }
3650 
/* Reachable analysis : remove unreachable code.  */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    /*
     * DEAD is true while scanning ops that follow an unconditional
     * control transfer; such ops are removed unless a label (or the
     * removal of a branch-to-next) makes code live again.
     */
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        /* REMOVE was latched from DEAD on entry, unless overridden above. */
        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3742 
/* Per-temp liveness state bits used by the liveness passes below. */
#define TS_DEAD  1   /* value is dead (not needed by later ops) */
#define TS_MEM   2   /* value is synced to its memory slot */

/* Test bits in the local arg_life word computed by liveness_pass_1. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3748 
3749 /* For liveness_pass_1, the register preferences for a given temp.  */
3750 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3751 {
3752     return ts->state_ptr;
3753 }
3754 
3755 /* For liveness_pass_1, reset the preferences for a given temp to the
3756  * maximal regset for its type.
3757  */
3758 static inline void la_reset_pref(TCGTemp *ts)
3759 {
3760     *la_temp_pref(ts)
3761         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3762 }
3763 
3764 /* liveness analysis: end of function: all temps are dead, and globals
3765    should be in memory. */
3766 static void la_func_end(TCGContext *s, int ng, int nt)
3767 {
3768     int i;
3769 
3770     for (i = 0; i < ng; ++i) {
3771         s->temps[i].state = TS_DEAD | TS_MEM;
3772         la_reset_pref(&s->temps[i]);
3773     }
3774     for (i = ng; i < nt; ++i) {
3775         s->temps[i].state = TS_DEAD;
3776         la_reset_pref(&s->temps[i]);
3777     }
3778 }
3779 
3780 /* liveness analysis: end of basic block: all temps are dead, globals
3781    and local temps should be in memory. */
3782 static void la_bb_end(TCGContext *s, int ng, int nt)
3783 {
3784     int i;
3785 
3786     for (i = 0; i < nt; ++i) {
3787         TCGTemp *ts = &s->temps[i];
3788         int state;
3789 
3790         switch (ts->kind) {
3791         case TEMP_FIXED:
3792         case TEMP_GLOBAL:
3793         case TEMP_TB:
3794             state = TS_DEAD | TS_MEM;
3795             break;
3796         case TEMP_EBB:
3797         case TEMP_CONST:
3798             state = TS_DEAD;
3799             break;
3800         default:
3801             g_assert_not_reached();
3802         }
3803         ts->state = state;
3804         la_reset_pref(ts);
3805     }
3806 }
3807 
3808 /* liveness analysis: sync globals back to memory.  */
3809 static void la_global_sync(TCGContext *s, int ng)
3810 {
3811     int i;
3812 
3813     for (i = 0; i < ng; ++i) {
3814         int state = s->temps[i].state;
3815         s->temps[i].state = state | TS_MEM;
3816         if (state == TS_DEAD) {
3817             /* If the global was previously dead, reset prefs.  */
3818             la_reset_pref(&s->temps[i]);
3819         }
3820     }
3821 }
3822 
3823 /*
3824  * liveness analysis: conditional branch: all temps are dead unless
3825  * explicitly live-across-conditional-branch, globals and local temps
3826  * should be synced.
3827  */
3828 static void la_bb_sync(TCGContext *s, int ng, int nt)
3829 {
3830     la_global_sync(s, ng);
3831 
3832     for (int i = ng; i < nt; ++i) {
3833         TCGTemp *ts = &s->temps[i];
3834         int state;
3835 
3836         switch (ts->kind) {
3837         case TEMP_TB:
3838             state = ts->state;
3839             ts->state = state | TS_MEM;
3840             if (state != TS_DEAD) {
3841                 continue;
3842             }
3843             break;
3844         case TEMP_EBB:
3845         case TEMP_CONST:
3846             continue;
3847         default:
3848             g_assert_not_reached();
3849         }
3850         la_reset_pref(&s->temps[i]);
3851     }
3852 }
3853 
3854 /* liveness analysis: sync globals back to memory and kill.  */
3855 static void la_global_kill(TCGContext *s, int ng)
3856 {
3857     int i;
3858 
3859     for (i = 0; i < ng; i++) {
3860         s->temps[i].state = TS_DEAD | TS_MEM;
3861         la_reset_pref(&s->temps[i]);
3862     }
3863 }
3864 
3865 /* liveness analysis: note live globals crossing calls.  */
3866 static void la_cross_call(TCGContext *s, int nt)
3867 {
3868     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3869     int i;
3870 
3871     for (i = 0; i < nt; i++) {
3872         TCGTemp *ts = &s->temps[i];
3873         if (!(ts->state & TS_DEAD)) {
3874             TCGRegSet *pset = la_temp_pref(ts);
3875             TCGRegSet set = *pset;
3876 
3877             set &= mask;
3878             /* If the combination is not possible, restart.  */
3879             if (set == 0) {
3880                 set = tcg_target_available_regs[ts->type] & mask;
3881             }
3882             *pset = set;
3883         }
3884     }
3885 }
3886 
3887 /*
3888  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3889  * to TEMP_EBB, if possible.
3890  */
3891 static void __attribute__((noinline))
3892 liveness_pass_0(TCGContext *s)
3893 {
3894     void * const multiple_ebb = (void *)(uintptr_t)-1;
3895     int nb_temps = s->nb_temps;
3896     TCGOp *op, *ebb;
3897 
3898     for (int i = s->nb_globals; i < nb_temps; ++i) {
3899         s->temps[i].state_ptr = NULL;
3900     }
3901 
3902     /*
3903      * Represent each EBB by the op at which it begins.  In the case of
3904      * the first EBB, this is the first op, otherwise it is a label.
3905      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3906      * within a single EBB, else MULTIPLE_EBB.
3907      */
3908     ebb = QTAILQ_FIRST(&s->ops);
3909     QTAILQ_FOREACH(op, &s->ops, link) {
3910         const TCGOpDef *def;
3911         int nb_oargs, nb_iargs;
3912 
3913         switch (op->opc) {
3914         case INDEX_op_set_label:
3915             ebb = op;
3916             continue;
3917         case INDEX_op_discard:
3918             continue;
3919         case INDEX_op_call:
3920             nb_oargs = TCGOP_CALLO(op);
3921             nb_iargs = TCGOP_CALLI(op);
3922             break;
3923         default:
3924             def = &tcg_op_defs[op->opc];
3925             nb_oargs = def->nb_oargs;
3926             nb_iargs = def->nb_iargs;
3927             break;
3928         }
3929 
3930         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3931             TCGTemp *ts = arg_temp(op->args[i]);
3932 
3933             if (ts->kind != TEMP_TB) {
3934                 continue;
3935             }
3936             if (ts->state_ptr == NULL) {
3937                 ts->state_ptr = ebb;
3938             } else if (ts->state_ptr != ebb) {
3939                 ts->state_ptr = multiple_ebb;
3940             }
3941         }
3942     }
3943 
3944     /*
3945      * For TEMP_TB that turned out not to be used beyond one EBB,
3946      * reduce the liveness to TEMP_EBB.
3947      */
3948     for (int i = s->nb_globals; i < nb_temps; ++i) {
3949         TCGTemp *ts = &s->temps[i];
3950         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3951             ts->kind = TEMP_EBB;
3952         }
3953     }
3954 }
3955 
/* Debug check: the carry flag must not be live at this point. */
static void assert_carry_dead(TCGContext *s)
{
    /*
     * Carry operations can be separated by a few insns like mov,
     * load or store, but they should always be "close", and
     * carry-out operations should always be paired with carry-in.
     * At various boundaries, carry must have been consumed.
     */
    tcg_debug_assert(!s->carry_live);
}
3966 
/*
 * Liveness analysis: update each op's arg_life word to tell if a given
 * input argument is dead.  Instructions updating dead temporaries are
 * removed.  Also computes register preferences (via state_ptr), and
 * lowers carry and double-word multiply ops to simpler forms when
 * their carry/high/low results are unused.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;

    /* One preference regset per temp, reachable through ts->state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (int i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    s->carry_live = false;
    /* Walk ops in reverse: liveness propagates backward from each use. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def;
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            assert_carry_dead(s);
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (int i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (int i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (int i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (int i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (int i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            assert_carry_dead(s);
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_muls2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            assert_carry_dead(s);
            /* args[0] is the low result, args[1] the high result. */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            goto do_not_remove;

        case INDEX_op_addco:
            /* Carry-out unused: plain add suffices. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_add;
            goto do_default;

        case INDEX_op_addcio:
            /* Carry-out unused: carry-in-only add suffices. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_addci;
            goto do_default;

        case INDEX_op_subbo:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to sub, but this may also require canonicalization. */
            op->opc = opc = INDEX_op_sub;
            ts = arg_temp(op->args[2]);
            if (ts->kind == TEMP_CONST) {
                /* sub x, C -> add x, -C; the negated constant may be new. */
                ts = tcg_constant_internal(ts->type, -ts->val);
                if (ts->state_ptr == NULL) {
                    /* Late-created temp: give it a pref set like the rest. */
                    tcg_debug_assert(temp_idx(ts) == nb_temps);
                    nb_temps++;
                    ts->state_ptr = tcg_malloc(sizeof(TCGRegSet));
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }
                op->args[2] = temp_arg(ts);
                op->opc = opc = INDEX_op_add;
            }
            goto do_default;

        case INDEX_op_subbio:
            /* Borrow-out unused: borrow-in-only subtract suffices. */
            if (s->carry_live) {
                goto do_not_remove;
            }
            op->opc = opc = INDEX_op_subbi;
            goto do_default;

        case INDEX_op_addc1o:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to add, add +1. */
            op_prev = tcg_op_insert_before(s, op, INDEX_op_add,
                                           TCGOP_TYPE(op), 3);
            op_prev->args[0] = op->args[0];
            op_prev->args[1] = op->args[1];
            op_prev->args[2] = op->args[2];
            op->opc = opc = INDEX_op_add;
            op->args[1] = op->args[0];
            ts = arg_temp(op->args[0]);
            ts = tcg_constant_internal(ts->type, 1);
            op->args[2] = temp_arg(ts);
            goto do_default;

        case INDEX_op_subb1o:
            if (s->carry_live) {
                goto do_not_remove;
            }
            /* Lower to sub, add -1. */
            op_prev = tcg_op_insert_before(s, op, INDEX_op_sub,
                                           TCGOP_TYPE(op), 3);
            op_prev->args[0] = op->args[0];
            op_prev->args[1] = op->args[1];
            op_prev->args[2] = op->args[2];
            op->opc = opc = INDEX_op_add;
            op->args[1] = op->args[0];
            ts = arg_temp(op->args[0]);
            ts = tcg_constant_internal(ts->type, -1);
            op->args[2] = temp_arg(ts);
            goto do_default;

        default:
        do_default:
            /*
             * Test if the operation can be removed because all
             * its outputs are dead. We assume that nb_oargs == 0
             * implies side effects.
             */
            def = &tcg_op_defs[opc];
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && def->nb_oargs != 0) {
                for (int i = def->nb_oargs - 1; i >= 0; i--) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            def = &tcg_op_defs[opc];
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            for (int i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                assert_carry_dead(s);
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                assert_carry_dead(s);
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                assert_carry_dead(s);
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                assert_carry_dead(s);
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }
            if (def->flags & TCG_OPF_CARRY_OUT) {
                s->carry_live = false;
            }

            /* Input arguments are live for preceding opcodes.  */
            for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }
            if (def->flags & TCG_OPF_CARRY_IN) {
                s->carry_live = true;
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
    assert_carry_dead(s);
}
4355 
/* Liveness analysis: Convert indirect regs to direct temporaries.  */
/*
 * For each global whose value lives behind an indirect register, allocate
 * a shadow EBB temporary and rewrite all ops to use it, inserting explicit
 * loads before uses and stores after writes as dictated by the life data
 * computed in pass 1.  Returns true if any op argument was changed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its direct shadow. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Reload the shadow temp from the global's memory slot. */
                TCGOp *lop = tcg_op_insert_before(s, op, INDEX_op_ld,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* The mov is dead after the sync: store the mov
                           source directly and drop the mov itself. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOp *sop = tcg_op_insert_after(s, op, INDEX_op_st,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4538 
/*
 * Allocate a stack-frame slot for @ts within the TB frame, rounding the
 * current frame offset up to the required alignment.  If the base type was
 * subdivided into multiple parts, all parts receive adjacent slots.
 * Raises a TB overflow (restarting with a smaller TB) if the frame is full.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* SPARC addresses the frame through a biased frame pointer. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4607 
4608 /* Assign @reg to @ts, and update reg_to_temp[]. */
4609 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4610 {
4611     if (ts->val_type == TEMP_VAL_REG) {
4612         TCGReg old = ts->reg;
4613         tcg_debug_assert(s->reg_to_temp[old] == ts);
4614         if (old == reg) {
4615             return;
4616         }
4617         s->reg_to_temp[old] = NULL;
4618     }
4619     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4620     s->reg_to_temp[reg] = ts;
4621     ts->val_type = TEMP_VAL_REG;
4622     ts->reg = reg;
4623 }
4624 
4625 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4626 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4627 {
4628     tcg_debug_assert(type != TEMP_VAL_REG);
4629     if (ts->val_type == TEMP_VAL_REG) {
4630         TCGReg reg = ts->reg;
4631         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4632         s->reg_to_temp[reg] = NULL;
4633     }
4634     ts->val_type = type;
4635 }
4636 
4637 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4638 
4639 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4640    mark it free; otherwise mark it dead.  */
4641 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4642 {
4643     TCGTempVal new_type;
4644 
4645     switch (ts->kind) {
4646     case TEMP_FIXED:
4647         return;
4648     case TEMP_GLOBAL:
4649     case TEMP_TB:
4650         new_type = TEMP_VAL_MEM;
4651         break;
4652     case TEMP_EBB:
4653         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4654         break;
4655     case TEMP_CONST:
4656         new_type = TEMP_VAL_CONST;
4657         break;
4658     default:
4659         g_assert_not_reached();
4660     }
4661     set_temp_val_nonreg(s, ts, new_type);
4662 }
4663 
/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    /* Positive free_or_dead means "dead" (negative would mean "free"). */
    temp_free_or_dead(s, ts, 1);
}
4669 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register first. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Nothing to do: the value already lives only in memory. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        /* Memory now holds the current value. */
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4713 
4714 /* free register 'reg' by spilling the corresponding temporary if necessary */
4715 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4716 {
4717     TCGTemp *ts = s->reg_to_temp[reg];
4718     if (ts != NULL) {
4719         temp_sync(s, ts, allocated_regs, 0, -1);
4720     }
4721 }
4722 
4723 /**
4724  * tcg_reg_alloc:
4725  * @required_regs: Set of registers in which we must allocate.
4726  * @allocated_regs: Set of registers which must be avoided.
4727  * @preferred_regs: Set of registers we should prefer.
4728  * @rev: True if we search the registers in "indirect" order.
4729  *
4730  * The allocated register must be in @required_regs & ~@allocated_regs,
4731  * but if we can put it in @preferred_regs we may save a move later.
4732  */
4733 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4734                             TCGRegSet allocated_regs,
4735                             TCGRegSet preferred_regs, bool rev)
4736 {
4737     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4738     TCGRegSet reg_ct[2];
4739     const int *order;
4740 
4741     reg_ct[1] = required_regs & ~allocated_regs;
4742     tcg_debug_assert(reg_ct[1] != 0);
4743     reg_ct[0] = reg_ct[1] & preferred_regs;
4744 
4745     /* Skip the preferred_regs option if it cannot be satisfied,
4746        or if the preference made no difference.  */
4747     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4748 
4749     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4750 
4751     /* Try free registers, preferences first.  */
4752     for (j = f; j < 2; j++) {
4753         TCGRegSet set = reg_ct[j];
4754 
4755         if (tcg_regset_single(set)) {
4756             /* One register in the set.  */
4757             TCGReg reg = tcg_regset_first(set);
4758             if (s->reg_to_temp[reg] == NULL) {
4759                 return reg;
4760             }
4761         } else {
4762             for (i = 0; i < n; i++) {
4763                 TCGReg reg = order[i];
4764                 if (s->reg_to_temp[reg] == NULL &&
4765                     tcg_regset_test_reg(set, reg)) {
4766                     return reg;
4767                 }
4768             }
4769         }
4770     }
4771 
4772     /* We must spill something.  */
4773     for (j = f; j < 2; j++) {
4774         TCGRegSet set = reg_ct[j];
4775 
4776         if (tcg_regset_single(set)) {
4777             /* One register in the set.  */
4778             TCGReg reg = tcg_regset_first(set);
4779             tcg_reg_free(s, reg, allocated_regs);
4780             return reg;
4781         } else {
4782             for (i = 0; i < n; i++) {
4783                 TCGReg reg = order[i];
4784                 if (tcg_regset_test_reg(set, reg)) {
4785                     tcg_reg_free(s, reg, allocated_regs);
4786                     return reg;
4787                 }
4788             }
4789         }
4790     }
4791 
4792     g_assert_not_reached();
4793 }
4794 
/*
 * Allocate a register pair (reg, reg + 1), returning the lower register.
 * As for tcg_reg_alloc, the pair must lie within @required_regs and avoid
 * @allocated_regs; @preferred_regs may save a move later.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts how many of the pair are already free. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op for a free register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4840 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I64) {
            /* Scalar constant: a plain move-immediate suffices. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register now holds a value newer than any memory copy. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory agree after the load. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4892 
4893 /* Save a temporary to memory. 'allocated_regs' is used in case a
4894    temporary registers needs to be allocated to store a constant.  */
4895 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4896 {
4897     /* The liveness analysis already ensures that globals are back
4898        in memory. Keep an tcg_debug_assert for safety. */
4899     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4900 }
4901 
4902 /* save globals to their canonical location and assume they can be
4903    modified be the following code. 'allocated_regs' is used in case a
4904    temporary registers needs to be allocated to store a constant. */
4905 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4906 {
4907     int i, n;
4908 
4909     for (i = 0, n = s->nb_globals; i < n; i++) {
4910         temp_save(s, &s->temps[i], allocated_regs);
4911     }
4912 }
4913 
4914 /* sync globals to their canonical location and assume they can be
4915    read by the following code. 'allocated_regs' is used in case a
4916    temporary registers needs to be allocated to store a constant. */
4917 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4918 {
4919     int i, n;
4920 
4921     for (i = 0, n = s->nb_globals; i < n; i++) {
4922         TCGTemp *ts = &s->temps[i];
4923         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4924                          || ts->kind == TEMP_FIXED
4925                          || ts->mem_coherent);
4926     }
4927 }
4928 
4929 /* at the end of a basic block, we assume all temporaries are dead and
4930    all globals are stored at their canonical location. */
4931 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4932 {
4933     assert_carry_dead(s);
4934     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4935         TCGTemp *ts = &s->temps[i];
4936 
4937         switch (ts->kind) {
4938         case TEMP_TB:
4939             temp_save(s, ts, allocated_regs);
4940             break;
4941         case TEMP_EBB:
4942             /* The liveness analysis already ensures that temps are dead.
4943                Keep an tcg_debug_assert for safety. */
4944             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4945             break;
4946         case TEMP_CONST:
4947             /* Similarly, we should have freed any allocated register. */
4948             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4949             break;
4950         default:
4951             g_assert_not_reached();
4952         }
4953     }
4954 
4955     save_globals(s, allocated_regs);
4956 }
4957 
4958 /*
4959  * At a conditional branch, we assume all temporaries are dead unless
4960  * explicitly live-across-conditional-branch; all globals and local
4961  * temps are synced to their location.
4962  */
4963 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4964 {
4965     assert_carry_dead(s);
4966     sync_globals(s, allocated_regs);
4967 
4968     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4969         TCGTemp *ts = &s->temps[i];
4970         /*
4971          * The liveness analysis already ensures that temps are dead.
4972          * Keep tcg_debug_asserts for safety.
4973          */
4974         switch (ts->kind) {
4975         case TEMP_TB:
4976             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4977             break;
4978         case TEMP_EBB:
4979         case TEMP_CONST:
4980             break;
4981         default:
4982             g_assert_not_reached();
4983         }
4984     }
4985 }
4986 
/*
 * Specialized code generation for INDEX_op_mov_* with a constant.
 * Note: IS_DEAD_ARG/NEED_SYNC_ARG implicitly reference @arg_life.
 */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* The movi is not explicitly generated here.  */
    set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
    ots->val = val;
    /* The constant supersedes any stale memory copy. */
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        /* Store the constant to memory, freeing/killing per liveness. */
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
5007 
/*
 * Specialized code generation for INDEX_op_mov_*.
 */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;
    TCGReg oreg, ireg;

    allocated_regs = s->reserved_regs;
    preferred_regs = output_pref(op, 0);
    ots = arg_temp(op->args[0]);   /* destination */
    ts = arg_temp(op->args[1]);    /* source */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }
    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    ireg = ts->reg;

    if (IS_DEAD_ARG(0)) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        /* Store straight to the destination's memory slot; no output
           register is needed since the output dies immediately. */
        tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
        return;
    }

    if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
        /*
         * The mov can be suppressed.  Kill input first, so that it
         * is unlinked from reg_to_temp, then set the output to the
         * reg that we saved from the input.
         */
        temp_dead(s, ts);
        oreg = ireg;
    } else {
        if (ots->val_type == TEMP_VAL_REG) {
            /* Reuse the register already assigned to the output. */
            oreg = ots->reg;
        } else {
            /* Make sure to not spill the input register during allocation. */
            oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                 allocated_regs | ((TCGRegSet)1 << ireg),
                                 preferred_regs, ots->indirect_base);
        }
        if (!tcg_out_mov(s, otype, oreg, ireg)) {
            /*
             * Cross register class move not supported.
             * Store the source register into the destination slot
             * and leave the destination temp as TEMP_VAL_MEM.
             */
            assert(!temp_readonly(ots));
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ots);
            }
            tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
            set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
            ots->mem_coherent = 1;
            return;
        }
    }
    set_temp_val_reg(s, ots, oreg);
    /* The output register holds a value newer than its memory slot. */
    ots->mem_coherent = 0;

    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, allocated_regs, 0, 0);
    }
}
5107 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    const TCGArgConstraint *dup_args_ct;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);   /* vector destination */
    its = arg_temp(op->args[1]);   /* scalar/vector source */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_TYPE(op);

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_args_ct = opcode_args_ct(op);
    dup_out_regs = dup_args_ct[0].regs;
    dup_in_regs = dup_args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* Point at the least-significant element of the source. */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* The output register holds a value newer than its memory slot. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
5220 
/*
 * Register-allocate and emit one generic TCG opcode.
 *
 * Phases: (1) satisfy input constraints, loading or copying temps into
 * acceptable registers; (2) free the registers of input temps that die
 * here; (3) handle branch / end-of-block / call-clobber bookkeeping and
 * satisfy output constraints; (4) dispatch to the backend emitter for
 * the opcode; (5) sync or kill output temps as liveness data demands.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *args_ct;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];
    TCGCond op_cond;

    /* An op consuming carry requires that liveness tracked one as set. */
    if (def->flags & TCG_OPF_CARRY_IN) {
        tcg_debug_assert(s->carry_live);
    }

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /*
     * Extract the comparison condition embedded in this opcode, if any,
     * so tcg_target_const_match can accept condition-dependent constant
     * encodings.  The condition's operand index varies per opcode.
     */
    switch (op->opc) {
    case INDEX_op_brcond:
        op_cond = op->args[2];
        break;
    case INDEX_op_setcond:
    case INDEX_op_negsetcond:
    case INDEX_op_cmp_vec:
        op_cond = op->args[3];
        break;
    case INDEX_op_brcond2_i32:
        op_cond = op->args[4];
        break;
    case INDEX_op_movcond:
    case INDEX_op_setcond2_i32:
    case INDEX_op_cmpsel_vec:
        op_cond = op->args[5];
        break;
    default:
        /* No condition within opcode. */
        op_cond = TCG_COND_ALWAYS;
        break;
    }

    args_ct = opcode_args_ct(op);

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Visit inputs in the constraint-sorted order, not operand order. */
        i = args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST) {
#ifdef TCG_REG_ZERO
            if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
                /* Hardware zero register: indicate register via non-const. */
                const_args[i] = 0;
                new_args[i] = TCG_REG_ZERO;
                continue;
            }
#endif

            if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
                                       op_cond, TCGOP_VECE(op))) {
                /* constant is OK for instruction */
                const_args[i] = 1;
                new_args[i] = ts->val;
                continue;
            }
        }

        /* NB: reg is only meaningful when val_type == TEMP_VAL_REG. */
        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)
                    || args_ct[arg_ct->alias_index].newreg) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The second of a pair is always the register after the first. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /*
             * The input may be used in place if it is dead, already in
             * the required (odd) half of a free pair, and the preceding
             * (even) register is free to become the first output.
             */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /* Allocate a fresh pair; this input occupies its second half. */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /*
     * Conditional branches and block ends are handled by dedicated
     * helpers; everything else may need call-clobbered registers freed
     * and globals synced before outputs are allocated.
     */
    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            assert_carry_dead(s);
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
            i = args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Output shares the register of its aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* newreg: must not overlap any input register. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs,
                                             i_allocated_regs | o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                             output_pref(op, k),
                                             ts->indirect_base);
                }
                break;

            case 2: /* second of pair */
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    TCGType type = TCGOP_TYPE(op);
    switch (op->opc) {
    case INDEX_op_addc1o:
        tcg_out_set_carry(s);
        /* fall through */
    case INDEX_op_add:
    case INDEX_op_addcio:
    case INDEX_op_addco:
    case INDEX_op_and:
    case INDEX_op_andc:
    case INDEX_op_clz:
    case INDEX_op_ctz:
    case INDEX_op_divs:
    case INDEX_op_divu:
    case INDEX_op_eqv:
    case INDEX_op_mul:
    case INDEX_op_mulsh:
    case INDEX_op_muluh:
    case INDEX_op_nand:
    case INDEX_op_nor:
    case INDEX_op_or:
    case INDEX_op_orc:
    case INDEX_op_rems:
    case INDEX_op_remu:
    case INDEX_op_rotl:
    case INDEX_op_rotr:
    case INDEX_op_sar:
    case INDEX_op_shl:
    case INDEX_op_shr:
    case INDEX_op_xor:
        {
            const TCGOutOpBinary *out =
                container_of(all_outop[op->opc], TCGOutOpBinary, base);

            /* Constants should never appear in the first source operand. */
            tcg_debug_assert(!const_args[1]);
            if (const_args[2]) {
                out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_sub:
        {
            const TCGOutOpSubtract *out = &outop_sub;

            /*
             * Constants should never appear in the second source operand.
             * These are folded to add with negative constant.
             */
            tcg_debug_assert(!const_args[2]);
            if (const_args[1]) {
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_subb1o:
        tcg_out_set_borrow(s);
        /* fall through */
    case INDEX_op_addci:
    case INDEX_op_subbi:
    case INDEX_op_subbio:
    case INDEX_op_subbo:
        {
            const TCGOutOpAddSubCarry *out =
                container_of(all_outop[op->opc], TCGOutOpAddSubCarry, base);

            /* Carry ops allow constants in either source operand. */
            if (const_args[2]) {
                if (const_args[1]) {
                    out->out_rii(s, type, new_args[0],
                                 new_args[1], new_args[2]);
                } else {
                    out->out_rri(s, type, new_args[0],
                                 new_args[1], new_args[2]);
                }
            } else if (const_args[1]) {
                out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_bswap64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        assert(TCG_TARGET_REG_BITS == 64);
        /* fall through */
    case INDEX_op_ctpop:
    case INDEX_op_neg:
    case INDEX_op_not:
        {
            const TCGOutOpUnary *out =
                container_of(all_outop[op->opc], TCGOutOpUnary, base);

            /* Constants should have been folded. */
            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1]);
        }
        break;

    case INDEX_op_bswap16:
    case INDEX_op_bswap32:
        {
            const TCGOutOpBswap *out =
                container_of(all_outop[op->opc], TCGOutOpBswap, base);

            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1], new_args[2]);
        }
        break;

    case INDEX_op_deposit:
        {
            const TCGOutOpDeposit *out = &outop_deposit;

            if (const_args[2]) {
                tcg_debug_assert(!const_args[1]);
                out->out_rri(s, type, new_args[0], new_args[1],
                             new_args[2], new_args[3], new_args[4]);
            } else if (const_args[1]) {
                /* A constant first operand must be zero (deposit into 0). */
                tcg_debug_assert(new_args[1] == 0);
                tcg_debug_assert(!const_args[2]);
                out->out_rzr(s, type, new_args[0], new_args[2],
                             new_args[3], new_args[4]);
            } else {
                out->out_rrr(s, type, new_args[0], new_args[1],
                             new_args[2], new_args[3], new_args[4]);
            }
        }
        break;

    case INDEX_op_divs2:
    case INDEX_op_divu2:
        {
            const TCGOutOpDivRem *out =
                container_of(all_outop[op->opc], TCGOutOpDivRem, base);

            /* Only used by x86 and s390x, which use matching constraints. */
            tcg_debug_assert(new_args[0] == new_args[2]);
            tcg_debug_assert(new_args[1] == new_args[3]);
            tcg_debug_assert(!const_args[4]);
            out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
        }
        break;

    case INDEX_op_extract:
    case INDEX_op_sextract:
        {
            const TCGOutOpExtract *out =
                container_of(all_outop[op->opc], TCGOutOpExtract, base);

            tcg_debug_assert(!const_args[1]);
            out->out_rr(s, type, new_args[0], new_args[1],
                        new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_extract2:
        {
            const TCGOutOpExtract2 *out = &outop_extract2;

            tcg_debug_assert(!const_args[1]);
            tcg_debug_assert(!const_args[2]);
            out->out_rrr(s, type, new_args[0], new_args[1],
                         new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_ld8u:
    case INDEX_op_ld8s:
    case INDEX_op_ld16u:
    case INDEX_op_ld16s:
    case INDEX_op_ld32u:
    case INDEX_op_ld32s:
    case INDEX_op_ld:
        {
            const TCGOutOpLoad *out =
                container_of(all_outop[op->opc], TCGOutOpLoad, base);

            tcg_debug_assert(!const_args[1]);
            out->out(s, type, new_args[0], new_args[1], new_args[2]);
        }
        break;

    case INDEX_op_muls2:
    case INDEX_op_mulu2:
        {
            const TCGOutOpMul2 *out =
                container_of(all_outop[op->opc], TCGOutOpMul2, base);

            tcg_debug_assert(!const_args[2]);
            tcg_debug_assert(!const_args[3]);
            out->out_rrrr(s, type, new_args[0], new_args[1],
                          new_args[2], new_args[3]);
        }
        break;

    case INDEX_op_st32:
        /* Use tcg_op_st w/ I32. */
        type = TCG_TYPE_I32;
        /* fall through */
    case INDEX_op_st:
    case INDEX_op_st8:
    case INDEX_op_st16:
        {
            const TCGOutOpStore *out =
                container_of(all_outop[op->opc], TCGOutOpStore, base);

            if (const_args[0]) {
                out->out_i(s, type, new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_r(s, type, new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

    case INDEX_op_brcond:
        {
            const TCGOutOpBrcond *out = &outop_brcond;
            TCGCond cond = new_args[2];
            TCGLabel *label = arg_label(new_args[3]);

            tcg_debug_assert(!const_args[0]);
            if (const_args[1]) {
                out->out_ri(s, type, cond, new_args[0], new_args[1], label);
            } else {
                out->out_rr(s, type, cond, new_args[0], new_args[1], label);
            }
        }
        break;

    case INDEX_op_movcond:
        {
            const TCGOutOpMovcond *out = &outop_movcond;
            TCGCond cond = new_args[5];

            tcg_debug_assert(!const_args[1]);
            out->out(s, type, cond, new_args[0],
                     new_args[1], new_args[2], const_args[2],
                     new_args[3], const_args[3],
                     new_args[4], const_args[4]);
        }
        break;

    case INDEX_op_setcond:
    case INDEX_op_negsetcond:
        {
            const TCGOutOpSetcond *out =
                container_of(all_outop[op->opc], TCGOutOpSetcond, base);
            TCGCond cond = new_args[3];

            tcg_debug_assert(!const_args[1]);
            if (const_args[2]) {
                out->out_rri(s, type, cond,
                             new_args[0], new_args[1], new_args[2]);
            } else {
                out->out_rrr(s, type, cond,
                             new_args[0], new_args[1], new_args[2]);
            }
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        {
            const TCGOutOpBrcond2 *out = &outop_brcond2;
            TCGCond cond = new_args[4];
            TCGLabel *label = arg_label(new_args[5]);

            tcg_debug_assert(!const_args[0]);
            tcg_debug_assert(!const_args[1]);
            out->out(s, cond, new_args[0], new_args[1],
                     new_args[2], const_args[2],
                     new_args[3], const_args[3], label);
        }
        break;
    case INDEX_op_setcond2_i32:
        {
            const TCGOutOpSetcond2 *out = &outop_setcond2;
            TCGCond cond = new_args[5];

            tcg_debug_assert(!const_args[1]);
            tcg_debug_assert(!const_args[2]);
            out->out(s, cond, new_args[0], new_args[1], new_args[2],
                     new_args[3], const_args[3], new_args[4], const_args[4]);
        }
        break;
#else
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        g_assert_not_reached();
#endif

    case INDEX_op_goto_ptr:
        tcg_debug_assert(!const_args[0]);
        tcg_out_goto_ptr(s, new_args[0]);
        break;

    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
                           TCGOP_VECE(op), new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, type, new_args, const_args);
        }
        break;
    }

    /* Update carry-liveness tracking according to this op's flags. */
    if (def->flags & TCG_OPF_CARRY_IN) {
        s->carry_live = false;
    }
    if (def->flags & TCG_OPF_CARRY_OUT) {
        s->carry_live = true;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5911 
/*
 * Register-allocate INDEX_op_dup2_vec: build a vector whose 64-bit
 * elements are composed from two 32-bit inputs (low, high) on a
 * 32-bit host.  Returns true if the dup was emitted here; false to
 * request the caller's generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);   /* low 32 bits of the element */
    itsh = arg_temp(op->args[2]);   /* high 32 bits of the element */

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that can replicate the value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /*
     * If the two inputs form one 64-bit value, try dupm_vec.
     * The inputs must be adjacent halves of the same 64-bit temp,
     * in host-endian order; sync them to memory and dup from there.
     */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* The output register now holds the value; its memory copy is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5997 }
5998 
5999 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
6000                          TCGRegSet allocated_regs)
6001 {
6002     if (ts->val_type == TEMP_VAL_REG) {
6003         if (ts->reg != reg) {
6004             tcg_reg_free(s, reg, allocated_regs);
6005             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
6006                 /*
6007                  * Cross register class move not supported.  Sync the
6008                  * temp back to its slot and load from there.
6009                  */
6010                 temp_sync(s, ts, allocated_regs, 0, 0);
6011                 tcg_out_ld(s, ts->type, reg,
6012                            ts->mem_base->reg, ts->mem_offset);
6013             }
6014         }
6015     } else {
6016         TCGRegSet arg_set = 0;
6017 
6018         tcg_reg_free(s, reg, allocated_regs);
6019         tcg_regset_set_reg(arg_set, reg);
6020         temp_load(s, ts, arg_set, allocated_regs, 0);
6021     }
6022 }
6023 
6024 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
6025                          TCGRegSet allocated_regs)
6026 {
6027     /*
6028      * When the destination is on the stack, load up the temp and store.
6029      * If there are many call-saved registers, the temp might live to
6030      * see another use; otherwise it'll be discarded.
6031      */
6032     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
6033     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
6034                arg_slot_stk_ofs(arg_slot));
6035 }
6036 
6037 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
6038                             TCGTemp *ts, TCGRegSet *allocated_regs)
6039 {
6040     if (arg_slot_reg_p(l->arg_slot)) {
6041         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
6042         load_arg_reg(s, reg, ts, *allocated_regs);
6043         tcg_regset_set_reg(*allocated_regs, reg);
6044     } else {
6045         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
6046     }
6047 }
6048 
/*
 * Pass the address @ref_base + @ref_off in argument slot @arg_slot,
 * either in the corresponding argument register or in its stack slot.
 */
static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
                         intptr_t ref_off, TCGRegSet *allocated_regs)
{
    TCGReg reg;

    if (arg_slot_reg_p(arg_slot)) {
        /* Register argument: free the ABI register and form the address. */
        reg = tcg_target_call_iarg_regs[arg_slot];
        tcg_reg_free(s, reg, *allocated_regs);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_regset_set_reg(*allocated_regs, reg);
    } else {
        /* Stack argument: form the address in a scratch reg, then store. */
        reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
                            *allocated_regs, 0, false);
        tcg_out_addi_ptr(s, reg, ref_base, ref_off);
        tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
                   arg_slot_stk_ofs(arg_slot));
    }
}
6067 
/*
 * Allocate registers and emit code for an INDEX_op_call: load the
 * inputs into their ABI locations, clobber the caller-saved registers,
 * save/sync globals per the call flags, emit the call, and assign the
 * outputs from the ABI return locations.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its stack home, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent word of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part comes back in an ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* The i128 result arrived in a vector register; spill it. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
6198 
6199 /**
6200  * atom_and_align_for_opc:
6201  * @s: tcg context
6202  * @opc: memory operation code
6203  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
6204  * @allow_two_ops: true if we are prepared to issue two operations
6205  *
6206  * Return the alignment and atomicity to use for the inline fast path
6207  * for the given memory operation.  The alignment may be larger than
6208  * that specified in @opc, and the correct alignment will be diagnosed
6209  * by the slow path helper.
6210  *
6211  * If @allow_two_ops, the host is prepared to test for 2x alignment,
6212  * and issue two loads or stores for subalignment.
6213  */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
{
    MemOp align = memop_alignment_bits(opc);
    MemOp size = opc & MO_SIZE;
    MemOp half = size ? size - 1 : 0;   /* size of one half of the access */
    MemOp atom = opc & MO_ATOM_MASK;
    MemOp atmax;

    switch (atom) {
    case MO_ATOM_NONE:
        /* The operation requires no specific atomicity. */
        atmax = MO_8;
        break;

    case MO_ATOM_IFALIGN:
        /* Atomic if naturally aligned: require full-size atomicity. */
        atmax = size;
        break;

    case MO_ATOM_IFALIGN_PAIR:
        /* Each half is atomic if aligned: only half-size atomicity. */
        atmax = half;
        break;

    case MO_ATOM_WITHIN16:
        atmax = size;
        if (size == MO_128) {
            /* Misalignment implies !within16, and therefore no atomicity. */
        } else if (host_atom != MO_ATOM_WITHIN16) {
            /* The host does not implement within16, so require alignment. */
            align = MAX(align, size);
        }
        break;

    case MO_ATOM_WITHIN16_PAIR:
        atmax = size;
        /*
         * Misalignment implies !within16, and therefore half atomicity.
         * Any host prepared for two operations can implement this with
         * half alignment.
         */
        if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
            align = MAX(align, half);
        }
        break;

    case MO_ATOM_SUBALIGN:
        atmax = size;
        if (host_atom != MO_ATOM_SUBALIGN) {
            /* If unaligned but not odd, there are subobjects up to half. */
            if (allow_two_ops) {
                align = MAX(align, half);
            } else {
                align = MAX(align, size);
            }
        }
        break;

    default:
        g_assert_not_reached();
    }

    return (TCGAtomAlign){ .atom = atmax, .align = align };
}
6277 
6278 /*
6279  * Similarly for qemu_ld/st slow path helpers.
6280  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
6281  * using only the provided backend tcg_out_* functions.
6282  */
6283 
6284 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
6285 {
6286     int ofs = arg_slot_stk_ofs(slot);
6287 
6288     /*
6289      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
6290      * require extension to uint64_t, adjust the address for uint32_t.
6291      */
6292     if (HOST_BIG_ENDIAN &&
6293         TCG_TARGET_REG_BITS == 64 &&
6294         type == TCG_TYPE_I32) {
6295         ofs += 4;
6296     }
6297     return ofs;
6298 }
6299 
6300 static void tcg_out_helper_load_slots(TCGContext *s,
6301                                       unsigned nmov, TCGMovExtend *mov,
6302                                       const TCGLdstHelperParam *parm)
6303 {
6304     unsigned i;
6305     TCGReg dst3;
6306 
6307     /*
6308      * Start from the end, storing to the stack first.
6309      * This frees those registers, so we need not consider overlap.
6310      */
6311     for (i = nmov; i-- > 0; ) {
6312         unsigned slot = mov[i].dst;
6313 
6314         if (arg_slot_reg_p(slot)) {
6315             goto found_reg;
6316         }
6317 
6318         TCGReg src = mov[i].src;
6319         TCGType dst_type = mov[i].dst_type;
6320         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6321 
6322         /* The argument is going onto the stack; extend into scratch. */
6323         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
6324             tcg_debug_assert(parm->ntmp != 0);
6325             mov[i].dst = src = parm->tmp[0];
6326             tcg_out_movext1(s, &mov[i]);
6327         }
6328 
6329         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
6330                    tcg_out_helper_stk_ofs(dst_type, slot));
6331     }
6332     return;
6333 
6334  found_reg:
6335     /*
6336      * The remaining arguments are in registers.
6337      * Convert slot numbers to argument registers.
6338      */
6339     nmov = i + 1;
6340     for (i = 0; i < nmov; ++i) {
6341         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
6342     }
6343 
6344     switch (nmov) {
6345     case 4:
6346         /* The backend must have provided enough temps for the worst case. */
6347         tcg_debug_assert(parm->ntmp >= 2);
6348 
6349         dst3 = mov[3].dst;
6350         for (unsigned j = 0; j < 3; ++j) {
6351             if (dst3 == mov[j].src) {
6352                 /*
6353                  * Conflict. Copy the source to a temporary, perform the
6354                  * remaining moves, then the extension from our scratch
6355                  * on the way out.
6356                  */
6357                 TCGReg scratch = parm->tmp[1];
6358 
6359                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
6360                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
6361                 tcg_out_movext1_new_src(s, &mov[3], scratch);
6362                 break;
6363             }
6364         }
6365 
6366         /* No conflicts: perform this move and continue. */
6367         tcg_out_movext1(s, &mov[3]);
6368         /* fall through */
6369 
6370     case 3:
6371         tcg_out_movext3(s, mov, mov + 1, mov + 2,
6372                         parm->ntmp ? parm->tmp[0] : -1);
6373         break;
6374     case 2:
6375         tcg_out_movext2(s, mov, mov + 1,
6376                         parm->ntmp ? parm->tmp[0] : -1);
6377         break;
6378     case 1:
6379         tcg_out_movext1(s, mov);
6380         break;
6381     default:
6382         g_assert_not_reached();
6383     }
6384 }
6385 
6386 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
6387                                     TCGType type, tcg_target_long imm,
6388                                     const TCGLdstHelperParam *parm)
6389 {
6390     if (arg_slot_reg_p(slot)) {
6391         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
6392     } else {
6393         int ofs = tcg_out_helper_stk_ofs(type, slot);
6394         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
6395             tcg_debug_assert(parm->ntmp != 0);
6396             tcg_out_movi(s, type, parm->tmp[0], imm);
6397             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
6398         }
6399     }
6400 }
6401 
/*
 * Load the arguments common to all ld/st slow-path helpers: env
 * (always info->in[0], deferred until now), the MemOpIdx, and the
 * return address.  @next_arg is the index within info->in of the
 * MemOpIdx argument, i.e. the first slot after the address/data.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* The backend supplies the return address value at runtime. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Otherwise pass the recorded raddr as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6468 
/*
 * Fill @mov with the move(s) that place the value in (@lo, @hi) into
 * the argument location(s) at @loc, converting from @src_type to
 * @dst_type.  Returns the number of TCGMovExtend entries written:
 * 1 when the value fits in one host register, 2 for a register pair
 * (I64 on a 32-bit host, or I128 on a 64-bit host).
 */
static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
                                       const TCGCallArgumentLoc *loc,
                                       TCGType dst_type, TCGType src_type,
                                       TCGReg lo, TCGReg hi)
{
    MemOp reg_mo;

    if (dst_type <= TCG_TYPE_REG) {
        /* Single-register value; pick the extension per the arg kind. */
        MemOp src_ext;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
            break;
        case TCG_CALL_ARG_EXTEND_U:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_UL;
            break;
        case TCG_CALL_ARG_EXTEND_S:
            dst_type = TCG_TYPE_REG;
            src_ext = MO_SL;
            break;
        default:
            g_assert_not_reached();
        }

        mov[0].dst = loc->arg_slot;
        mov[0].dst_type = dst_type;
        mov[0].src = lo;
        mov[0].src_type = src_type;
        mov[0].src_ext = src_ext;
        return 1;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        assert(dst_type == TCG_TYPE_I64);
        reg_mo = MO_32;
    } else {
        assert(dst_type == TCG_TYPE_I128);
        reg_mo = MO_64;
    }

    /* Low part goes to the first slot in host-endian order. */
    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
    mov[0].src = lo;
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = reg_mo;

    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
    mov[1].src = hi;
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = reg_mo;

    return 2;
}
6525 
/*
 * Emit code to load the arguments for a qemu_ld slow-path helper:
 * select the helper by access size, pass the guest address, arrange
 * scratch stack space for a by-reference return, and finish with the
 * common env/oi/ra arguments.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Pointer argument itself lives on the stack. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6606 
/*
 * Emit code to move a qemu_ld helper's return value into the data
 * register(s) of @ldst, applying any required extension.  If
 * @load_sign, the helper has already performed the sign extension.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On a 32-bit host this needs a register pair; see below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Value returned in a register pair; handled below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return, then reload as two I64 parts. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value was written to the stack; reload both halves. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Move a register-pair return into datalo/datahi. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6692 
/*
 * Emit code to load the arguments for a qemu_st slow-path helper:
 * select the helper by access size, pass the guest address and the
 * data to be stored (by reference for I128 when required), and finish
 * with the common env/oi/ra arguments.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data fits in registers: queue the move(s) and flush them all. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 passed by reference: store both halves to the stack home. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Pass the address of the stack home as the data argument. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6799 
6800 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6801 {
6802     int i, start_words, num_insns;
6803     TCGOp *op;
6804 
6805     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6806                  && qemu_log_in_addr_range(pc_start))) {
6807         FILE *logfile = qemu_log_trylock();
6808         if (logfile) {
6809             fprintf(logfile, "OP:\n");
6810             tcg_dump_ops(s, logfile, false);
6811             fprintf(logfile, "\n");
6812             qemu_log_unlock(logfile);
6813         }
6814     }
6815 
6816 #ifdef CONFIG_DEBUG_TCG
6817     /* Ensure all labels referenced have been emitted.  */
6818     {
6819         TCGLabel *l;
6820         bool error = false;
6821 
6822         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6823             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6824                 qemu_log_mask(CPU_LOG_TB_OP,
6825                               "$L%d referenced but not present.\n", l->id);
6826                 error = true;
6827             }
6828         }
6829         assert(!error);
6830     }
6831 #endif
6832 
6833     /* Do not reuse any EBB that may be allocated within the TB. */
6834     tcg_temp_ebb_reset_freed(s);
6835 
6836     tcg_optimize(s);
6837 
6838     reachable_code_pass(s);
6839     liveness_pass_0(s);
6840     liveness_pass_1(s);
6841 
6842     if (s->nb_indirects > 0) {
6843         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6844                      && qemu_log_in_addr_range(pc_start))) {
6845             FILE *logfile = qemu_log_trylock();
6846             if (logfile) {
6847                 fprintf(logfile, "OP before indirect lowering:\n");
6848                 tcg_dump_ops(s, logfile, false);
6849                 fprintf(logfile, "\n");
6850                 qemu_log_unlock(logfile);
6851             }
6852         }
6853 
6854         /* Replace indirect temps with direct temps.  */
6855         if (liveness_pass_2(s)) {
6856             /* If changes were made, re-run liveness.  */
6857             liveness_pass_1(s);
6858         }
6859     }
6860 
6861     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6862                  && qemu_log_in_addr_range(pc_start))) {
6863         FILE *logfile = qemu_log_trylock();
6864         if (logfile) {
6865             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6866             tcg_dump_ops(s, logfile, true);
6867             fprintf(logfile, "\n");
6868             qemu_log_unlock(logfile);
6869         }
6870     }
6871 
6872     /* Initialize goto_tb jump offsets. */
6873     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6874     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6875     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6876     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6877 
6878     tcg_reg_alloc_start(s);
6879 
6880     /*
6881      * Reset the buffer pointers when restarting after overflow.
6882      * TODO: Move this into translate-all.c with the rest of the
6883      * buffer management.  Having only this done here is confusing.
6884      */
6885     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6886     s->code_ptr = s->code_buf;
6887     s->data_gen_ptr = NULL;
6888 
6889     QSIMPLEQ_INIT(&s->ldst_labels);
6890     s->pool_labels = NULL;
6891 
6892     start_words = s->insn_start_words;
6893     s->gen_insn_data =
6894         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6895 
6896     tcg_out_tb_start(s);
6897 
6898     num_insns = -1;
6899     s->carry_live = false;
6900     QTAILQ_FOREACH(op, &s->ops, link) {
6901         TCGOpcode opc = op->opc;
6902 
6903         switch (opc) {
6904         case INDEX_op_extrl_i64_i32:
6905             assert(TCG_TARGET_REG_BITS == 64);
6906             /*
6907              * If TCG_TYPE_I32 is represented in some canonical form,
6908              * e.g. zero or sign-extended, then emit as a unary op.
6909              * Otherwise we can treat this as a plain move.
6910              * If the output dies, treat this as a plain move, because
6911              * this will be implemented with a store.
6912              */
6913             if (TCG_TARGET_HAS_extr_i64_i32) {
6914                 TCGLifeData arg_life = op->life;
6915                 if (!IS_DEAD_ARG(0)) {
6916                     goto do_default;
6917                 }
6918             }
6919             /* fall through */
6920         case INDEX_op_mov:
6921         case INDEX_op_mov_vec:
6922             tcg_reg_alloc_mov(s, op);
6923             break;
6924         case INDEX_op_dup_vec:
6925             tcg_reg_alloc_dup(s, op);
6926             break;
6927         case INDEX_op_insn_start:
6928             assert_carry_dead(s);
6929             if (num_insns >= 0) {
6930                 size_t off = tcg_current_code_size(s);
6931                 s->gen_insn_end_off[num_insns] = off;
6932                 /* Assert that we do not overflow our stored offset.  */
6933                 assert(s->gen_insn_end_off[num_insns] == off);
6934             }
6935             num_insns++;
6936             for (i = 0; i < start_words; ++i) {
6937                 s->gen_insn_data[num_insns * start_words + i] =
6938                     tcg_get_insn_start_param(op, i);
6939             }
6940             break;
6941         case INDEX_op_discard:
6942             temp_dead(s, arg_temp(op->args[0]));
6943             break;
6944         case INDEX_op_set_label:
6945             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6946             tcg_out_label(s, arg_label(op->args[0]));
6947             break;
6948         case INDEX_op_call:
6949             assert_carry_dead(s);
6950             tcg_reg_alloc_call(s, op);
6951             break;
6952         case INDEX_op_exit_tb:
6953             tcg_out_exit_tb(s, op->args[0]);
6954             break;
6955         case INDEX_op_goto_tb:
6956             tcg_out_goto_tb(s, op->args[0]);
6957             break;
6958         case INDEX_op_br:
6959             tcg_out_br(s, arg_label(op->args[0]));
6960             break;
6961         case INDEX_op_mb:
6962             tcg_out_mb(s, op->args[0]);
6963             break;
6964         case INDEX_op_dup2_vec:
6965             if (tcg_reg_alloc_dup2(s, op)) {
6966                 break;
6967             }
6968             /* fall through */
6969         default:
6970         do_default:
6971             /* Sanity check that we've not introduced any unhandled opcodes. */
6972             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6973                                               TCGOP_FLAGS(op)));
6974             /* Note: in order to speed up the code, it would be much
6975                faster to have specialized register allocator functions for
6976                some common argument patterns */
6977             tcg_reg_alloc_op(s, op);
6978             break;
6979         }
6980         /* Test for (pending) buffer overflow.  The assumption is that any
6981            one operation beginning below the high water mark cannot overrun
6982            the buffer completely.  Thus we can test for overflow after
6983            generating code without having to check during generation.  */
6984         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6985             return -1;
6986         }
6987         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6988         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6989             return -2;
6990         }
6991     }
6992     assert_carry_dead(s);
6993 
6994     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6995     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6996 
6997     /* Generate TB finalization at the end of block */
6998     i = tcg_out_ldst_finalize(s);
6999     if (i < 0) {
7000         return i;
7001     }
7002     i = tcg_out_pool_finalize(s);
7003     if (i < 0) {
7004         return i;
7005     }
7006     if (!tcg_resolve_relocs(s)) {
7007         return -2;
7008     }
7009 
7010 #ifndef CONFIG_TCG_INTERPRETER
7011     /* flush instruction cache */
7012     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
7013                         (uintptr_t)s->code_buf,
7014                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
7015 #endif
7016 
7017     return tcg_current_code_size(s);
7018 }
7019 
7020 #ifdef ELF_HOST_MACHINE
7021 /* In order to use this feature, the backend needs to do three things:
7022 
7023    (1) Define ELF_HOST_MACHINE to indicate both what value to
7024        put into the ELF image and to indicate support for the feature.
7025 
7026    (2) Define tcg_register_jit.  This should create a buffer containing
7027        the contents of a .debug_frame section that describes the post-
7028        prologue unwind info for the tcg machine.
7029 
7030    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
7031 */
7032 
7033 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Actions the JIT asks the debugger to take, stored in
   jit_descriptor.action_flag before calling the hook below. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One in-memory symbol file (a fake ELF image); entries form a
   doubly-linked list rooted at the descriptor. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the ELF image in memory */
    uint64_t symfile_size;      /* size of that image in bytes */
};

/* Root object the debugger locates by symbol name to discover
   registered JIT code. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB plants a breakpoint in this function; calling it after updating
   __jit_debug_descriptor notifies the debugger of new code.  The
   noinline attribute and empty asm keep the call from being elided. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
7063 
7064 /* End GDB interface.  */
7065 
/*
 * Return the offset of @str within the string table @strtab.
 * Entry 0 of an ELF strtab is the empty string, so scanning begins
 * at offset 1.  @str must be present in the table: there is no
 * end-of-table check, and a missing string would walk off the end.
 */
static int find_string(const char *strtab, const char *str)
{
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
7077 
7078 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
7079                                  const void *debug_frame,
7080                                  size_t debug_frame_size)
7081 {
7082     struct __attribute__((packed)) DebugInfo {
7083         uint32_t  len;
7084         uint16_t  version;
7085         uint32_t  abbrev;
7086         uint8_t   ptr_size;
7087         uint8_t   cu_die;
7088         uint16_t  cu_lang;
7089         uintptr_t cu_low_pc;
7090         uintptr_t cu_high_pc;
7091         uint8_t   fn_die;
7092         char      fn_name[16];
7093         uintptr_t fn_low_pc;
7094         uintptr_t fn_high_pc;
7095         uint8_t   cu_eoc;
7096     };
7097 
7098     struct ElfImage {
7099         ElfW(Ehdr) ehdr;
7100         ElfW(Phdr) phdr;
7101         ElfW(Shdr) shdr[7];
7102         ElfW(Sym)  sym[2];
7103         struct DebugInfo di;
7104         uint8_t    da[24];
7105         char       str[80];
7106     };
7107 
7108     struct ElfImage *img;
7109 
7110     static const struct ElfImage img_template = {
7111         .ehdr = {
7112             .e_ident[EI_MAG0] = ELFMAG0,
7113             .e_ident[EI_MAG1] = ELFMAG1,
7114             .e_ident[EI_MAG2] = ELFMAG2,
7115             .e_ident[EI_MAG3] = ELFMAG3,
7116             .e_ident[EI_CLASS] = ELF_CLASS,
7117             .e_ident[EI_DATA] = ELF_DATA,
7118             .e_ident[EI_VERSION] = EV_CURRENT,
7119             .e_type = ET_EXEC,
7120             .e_machine = ELF_HOST_MACHINE,
7121             .e_version = EV_CURRENT,
7122             .e_phoff = offsetof(struct ElfImage, phdr),
7123             .e_shoff = offsetof(struct ElfImage, shdr),
7124             .e_ehsize = sizeof(ElfW(Shdr)),
7125             .e_phentsize = sizeof(ElfW(Phdr)),
7126             .e_phnum = 1,
7127             .e_shentsize = sizeof(ElfW(Shdr)),
7128             .e_shnum = ARRAY_SIZE(img->shdr),
7129             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
7130 #ifdef ELF_HOST_FLAGS
7131             .e_flags = ELF_HOST_FLAGS,
7132 #endif
7133 #ifdef ELF_OSABI
7134             .e_ident[EI_OSABI] = ELF_OSABI,
7135 #endif
7136         },
7137         .phdr = {
7138             .p_type = PT_LOAD,
7139             .p_flags = PF_X,
7140         },
7141         .shdr = {
7142             [0] = { .sh_type = SHT_NULL },
7143             /* Trick: The contents of code_gen_buffer are not present in
7144                this fake ELF file; that got allocated elsewhere.  Therefore
7145                we mark .text as SHT_NOBITS (similar to .bss) so that readers
7146                will not look for contents.  We can record any address.  */
7147             [1] = { /* .text */
7148                 .sh_type = SHT_NOBITS,
7149                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
7150             },
7151             [2] = { /* .debug_info */
7152                 .sh_type = SHT_PROGBITS,
7153                 .sh_offset = offsetof(struct ElfImage, di),
7154                 .sh_size = sizeof(struct DebugInfo),
7155             },
7156             [3] = { /* .debug_abbrev */
7157                 .sh_type = SHT_PROGBITS,
7158                 .sh_offset = offsetof(struct ElfImage, da),
7159                 .sh_size = sizeof(img->da),
7160             },
7161             [4] = { /* .debug_frame */
7162                 .sh_type = SHT_PROGBITS,
7163                 .sh_offset = sizeof(struct ElfImage),
7164             },
7165             [5] = { /* .symtab */
7166                 .sh_type = SHT_SYMTAB,
7167                 .sh_offset = offsetof(struct ElfImage, sym),
7168                 .sh_size = sizeof(img->sym),
7169                 .sh_info = 1,
7170                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
7171                 .sh_entsize = sizeof(ElfW(Sym)),
7172             },
7173             [6] = { /* .strtab */
7174                 .sh_type = SHT_STRTAB,
7175                 .sh_offset = offsetof(struct ElfImage, str),
7176                 .sh_size = sizeof(img->str),
7177             }
7178         },
7179         .sym = {
7180             [1] = { /* code_gen_buffer */
7181                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
7182                 .st_shndx = 1,
7183             }
7184         },
7185         .di = {
7186             .len = sizeof(struct DebugInfo) - 4,
7187             .version = 2,
7188             .ptr_size = sizeof(void *),
7189             .cu_die = 1,
7190             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
7191             .fn_die = 2,
7192             .fn_name = "code_gen_buffer"
7193         },
7194         .da = {
7195             1,          /* abbrev number (the cu) */
7196             0x11, 1,    /* DW_TAG_compile_unit, has children */
7197             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
7198             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7199             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7200             0, 0,       /* end of abbrev */
7201             2,          /* abbrev number (the fn) */
7202             0x2e, 0,    /* DW_TAG_subprogram, no children */
7203             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
7204             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
7205             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
7206             0, 0,       /* end of abbrev */
7207             0           /* no more abbrev */
7208         },
7209         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
7210                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
7211     };
7212 
7213     /* We only need a single jit entry; statically allocate it.  */
7214     static struct jit_code_entry one_entry;
7215 
7216     uintptr_t buf = (uintptr_t)buf_ptr;
7217     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
7218     DebugFrameHeader *dfh;
7219 
7220     img = g_malloc(img_size);
7221     *img = img_template;
7222 
7223     img->phdr.p_vaddr = buf;
7224     img->phdr.p_paddr = buf;
7225     img->phdr.p_memsz = buf_size;
7226 
7227     img->shdr[1].sh_name = find_string(img->str, ".text");
7228     img->shdr[1].sh_addr = buf;
7229     img->shdr[1].sh_size = buf_size;
7230 
7231     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
7232     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
7233 
7234     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
7235     img->shdr[4].sh_size = debug_frame_size;
7236 
7237     img->shdr[5].sh_name = find_string(img->str, ".symtab");
7238     img->shdr[6].sh_name = find_string(img->str, ".strtab");
7239 
7240     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
7241     img->sym[1].st_value = buf;
7242     img->sym[1].st_size = buf_size;
7243 
7244     img->di.cu_low_pc = buf;
7245     img->di.cu_high_pc = buf + buf_size;
7246     img->di.fn_low_pc = buf;
7247     img->di.fn_high_pc = buf + buf_size;
7248 
7249     dfh = (DebugFrameHeader *)(img + 1);
7250     memcpy(dfh, debug_frame, debug_frame_size);
7251     dfh->fde.func_start = buf;
7252     dfh->fde.func_len = buf_size;
7253 
7254 #ifdef DEBUG_JIT
7255     /* Enable this block to be able to debug the ELF image file creation.
7256        One can use readelf, objdump, or other inspection utilities.  */
7257     {
7258         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
7259         FILE *f = fopen(jit, "w+b");
7260         if (f) {
7261             if (fwrite(img, img_size, 1, f) != img_size) {
7262                 /* Avoid stupid unused return value warning for fwrite.  */
7263             }
7264             fclose(f);
7265         }
7266     }
7267 #endif
7268 
7269     one_entry.symfile_addr = img;
7270     one_entry.symfile_size = img_size;
7271 
7272     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
7273     __jit_debug_descriptor.relevant_entry = &one_entry;
7274     __jit_debug_descriptor.first_entry = &one_entry;
7275     __jit_debug_register_code();
7276 }
7277 #else
7278 /* No support for the feature.  Provide the entry point expected by exec.c,
7279    and implement the internal function we declared earlier.  */
7280 
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* No-op: without ELF_HOST_MACHINE there is no GDB JIT registration;
       all parameters are intentionally unused. */
}
7286 
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* No-op stub: backend did not define ELF_HOST_MACHINE, so there is
       no unwind info to hand to the debugger. */
}
7290 #endif /* ELF_HOST_MACHINE */
7291 
7292 #if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    /* Vector ops are never emitted when the backend lacks
       TCG_TARGET_MAYBE_vec, so reaching this hook is a programming error. */
    g_assert_not_reached();
}
7297 #endif
7298