/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
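
/*
 * For example, an x86_64 (64-bit, little-endian) host selects
 * ELFCLASS64 + ELFDATA2LSB for the in-memory ELF image handed
 * to GDB below.
 */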

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host addr of the code following qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
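
/*
 * With this definition, any system-mode use of guest_base must sit on
 * a path the compiler can prove dead (e.g. guarded by tcg_use_softmmu);
 * otherwise qemu_build_not_reached() turns it into a build failure.
 */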

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}
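
/*
 * Typical front-end usage (a sketch, using tcg-op.h helpers that live
 * outside this file):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, l);
 *     ...
 *     gen_set_label(l);
 *
 * In the backend, branches to a not-yet-emitted label record a
 * relocation via tcg_out_reloc(); tcg_out_label() binds the label to
 * the current output position, and tcg_resolve_relocs() below patches
 * whatever is still outstanding at the end of the TB.
 */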

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
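
/*
 * Example (a sketch, not a call made at this point in the file):
 * sign-extending a 32-bit value into a 64-bit destination,
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * takes the MO_UL/MO_SL arm above and dispatches to
 * tcg_out_exts_i32_i64().
 */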

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
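
/*
 * For instance, if @i1 is R0 <- R1 and @i2 is R1 <- R0, the naive order
 * would clobber R1 before @i2 reads it; the code above either swaps the
 * two registers with tcg_out_xchg() or routes @i1's source through
 * @scratch before emitting the moves.
 */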

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchgs.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
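
/*
 * The resulting list is ordered by decreasing nlong and then by
 * decreasing data, so identical entries end up adjacent and are
 * emitted only once by tcg_out_pool_finalize() below.
 */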

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
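
/*
 * For example, C_PFX3(c_o1_i2_, r, r, ri) pastes to the identifier
 * c_o1_i2_r_r_ri, so each distinct combination of constraint letters
 * gets its own name.
 */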

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

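/*
 * The net effect: "tcg-target-con-set.h" is included twice above, once
 * to build the TCGConstraintSetIndex enum and once to build the matching
 * constraint_sets[] array, while this third set of definitions lets a
 * backend's tcg_target_op_def() write, e.g., "return C_O1_I2(r, r, ri);"
 * and yield the enumerator c_o1_i2_r_r_ri.
 */
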
/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctpop, TCGOutOpUnary, outop_ctpop),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
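
/*
 * Note: the fast path is the inline tcg_malloc() in tcg.h, which bumps
 * s->pool_cur and only falls back to tcg_malloc_internal() when the
 * current chunk is exhausted.  Everything allocated here lives until
 * the next tcg_pool_reset().
 */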

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
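
/*
 * Worked example: for info_helper_ld64_mmu above, typemask packs five
 * 3-bit typecodes (the return value plus four arguments), so after the
 * ">> 3" the last non-zero field is the fourth and nargs computes to 4:
 * env, addr, oi, ra.
 */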

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
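
/*
 * Example (assuming a host with 6 integer argument registers, such as
 * x86_64): slots 0-5 map to registers, so arg_slot_reg_p(4) is true,
 * while slot 6 is the first stack slot, placed at
 * TCG_TARGET_CALL_STACK_OFFSET.
 */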

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
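
/*
 * layout_arg_even() rounds arg_slot up to the next even slot, for
 * TCG_CALL_ARG_EVEN ABIs that pass 64-bit values in aligned pairs
 * (e.g. 32-bit hosts requiring even/odd register pairs): slot 3
 * becomes 4, while slot 4 is unchanged.
 */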

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * a structure passed by reference.  Therefore we must make copies.
1388      * Allocate space from "ref_slot", which will be adjusted to
1389      * follow the parameters on the stack.
1390      */
1391     loc[0].ref_slot = cum->ref_slot;
1392 
1393     /*
1394      * Subsequent words also go into the reference slot, but
1395      * do not accumulate into the regular arguments.
1396      */
1397     for (int i = 1; i < n; ++i) {
1398         loc[i] = (TCGCallArgumentLoc){
1399             .kind = TCG_CALL_ARG_BY_REF_N,
1400             .arg_idx = cum->arg_idx,
1401             .tmp_subindex = i,
1402             .ref_slot = cum->ref_slot + i,
1403         };
1404     }
1405     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1406     cum->ref_slot += n;
1407 }
1408 
1409 static void init_call_layout(TCGHelperInfo *info)
1410 {
1411     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1412     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1413     unsigned typemask = info->typemask;
1414     unsigned typecode;
1415     TCGCumulativeArgs cum = { };
1416 
1417     /*
1418      * Parse and place any function return value.
1419      */
1420     typecode = typemask & 7;
1421     switch (typecode) {
1422     case dh_typecode_void:
1423         info->nr_out = 0;
1424         break;
1425     case dh_typecode_i32:
1426     case dh_typecode_s32:
1427     case dh_typecode_ptr:
1428         info->nr_out = 1;
1429         info->out_kind = TCG_CALL_RET_NORMAL;
1430         break;
1431     case dh_typecode_i64:
1432     case dh_typecode_s64:
1433         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1434         info->out_kind = TCG_CALL_RET_NORMAL;
1435         /* Query the last register now to trigger any assert early. */
1436         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1437         break;
1438     case dh_typecode_i128:
1439         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1440         info->out_kind = TCG_TARGET_CALL_RET_I128;
1441         switch (TCG_TARGET_CALL_RET_I128) {
1442         case TCG_CALL_RET_NORMAL:
1443             /* Query the last register now to trigger any assert early. */
1444             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1445             break;
1446         case TCG_CALL_RET_BY_VEC:
1447             /* Query the single register now to trigger any assert early. */
1448             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1449             break;
1450         case TCG_CALL_RET_BY_REF:
1451             /*
1452              * Allocate the first argument to the output.
1453              * We don't need to store this anywhere, just make it
1454              * unavailable for use in the input loop below.
1455              */
1456             cum.arg_slot = 1;
1457             break;
1458         default:
1459             qemu_build_not_reached();
1460         }
1461         break;
1462     default:
1463         g_assert_not_reached();
1464     }
1465 
1466     /*
1467      * Parse and place function arguments.
1468      */
1469     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1470         TCGCallArgumentKind kind;
1471         TCGType type;
1472 
1473         typecode = typemask & 7;
1474         switch (typecode) {
1475         case dh_typecode_i32:
1476         case dh_typecode_s32:
1477             type = TCG_TYPE_I32;
1478             break;
1479         case dh_typecode_i64:
1480         case dh_typecode_s64:
1481             type = TCG_TYPE_I64;
1482             break;
1483         case dh_typecode_ptr:
1484             type = TCG_TYPE_PTR;
1485             break;
1486         case dh_typecode_i128:
1487             type = TCG_TYPE_I128;
1488             break;
1489         default:
1490             g_assert_not_reached();
1491         }
1492 
1493         switch (type) {
1494         case TCG_TYPE_I32:
1495             switch (TCG_TARGET_CALL_ARG_I32) {
1496             case TCG_CALL_ARG_EVEN:
1497                 layout_arg_even(&cum);
1498                 /* fall through */
1499             case TCG_CALL_ARG_NORMAL:
1500                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1501                 break;
1502             case TCG_CALL_ARG_EXTEND:
1503                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1504                 layout_arg_1(&cum, info, kind);
1505                 break;
1506             default:
1507                 qemu_build_not_reached();
1508             }
1509             break;
1510 
1511         case TCG_TYPE_I64:
1512             switch (TCG_TARGET_CALL_ARG_I64) {
1513             case TCG_CALL_ARG_EVEN:
1514                 layout_arg_even(&cum);
1515                 /* fall through */
1516             case TCG_CALL_ARG_NORMAL:
1517                 if (TCG_TARGET_REG_BITS == 32) {
1518                     layout_arg_normal_n(&cum, info, 2);
1519                 } else {
1520                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1521                 }
1522                 break;
1523             default:
1524                 qemu_build_not_reached();
1525             }
1526             break;
1527 
1528         case TCG_TYPE_I128:
1529             switch (TCG_TARGET_CALL_ARG_I128) {
1530             case TCG_CALL_ARG_EVEN:
1531                 layout_arg_even(&cum);
1532                 /* fall through */
1533             case TCG_CALL_ARG_NORMAL:
1534                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1535                 break;
1536             case TCG_CALL_ARG_BY_REF:
1537                 layout_arg_by_ref(&cum, info);
1538                 break;
1539             default:
1540                 qemu_build_not_reached();
1541             }
1542             break;
1543 
1544         default:
1545             g_assert_not_reached();
1546         }
1547     }
1548     info->nr_in = cum.info_in_idx;
1549 
1550     /* Validate that we didn't overrun the input array. */
1551     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1552     /* Validate the backend has enough argument space. */
1553     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1554 
1555     /*
1556      * Relocate the "ref_slot" area to the end of the parameters.
1557      * Minimizing this stack offset helps code size for x86,
1558      * which has a signed 8-bit offset encoding.
1559      */
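    /*
     * Illustrative, values assumed: on a 64-bit host with
     * max_reg_slots = 6 and cum.arg_slot = 9, ref_base becomes
     * ROUND_UP(9 - 6, 2) + 6 = 10, so the ref_slot area starts there.
     */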
1560     if (cum.ref_slot != 0) {
1561         int ref_base = 0;
1562 
1563         if (cum.arg_slot > max_reg_slots) {
1564             int align = __alignof(Int128) / sizeof(tcg_target_long);
1565 
1566             ref_base = cum.arg_slot - max_reg_slots;
1567             if (align > 1) {
1568                 ref_base = ROUND_UP(ref_base, align);
1569             }
1570         }
1571         assert(ref_base + cum.ref_slot <= max_stk_slots);
1572         ref_base += max_reg_slots;
1573 
1574         if (ref_base != 0) {
1575             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1576                 TCGCallArgumentLoc *loc = &info->in[i];
1577                 switch (loc->kind) {
1578                 case TCG_CALL_ARG_BY_REF:
1579                 case TCG_CALL_ARG_BY_REF_N:
1580                     loc->ref_slot += ref_base;
1581                     break;
1582                 default:
1583                     break;
1584                 }
1585             }
1586         }
1587     }
1588 }
1589 
1590 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1591 static void process_constraint_sets(void);
1592 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1593                                             TCGReg reg, const char *name);
1594 
1595 static void tcg_context_init(unsigned max_threads)
1596 {
1597     TCGContext *s = &tcg_init_ctx;
1598     int n, i;
1599     TCGTemp *ts;
1600 
1601     memset(s, 0, sizeof(*s));
1602     s->nb_globals = 0;
1603 
1604     init_call_layout(&info_helper_ld32_mmu);
1605     init_call_layout(&info_helper_ld64_mmu);
1606     init_call_layout(&info_helper_ld128_mmu);
1607     init_call_layout(&info_helper_st32_mmu);
1608     init_call_layout(&info_helper_st64_mmu);
1609     init_call_layout(&info_helper_st128_mmu);
1610 
1611     tcg_target_init(s);
1612     process_constraint_sets();
1613 
1614     /* Reverse the order of the saved registers, assuming they're all at
1615        the start of tcg_target_reg_alloc_order.  */
1616     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1617         int r = tcg_target_reg_alloc_order[n];
1618         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1619             break;
1620         }
1621     }
1622     for (i = 0; i < n; ++i) {
1623         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1624     }
1625     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1626         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1627     }
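    /*
     * E.g. if the alloc order starts with call-saved regs A, B, C
     * followed by call-clobbered regs, the indirect order becomes
     * C, B, A with the clobbered regs unchanged after them.
     */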
1628 
1629     tcg_ctx = s;
1630     /*
1631      * In user-mode we simply share the init context among threads, since we
1632      * use a single region. See the documentation of tcg_region_init() for the
1633      * reasoning behind this.
1634      * In system-mode we will have at most max_threads TCG threads.
1635      */
1636 #ifdef CONFIG_USER_ONLY
1637     tcg_ctxs = &tcg_ctx;
1638     tcg_cur_ctxs = 1;
1639     tcg_max_ctxs = 1;
1640 #else
1641     tcg_max_ctxs = max_threads;
1642     tcg_ctxs = g_new0(TCGContext *, max_threads);
1643 #endif
1644 
1645     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1646     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1647     tcg_env = temp_tcgv_ptr(ts);
1648 }
1649 
1650 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1651 {
1652     tcg_context_init(max_threads);
1653     tcg_region_init(tb_size, splitwx, max_threads);
1654 }
1655 
1656 /*
1657  * Allocate TBs right before their corresponding translated code, making
1658  * sure that TBs and code are on different cache lines.
1659  */
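/*
 * E.g. with a 64-byte icache line, the TB struct is placed at a
 * 64-byte boundary and the code at the next boundary past it.
 */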
1660 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1661 {
1662     uintptr_t align = qemu_icache_linesize;
1663     TranslationBlock *tb;
1664     void *next;
1665 
1666  retry:
1667     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1668     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1669 
1670     if (unlikely(next > s->code_gen_highwater)) {
1671         if (tcg_region_alloc(s)) {
1672             return NULL;
1673         }
1674         goto retry;
1675     }
1676     qatomic_set(&s->code_gen_ptr, next);
1677     return tb;
1678 }
1679 
1680 void tcg_prologue_init(void)
1681 {
1682     TCGContext *s = tcg_ctx;
1683     size_t prologue_size;
1684 
1685     s->code_ptr = s->code_gen_ptr;
1686     s->code_buf = s->code_gen_ptr;
1687     s->data_gen_ptr = NULL;
1688 
1689 #ifndef CONFIG_TCG_INTERPRETER
1690     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1691 #endif
1692 
1693     s->pool_labels = NULL;
1694 
1695     qemu_thread_jit_write();
1696     /* Generate the prologue.  */
1697     tcg_target_qemu_prologue(s);
1698 
1699     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1700     {
1701         int result = tcg_out_pool_finalize(s);
1702         tcg_debug_assert(result == 0);
1703     }
1704 
1705     prologue_size = tcg_current_code_size(s);
1706     perf_report_prologue(s->code_gen_ptr, prologue_size);
1707 
1708 #ifndef CONFIG_TCG_INTERPRETER
1709     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1710                         (uintptr_t)s->code_buf, prologue_size);
1711 #endif
1712 
1713     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1714         FILE *logfile = qemu_log_trylock();
1715         if (logfile) {
1716             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1717             if (s->data_gen_ptr) {
1718                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1719                 size_t data_size = prologue_size - code_size;
1720                 size_t i;
1721 
1722                 disas(logfile, s->code_gen_ptr, code_size);
1723 
1724                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1725                     if (sizeof(tcg_target_ulong) == 8) {
1726                         fprintf(logfile,
1727                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1728                                 (uintptr_t)s->data_gen_ptr + i,
1729                                 *(uint64_t *)(s->data_gen_ptr + i));
1730                     } else {
1731                         fprintf(logfile,
1732                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1733                                 (uintptr_t)s->data_gen_ptr + i,
1734                                 *(uint32_t *)(s->data_gen_ptr + i));
1735                     }
1736                 }
1737             } else {
1738                 disas(logfile, s->code_gen_ptr, prologue_size);
1739             }
1740             fprintf(logfile, "\n");
1741             qemu_log_unlock(logfile);
1742         }
1743     }
1744 
1745 #ifndef CONFIG_TCG_INTERPRETER
1746     /*
1747      * Assert that goto_ptr is implemented completely, i.e. an epilogue is set.
1748      * For tci, we use NULL as the signal to return from the interpreter,
1749      * so skip this check.
1750      */
1751     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1752 #endif
1753 
1754     tcg_region_prologue_set(s);
1755 }
1756 
1757 void tcg_func_start(TCGContext *s)
1758 {
1759     tcg_pool_reset(s);
1760     s->nb_temps = s->nb_globals;
1761 
1762     /* No freed EBB temps carry over from a previous translation.  */
1763     tcg_temp_ebb_reset_freed(s);
1764 
1765     /* No constant temps have been previously allocated. */
1766     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1767         if (s->const_table[i]) {
1768             g_hash_table_remove_all(s->const_table[i]);
1769         }
1770     }
1771 
1772     s->nb_ops = 0;
1773     s->nb_labels = 0;
1774     s->current_frame_offset = s->frame_start;
1775 
1776 #ifdef CONFIG_DEBUG_TCG
1777     s->goto_tb_issue_mask = 0;
1778 #endif
1779 
1780     QTAILQ_INIT(&s->ops);
1781     QTAILQ_INIT(&s->free_ops);
1782     s->emit_before_op = NULL;
1783     QSIMPLEQ_INIT(&s->labels);
1784 
1785     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1786     tcg_debug_assert(s->insn_start_words > 0);
1787 }
1788 
1789 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1790 {
1791     int n = s->nb_temps++;
1792 
1793     if (n >= TCG_MAX_TEMPS) {
1794         tcg_raise_tb_overflow(s);
1795     }
1796     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1797 }
1798 
1799 static TCGTemp *tcg_global_alloc(TCGContext *s)
1800 {
1801     TCGTemp *ts;
1802 
1803     tcg_debug_assert(s->nb_globals == s->nb_temps);
1804     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1805     s->nb_globals++;
1806     ts = tcg_temp_alloc(s);
1807     ts->kind = TEMP_GLOBAL;
1808 
1809     return ts;
1810 }
1811 
1812 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1813                                             TCGReg reg, const char *name)
1814 {
1815     TCGTemp *ts;
1816 
1817     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1818 
1819     ts = tcg_global_alloc(s);
1820     ts->base_type = type;
1821     ts->type = type;
1822     ts->kind = TEMP_FIXED;
1823     ts->reg = reg;
1824     ts->name = name;
1825     tcg_regset_set_reg(s->reserved_regs, reg);
1826 
1827     return ts;
1828 }
1829 
1830 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1831 {
1832     s->frame_start = start;
1833     s->frame_end = start + size;
1834     s->frame_temp
1835         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1836 }
1837 
1838 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1839                                             const char *name, TCGType type)
1840 {
1841     TCGContext *s = tcg_ctx;
1842     TCGTemp *base_ts = tcgv_ptr_temp(base);
1843     TCGTemp *ts = tcg_global_alloc(s);
1844     int indirect_reg = 0;
1845 
1846     switch (base_ts->kind) {
1847     case TEMP_FIXED:
1848         break;
1849     case TEMP_GLOBAL:
1850         /* We do not support double-indirect registers.  */
1851         tcg_debug_assert(!base_ts->indirect_reg);
1852         base_ts->indirect_base = 1;
1853         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1854                             ? 2 : 1);
1855         indirect_reg = 1;
1856         break;
1857     default:
1858         g_assert_not_reached();
1859     }
1860 
1861     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1862         TCGTemp *ts2 = tcg_global_alloc(s);
1863         char buf[64];
1864 
1865         ts->base_type = TCG_TYPE_I64;
1866         ts->type = TCG_TYPE_I32;
1867         ts->indirect_reg = indirect_reg;
1868         ts->mem_allocated = 1;
1869         ts->mem_base = base_ts;
1870         ts->mem_offset = offset;
1871         pstrcpy(buf, sizeof(buf), name);
1872         pstrcat(buf, sizeof(buf), "_0");
1873         ts->name = strdup(buf);
1874 
1875         tcg_debug_assert(ts2 == ts + 1);
1876         ts2->base_type = TCG_TYPE_I64;
1877         ts2->type = TCG_TYPE_I32;
1878         ts2->indirect_reg = indirect_reg;
1879         ts2->mem_allocated = 1;
1880         ts2->mem_base = base_ts;
1881         ts2->mem_offset = offset + 4;
1882         ts2->temp_subindex = 1;
1883         pstrcpy(buf, sizeof(buf), name);
1884         pstrcat(buf, sizeof(buf), "_1");
1885         ts2->name = strdup(buf);
1886     } else {
1887         ts->base_type = type;
1888         ts->type = type;
1889         ts->indirect_reg = indirect_reg;
1890         ts->mem_allocated = 1;
1891         ts->mem_base = base_ts;
1892         ts->mem_offset = offset;
1893         ts->name = name;
1894     }
1895     return ts;
1896 }
1897 
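/*
 * Usage sketch (the field name "foo" is assumed; actual fields are
 * per-target):
 *   TCGv_i32 cpu_foo = tcg_global_mem_new_i32(tcg_env,
 *       offsetof(CPUArchState, foo), "foo");
 */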
1898 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1899 {
1900     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1901     return temp_tcgv_i32(ts);
1902 }
1903 
1904 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1905 {
1906     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1907     return temp_tcgv_i64(ts);
1908 }
1909 
1910 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1911 {
1912     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1913     return temp_tcgv_ptr(ts);
1914 }
1915 
1916 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1917 {
1918     TCGContext *s = tcg_ctx;
1919     TCGTemp *ts;
1920     int n;
1921 
1922     if (kind == TEMP_EBB) {
1923         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1924 
1925         if (idx < TCG_MAX_TEMPS) {
1926             /* There is already an available temp with the right type.  */
1927             clear_bit(idx, s->free_temps[type].l);
1928 
1929             ts = &s->temps[idx];
1930             ts->temp_allocated = 1;
1931             tcg_debug_assert(ts->base_type == type);
1932             tcg_debug_assert(ts->kind == kind);
1933             return ts;
1934         }
1935     } else {
1936         tcg_debug_assert(kind == TEMP_TB);
1937     }
1938 
1939     switch (type) {
1940     case TCG_TYPE_I32:
1941     case TCG_TYPE_V64:
1942     case TCG_TYPE_V128:
1943     case TCG_TYPE_V256:
1944         n = 1;
1945         break;
1946     case TCG_TYPE_I64:
1947         n = 64 / TCG_TARGET_REG_BITS;
1948         break;
1949     case TCG_TYPE_I128:
1950         n = 128 / TCG_TARGET_REG_BITS;
1951         break;
1952     default:
1953         g_assert_not_reached();
1954     }
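    /* E.g. TCG_TYPE_I128 allocates n = 2 sub-temps on a 64-bit host, 4 on 32-bit. */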
1955 
1956     ts = tcg_temp_alloc(s);
1957     ts->base_type = type;
1958     ts->temp_allocated = 1;
1959     ts->kind = kind;
1960 
1961     if (n == 1) {
1962         ts->type = type;
1963     } else {
1964         ts->type = TCG_TYPE_REG;
1965 
1966         for (int i = 1; i < n; ++i) {
1967             TCGTemp *ts2 = tcg_temp_alloc(s);
1968 
1969             tcg_debug_assert(ts2 == ts + i);
1970             ts2->base_type = type;
1971             ts2->type = TCG_TYPE_REG;
1972             ts2->temp_allocated = 1;
1973             ts2->temp_subindex = i;
1974             ts2->kind = kind;
1975         }
1976     }
1977     return ts;
1978 }
1979 
1980 TCGv_i32 tcg_temp_new_i32(void)
1981 {
1982     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1983 }
1984 
1985 TCGv_i32 tcg_temp_ebb_new_i32(void)
1986 {
1987     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1988 }
1989 
1990 TCGv_i64 tcg_temp_new_i64(void)
1991 {
1992     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1993 }
1994 
1995 TCGv_i64 tcg_temp_ebb_new_i64(void)
1996 {
1997     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1998 }
1999 
2000 TCGv_ptr tcg_temp_new_ptr(void)
2001 {
2002     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2003 }
2004 
2005 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2006 {
2007     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2008 }
2009 
2010 TCGv_i128 tcg_temp_new_i128(void)
2011 {
2012     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2013 }
2014 
2015 TCGv_i128 tcg_temp_ebb_new_i128(void)
2016 {
2017     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2018 }
2019 
2020 TCGv_vec tcg_temp_new_vec(TCGType type)
2021 {
2022     TCGTemp *t;
2023 
2024 #ifdef CONFIG_DEBUG_TCG
2025     switch (type) {
2026     case TCG_TYPE_V64:
2027         assert(TCG_TARGET_HAS_v64);
2028         break;
2029     case TCG_TYPE_V128:
2030         assert(TCG_TARGET_HAS_v128);
2031         break;
2032     case TCG_TYPE_V256:
2033         assert(TCG_TARGET_HAS_v256);
2034         break;
2035     default:
2036         g_assert_not_reached();
2037     }
2038 #endif
2039 
2040     t = tcg_temp_new_internal(type, TEMP_EBB);
2041     return temp_tcgv_vec(t);
2042 }
2043 
2044 /* Create a new temp of the same type as an existing temp.  */
2045 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2046 {
2047     TCGTemp *t = tcgv_vec_temp(match);
2048 
2049     tcg_debug_assert(t->temp_allocated != 0);
2050 
2051     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2052     return temp_tcgv_vec(t);
2053 }
2054 
2055 void tcg_temp_free_internal(TCGTemp *ts)
2056 {
2057     TCGContext *s = tcg_ctx;
2058 
2059     switch (ts->kind) {
2060     case TEMP_CONST:
2061     case TEMP_TB:
2062         /* Silently ignore free. */
2063         break;
2064     case TEMP_EBB:
2065         tcg_debug_assert(ts->temp_allocated != 0);
2066         ts->temp_allocated = 0;
2067         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2068         break;
2069     default:
2070         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2071         g_assert_not_reached();
2072     }
2073 }
2074 
2075 void tcg_temp_free_i32(TCGv_i32 arg)
2076 {
2077     tcg_temp_free_internal(tcgv_i32_temp(arg));
2078 }
2079 
2080 void tcg_temp_free_i64(TCGv_i64 arg)
2081 {
2082     tcg_temp_free_internal(tcgv_i64_temp(arg));
2083 }
2084 
2085 void tcg_temp_free_i128(TCGv_i128 arg)
2086 {
2087     tcg_temp_free_internal(tcgv_i128_temp(arg));
2088 }
2089 
2090 void tcg_temp_free_ptr(TCGv_ptr arg)
2091 {
2092     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2093 }
2094 
2095 void tcg_temp_free_vec(TCGv_vec arg)
2096 {
2097     tcg_temp_free_internal(tcgv_vec_temp(arg));
2098 }
2099 
2100 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2101 {
2102     TCGContext *s = tcg_ctx;
2103     GHashTable *h = s->const_table[type];
2104     TCGTemp *ts;
2105 
2106     if (h == NULL) {
2107         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2108         s->const_table[type] = h;
2109     }
2110 
2111     ts = g_hash_table_lookup(h, &val);
2112     if (ts == NULL) {
2113         int64_t *val_ptr;
2114 
2115         ts = tcg_temp_alloc(s);
2116 
2117         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2118             TCGTemp *ts2 = tcg_temp_alloc(s);
2119 
2120             tcg_debug_assert(ts2 == ts + 1);
2121 
2122             ts->base_type = TCG_TYPE_I64;
2123             ts->type = TCG_TYPE_I32;
2124             ts->kind = TEMP_CONST;
2125             ts->temp_allocated = 1;
2126 
2127             ts2->base_type = TCG_TYPE_I64;
2128             ts2->type = TCG_TYPE_I32;
2129             ts2->kind = TEMP_CONST;
2130             ts2->temp_allocated = 1;
2131             ts2->temp_subindex = 1;
2132 
2133             /*
2134              * Retain the full value of the 64-bit constant in the low
2135              * part, so that the hash table works.  Actual uses will
2136              * truncate the value to the low part.
2137              */
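            /*
             * E.g. on a 32-bit little-endian host, val 0x1122334455667788
             * gives ts[0].val = the full value (its low part is used) and
             * ts[1].val = 0x11223344.
             */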
2138             ts[HOST_BIG_ENDIAN].val = val;
2139             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2140             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2141         } else {
2142             ts->base_type = type;
2143             ts->type = type;
2144             ts->kind = TEMP_CONST;
2145             ts->temp_allocated = 1;
2146             ts->val = val;
2147             val_ptr = &ts->val;
2148         }
2149         g_hash_table_insert(h, val_ptr, ts);
2150     }
2151 
2152     return ts;
2153 }
2154 
2155 TCGv_i32 tcg_constant_i32(int32_t val)
2156 {
2157     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2158 }
2159 
2160 TCGv_i64 tcg_constant_i64(int64_t val)
2161 {
2162     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2163 }
2164 
2165 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2166 {
2167     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2168 }
2169 
2170 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2171 {
2172     val = dup_const(vece, val);
2173     return temp_tcgv_vec(tcg_constant_internal(type, val));
2174 }
2175 
2176 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2177 {
2178     TCGTemp *t = tcgv_vec_temp(match);
2179 
2180     tcg_debug_assert(t->temp_allocated != 0);
2181     return tcg_constant_vec(t->base_type, vece, val);
2182 }
2183 
2184 #ifdef CONFIG_DEBUG_TCG
2185 size_t temp_idx(TCGTemp *ts)
2186 {
2187     ptrdiff_t n = ts - tcg_ctx->temps;
2188     assert(n >= 0 && n < tcg_ctx->nb_temps);
2189     return n;
2190 }
2191 
2192 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2193 {
2194     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2195 
2196     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2197     assert(o % sizeof(TCGTemp) == 0);
2198 
2199     return (void *)tcg_ctx + (uintptr_t)v;
2200 }
2201 #endif /* CONFIG_DEBUG_TCG */
2202 
2203 /*
2204  * Return true if OP may appear in the opcode stream with TYPE.
2205  * Test the runtime variable that controls each opcode.
2206  */
2207 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2208 {
2209     bool has_type;
2210 
2211     switch (type) {
2212     case TCG_TYPE_I32:
2213         has_type = true;
2214         break;
2215     case TCG_TYPE_I64:
2216         has_type = TCG_TARGET_REG_BITS == 64;
2217         break;
2218     case TCG_TYPE_V64:
2219         has_type = TCG_TARGET_HAS_v64;
2220         break;
2221     case TCG_TYPE_V128:
2222         has_type = TCG_TARGET_HAS_v128;
2223         break;
2224     case TCG_TYPE_V256:
2225         has_type = TCG_TARGET_HAS_v256;
2226         break;
2227     default:
2228         has_type = false;
2229         break;
2230     }
2231 
2232     switch (op) {
2233     case INDEX_op_discard:
2234     case INDEX_op_set_label:
2235     case INDEX_op_call:
2236     case INDEX_op_br:
2237     case INDEX_op_mb:
2238     case INDEX_op_insn_start:
2239     case INDEX_op_exit_tb:
2240     case INDEX_op_goto_tb:
2241     case INDEX_op_goto_ptr:
2242     case INDEX_op_qemu_ld_i32:
2243     case INDEX_op_qemu_st_i32:
2244     case INDEX_op_qemu_ld_i64:
2245     case INDEX_op_qemu_st_i64:
2246         return true;
2247 
2248     case INDEX_op_qemu_st8_i32:
2249         return TCG_TARGET_HAS_qemu_st8_i32;
2250 
2251     case INDEX_op_qemu_ld_i128:
2252     case INDEX_op_qemu_st_i128:
2253         return TCG_TARGET_HAS_qemu_ldst_i128;
2254 
2255     case INDEX_op_add:
2256     case INDEX_op_and:
2257     case INDEX_op_mov:
2258     case INDEX_op_or:
2259     case INDEX_op_xor:
2260         return has_type;
2261 
2262     case INDEX_op_setcond_i32:
2263     case INDEX_op_brcond_i32:
2264     case INDEX_op_movcond_i32:
2265     case INDEX_op_ld8u_i32:
2266     case INDEX_op_ld8s_i32:
2267     case INDEX_op_ld16u_i32:
2268     case INDEX_op_ld16s_i32:
2269     case INDEX_op_ld_i32:
2270     case INDEX_op_st8_i32:
2271     case INDEX_op_st16_i32:
2272     case INDEX_op_st_i32:
2273     case INDEX_op_extract_i32:
2274     case INDEX_op_sextract_i32:
2275     case INDEX_op_deposit_i32:
2276         return true;
2277 
2278     case INDEX_op_negsetcond_i32:
2279         return TCG_TARGET_HAS_negsetcond_i32;
2280     case INDEX_op_extract2_i32:
2281         return TCG_TARGET_HAS_extract2_i32;
2282     case INDEX_op_add2_i32:
2283         return TCG_TARGET_HAS_add2_i32;
2284     case INDEX_op_sub2_i32:
2285         return TCG_TARGET_HAS_sub2_i32;
2286     case INDEX_op_mulu2_i32:
2287         return TCG_TARGET_HAS_mulu2_i32;
2288     case INDEX_op_muls2_i32:
2289         return TCG_TARGET_HAS_muls2_i32;
2290     case INDEX_op_bswap16_i32:
2291         return TCG_TARGET_HAS_bswap16_i32;
2292     case INDEX_op_bswap32_i32:
2293         return TCG_TARGET_HAS_bswap32_i32;
2294 
2295     case INDEX_op_brcond2_i32:
2296     case INDEX_op_setcond2_i32:
2297         return TCG_TARGET_REG_BITS == 32;
2298 
2299     case INDEX_op_setcond_i64:
2300     case INDEX_op_brcond_i64:
2301     case INDEX_op_movcond_i64:
2302     case INDEX_op_ld8u_i64:
2303     case INDEX_op_ld8s_i64:
2304     case INDEX_op_ld16u_i64:
2305     case INDEX_op_ld16s_i64:
2306     case INDEX_op_ld32u_i64:
2307     case INDEX_op_ld32s_i64:
2308     case INDEX_op_ld_i64:
2309     case INDEX_op_st8_i64:
2310     case INDEX_op_st16_i64:
2311     case INDEX_op_st32_i64:
2312     case INDEX_op_st_i64:
2313     case INDEX_op_ext_i32_i64:
2314     case INDEX_op_extu_i32_i64:
2315     case INDEX_op_extract_i64:
2316     case INDEX_op_sextract_i64:
2317     case INDEX_op_deposit_i64:
2318         return TCG_TARGET_REG_BITS == 64;
2319 
2320     case INDEX_op_negsetcond_i64:
2321         return TCG_TARGET_HAS_negsetcond_i64;
2322     case INDEX_op_extract2_i64:
2323         return TCG_TARGET_HAS_extract2_i64;
2324     case INDEX_op_extrl_i64_i32:
2325     case INDEX_op_extrh_i64_i32:
2326         return TCG_TARGET_HAS_extr_i64_i32;
2327     case INDEX_op_bswap16_i64:
2328         return TCG_TARGET_HAS_bswap16_i64;
2329     case INDEX_op_bswap32_i64:
2330         return TCG_TARGET_HAS_bswap32_i64;
2331     case INDEX_op_bswap64_i64:
2332         return TCG_TARGET_HAS_bswap64_i64;
2333     case INDEX_op_add2_i64:
2334         return TCG_TARGET_HAS_add2_i64;
2335     case INDEX_op_sub2_i64:
2336         return TCG_TARGET_HAS_sub2_i64;
2337     case INDEX_op_mulu2_i64:
2338         return TCG_TARGET_HAS_mulu2_i64;
2339     case INDEX_op_muls2_i64:
2340         return TCG_TARGET_HAS_muls2_i64;
2341 
2342     case INDEX_op_mov_vec:
2343     case INDEX_op_dup_vec:
2344     case INDEX_op_dupm_vec:
2345     case INDEX_op_ld_vec:
2346     case INDEX_op_st_vec:
2347     case INDEX_op_add_vec:
2348     case INDEX_op_sub_vec:
2349     case INDEX_op_and_vec:
2350     case INDEX_op_or_vec:
2351     case INDEX_op_xor_vec:
2352     case INDEX_op_cmp_vec:
2353         return has_type;
2354     case INDEX_op_dup2_vec:
2355         return has_type && TCG_TARGET_REG_BITS == 32;
2356     case INDEX_op_not_vec:
2357         return has_type && TCG_TARGET_HAS_not_vec;
2358     case INDEX_op_neg_vec:
2359         return has_type && TCG_TARGET_HAS_neg_vec;
2360     case INDEX_op_abs_vec:
2361         return has_type && TCG_TARGET_HAS_abs_vec;
2362     case INDEX_op_andc_vec:
2363         return has_type && TCG_TARGET_HAS_andc_vec;
2364     case INDEX_op_orc_vec:
2365         return has_type && TCG_TARGET_HAS_orc_vec;
2366     case INDEX_op_nand_vec:
2367         return has_type && TCG_TARGET_HAS_nand_vec;
2368     case INDEX_op_nor_vec:
2369         return has_type && TCG_TARGET_HAS_nor_vec;
2370     case INDEX_op_eqv_vec:
2371         return has_type && TCG_TARGET_HAS_eqv_vec;
2372     case INDEX_op_mul_vec:
2373         return has_type && TCG_TARGET_HAS_mul_vec;
2374     case INDEX_op_shli_vec:
2375     case INDEX_op_shri_vec:
2376     case INDEX_op_sari_vec:
2377         return has_type && TCG_TARGET_HAS_shi_vec;
2378     case INDEX_op_shls_vec:
2379     case INDEX_op_shrs_vec:
2380     case INDEX_op_sars_vec:
2381         return has_type && TCG_TARGET_HAS_shs_vec;
2382     case INDEX_op_shlv_vec:
2383     case INDEX_op_shrv_vec:
2384     case INDEX_op_sarv_vec:
2385         return has_type && TCG_TARGET_HAS_shv_vec;
2386     case INDEX_op_rotli_vec:
2387         return has_type && TCG_TARGET_HAS_roti_vec;
2388     case INDEX_op_rotls_vec:
2389         return has_type && TCG_TARGET_HAS_rots_vec;
2390     case INDEX_op_rotlv_vec:
2391     case INDEX_op_rotrv_vec:
2392         return has_type && TCG_TARGET_HAS_rotv_vec;
2393     case INDEX_op_ssadd_vec:
2394     case INDEX_op_usadd_vec:
2395     case INDEX_op_sssub_vec:
2396     case INDEX_op_ussub_vec:
2397         return has_type && TCG_TARGET_HAS_sat_vec;
2398     case INDEX_op_smin_vec:
2399     case INDEX_op_umin_vec:
2400     case INDEX_op_smax_vec:
2401     case INDEX_op_umax_vec:
2402         return has_type && TCG_TARGET_HAS_minmax_vec;
2403     case INDEX_op_bitsel_vec:
2404         return has_type && TCG_TARGET_HAS_bitsel_vec;
2405     case INDEX_op_cmpsel_vec:
2406         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2407 
2408     default:
2409         if (op < INDEX_op_last_generic) {
2410             const TCGOutOp *outop;
2411             TCGConstraintSetIndex con_set;
2412 
2413             if (!has_type) {
2414                 return false;
2415             }
2416 
2417             outop = all_outop[op];
2418             tcg_debug_assert(outop != NULL);
2419 
2420             con_set = outop->static_constraint;
2421             if (con_set == C_Dynamic) {
2422                 con_set = outop->dynamic_constraint(type, flags);
2423             }
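            /* A non-negative index names a usable constraint set. */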
2424             if (con_set >= 0) {
2425                 return true;
2426             }
2427             tcg_debug_assert(con_set == C_NotImplemented);
2428             return false;
2429         }
2430         tcg_debug_assert(op < NB_OPS);
2431         return true;
2432 
2433     case INDEX_op_last_generic:
2434         g_assert_not_reached();
2435     }
2436 }
2437 
2438 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2439 {
2440     unsigned width;
2441 
2442     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2443     width = (type == TCG_TYPE_I32 ? 32 : 64);
2444 
2445     tcg_debug_assert(ofs < width);
2446     tcg_debug_assert(len > 0);
2447     tcg_debug_assert(len <= width - ofs);
2448 
2449     return TCG_TARGET_deposit_valid(type, ofs, len);
2450 }
2451 
2452 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2453 
2454 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2455                           TCGTemp *ret, TCGTemp **args)
2456 {
2457     TCGv_i64 extend_free[MAX_CALL_IARGS];
2458     int n_extend = 0;
2459     TCGOp *op;
2460     int i, n, pi = 0, total_args;
2461 
2462     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2463         init_call_layout(info);
2464         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2465     }
2466 
2467     total_args = info->nr_out + info->nr_in + 2;
2468     op = tcg_op_alloc(INDEX_op_call, total_args);
2469 
2470 #ifdef CONFIG_PLUGIN
2471     /* Flag helpers that may affect guest state */
2472     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2473         tcg_ctx->plugin_insn->calls_helpers = true;
2474     }
2475 #endif
2476 
2477     TCGOP_CALLO(op) = n = info->nr_out;
2478     switch (n) {
2479     case 0:
2480         tcg_debug_assert(ret == NULL);
2481         break;
2482     case 1:
2483         tcg_debug_assert(ret != NULL);
2484         op->args[pi++] = temp_arg(ret);
2485         break;
2486     case 2:
2487     case 4:
2488         tcg_debug_assert(ret != NULL);
2489         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2490         tcg_debug_assert(ret->temp_subindex == 0);
2491         for (i = 0; i < n; ++i) {
2492             op->args[pi++] = temp_arg(ret + i);
2493         }
2494         break;
2495     default:
2496         g_assert_not_reached();
2497     }
2498 
2499     TCGOP_CALLI(op) = n = info->nr_in;
2500     for (i = 0; i < n; i++) {
2501         const TCGCallArgumentLoc *loc = &info->in[i];
2502         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2503 
2504         switch (loc->kind) {
2505         case TCG_CALL_ARG_NORMAL:
2506         case TCG_CALL_ARG_BY_REF:
2507         case TCG_CALL_ARG_BY_REF_N:
2508             op->args[pi++] = temp_arg(ts);
2509             break;
2510 
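        /*
         * Hosts whose ABI extends 32-bit arguments to 64 bits get the
         * extension materialized into a fresh EBB temp here; the temps
         * are collected in extend_free[] and released after emission.
         */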
2511         case TCG_CALL_ARG_EXTEND_U:
2512         case TCG_CALL_ARG_EXTEND_S:
2513             {
2514                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2515                 TCGv_i32 orig = temp_tcgv_i32(ts);
2516 
2517                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2518                     tcg_gen_ext_i32_i64(temp, orig);
2519                 } else {
2520                     tcg_gen_extu_i32_i64(temp, orig);
2521                 }
2522                 op->args[pi++] = tcgv_i64_arg(temp);
2523                 extend_free[n_extend++] = temp;
2524             }
2525             break;
2526 
2527         default:
2528             g_assert_not_reached();
2529         }
2530     }
2531     op->args[pi++] = (uintptr_t)func;
2532     op->args[pi++] = (uintptr_t)info;
2533     tcg_debug_assert(pi == total_args);
2534 
2535     if (tcg_ctx->emit_before_op) {
2536         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2537     } else {
2538         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2539     }
2540 
2541     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2542     for (i = 0; i < n_extend; ++i) {
2543         tcg_temp_free_i64(extend_free[i]);
2544     }
2545 }
2546 
2547 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2548 {
2549     tcg_gen_callN(func, info, ret, NULL);
2550 }
2551 
2552 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2553 {
2554     tcg_gen_callN(func, info, ret, &t1);
2555 }
2556 
2557 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2558                    TCGTemp *t1, TCGTemp *t2)
2559 {
2560     TCGTemp *args[2] = { t1, t2 };
2561     tcg_gen_callN(func, info, ret, args);
2562 }
2563 
2564 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2565                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2566 {
2567     TCGTemp *args[3] = { t1, t2, t3 };
2568     tcg_gen_callN(func, info, ret, args);
2569 }
2570 
2571 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2572                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2573 {
2574     TCGTemp *args[4] = { t1, t2, t3, t4 };
2575     tcg_gen_callN(func, info, ret, args);
2576 }
2577 
2578 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2579                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2580 {
2581     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2587                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2588 {
2589     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2594                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2595                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2596 {
2597     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2598     tcg_gen_callN(func, info, ret, args);
2599 }
2600 
2601 static void tcg_reg_alloc_start(TCGContext *s)
2602 {
2603     int i, n;
2604 
2605     for (i = 0, n = s->nb_temps; i < n; i++) {
2606         TCGTemp *ts = &s->temps[i];
2607         TCGTempVal val = TEMP_VAL_MEM;
2608 
2609         switch (ts->kind) {
2610         case TEMP_CONST:
2611             val = TEMP_VAL_CONST;
2612             break;
2613         case TEMP_FIXED:
2614             val = TEMP_VAL_REG;
2615             break;
2616         case TEMP_GLOBAL:
2617             break;
2618         case TEMP_EBB:
2619             val = TEMP_VAL_DEAD;
2620             /* fall through */
2621         case TEMP_TB:
2622             ts->mem_allocated = 0;
2623             break;
2624         default:
2625             g_assert_not_reached();
2626         }
2627         ts->val_type = val;
2628     }
2629 
2630     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2631 }
2632 
2633 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2634                                  TCGTemp *ts)
2635 {
2636     int idx = temp_idx(ts);
2637 
2638     switch (ts->kind) {
2639     case TEMP_FIXED:
2640     case TEMP_GLOBAL:
2641         pstrcpy(buf, buf_size, ts->name);
2642         break;
2643     case TEMP_TB:
2644         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2645         break;
2646     case TEMP_EBB:
2647         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2648         break;
2649     case TEMP_CONST:
2650         switch (ts->type) {
2651         case TCG_TYPE_I32:
2652             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2653             break;
2654 #if TCG_TARGET_REG_BITS > 32
2655         case TCG_TYPE_I64:
2656             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2657             break;
2658 #endif
2659         case TCG_TYPE_V64:
2660         case TCG_TYPE_V128:
2661         case TCG_TYPE_V256:
2662             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2663                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2664             break;
2665         default:
2666             g_assert_not_reached();
2667         }
2668         break;
2669     }
2670     return buf;
2671 }
2672 
2673 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2674                              int buf_size, TCGArg arg)
2675 {
2676     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2677 }
2678 
2679 static const char * const cond_name[] =
2680 {
2681     [TCG_COND_NEVER] = "never",
2682     [TCG_COND_ALWAYS] = "always",
2683     [TCG_COND_EQ] = "eq",
2684     [TCG_COND_NE] = "ne",
2685     [TCG_COND_LT] = "lt",
2686     [TCG_COND_GE] = "ge",
2687     [TCG_COND_LE] = "le",
2688     [TCG_COND_GT] = "gt",
2689     [TCG_COND_LTU] = "ltu",
2690     [TCG_COND_GEU] = "geu",
2691     [TCG_COND_LEU] = "leu",
2692     [TCG_COND_GTU] = "gtu",
2693     [TCG_COND_TSTEQ] = "tsteq",
2694     [TCG_COND_TSTNE] = "tstne",
2695 };
2696 
2697 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2698 {
2699     [MO_UB]   = "ub",
2700     [MO_SB]   = "sb",
2701     [MO_LEUW] = "leuw",
2702     [MO_LESW] = "lesw",
2703     [MO_LEUL] = "leul",
2704     [MO_LESL] = "lesl",
2705     [MO_LEUQ] = "leq",
2706     [MO_BEUW] = "beuw",
2707     [MO_BESW] = "besw",
2708     [MO_BEUL] = "beul",
2709     [MO_BESL] = "besl",
2710     [MO_BEUQ] = "beq",
2711     [MO_128 + MO_BE] = "beo",
2712     [MO_128 + MO_LE] = "leo",
2713 };
2714 
2715 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2716     [MO_UNALN >> MO_ASHIFT]    = "un+",
2717     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2718     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2719     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2720     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2721     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2722     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2723     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2724 };
2725 
2726 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2727     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2728     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2729     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2730     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2731     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2732     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2733 };
2734 
2735 static const char bswap_flag_name[][6] = {
2736     [TCG_BSWAP_IZ] = "iz",
2737     [TCG_BSWAP_OZ] = "oz",
2738     [TCG_BSWAP_OS] = "os",
2739     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2740     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2741 };
2742 
2743 #ifdef CONFIG_PLUGIN
2744 static const char * const plugin_from_name[] = {
2745     "from-tb",
2746     "from-insn",
2747     "after-insn",
2748     "after-tb",
2749 };
2750 #endif
2751 
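/* d & (d - 1) clears the lowest set bit; zero means at most one register. */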
2752 static inline bool tcg_regset_single(TCGRegSet d)
2753 {
2754     return (d & (d - 1)) == 0;
2755 }
2756 
2757 static inline TCGReg tcg_regset_first(TCGRegSet d)
2758 {
2759     if (TCG_TARGET_NB_REGS <= 32) {
2760         return ctz32(d);
2761     } else {
2762         return ctz64(d);
2763     }
2764 }
2765 
2766 /* Return only the number of characters output -- no error return. */
2767 #define ne_fprintf(...) \
2768     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2769 
2770 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2771 {
2772     char buf[128];
2773     TCGOp *op;
2774 
2775     QTAILQ_FOREACH(op, &s->ops, link) {
2776         int i, k, nb_oargs, nb_iargs, nb_cargs;
2777         const TCGOpDef *def;
2778         TCGOpcode c;
2779         int col = 0;
2780 
2781         c = op->opc;
2782         def = &tcg_op_defs[c];
2783 
2784         if (c == INDEX_op_insn_start) {
2785             nb_oargs = 0;
2786             col += ne_fprintf(f, "\n ----");
2787 
2788             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2789                 col += ne_fprintf(f, " %016" PRIx64,
2790                                   tcg_get_insn_start_param(op, i));
2791             }
2792         } else if (c == INDEX_op_call) {
2793             const TCGHelperInfo *info = tcg_call_info(op);
2794             void *func = tcg_call_func(op);
2795 
2796             /* variable number of arguments */
2797             nb_oargs = TCGOP_CALLO(op);
2798             nb_iargs = TCGOP_CALLI(op);
2799             nb_cargs = def->nb_cargs;
2800 
2801             col += ne_fprintf(f, " %s ", def->name);
2802 
2803             /*
2804              * Print the function name from TCGHelperInfo, if available.
2805              * Note that plugins have a template function for the info,
2806              * but the actual function pointer comes from the plugin.
2807              */
2808             if (func == info->func) {
2809                 col += ne_fprintf(f, "%s", info->name);
2810             } else {
2811                 col += ne_fprintf(f, "plugin(%p)", func);
2812             }
2813 
2814             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2815             for (i = 0; i < nb_oargs; i++) {
2816                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2817                                                             op->args[i]));
2818             }
2819             for (i = 0; i < nb_iargs; i++) {
2820                 TCGArg arg = op->args[nb_oargs + i];
2821                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2822                 col += ne_fprintf(f, ",%s", t);
2823             }
2824         } else {
2825             if (def->flags & TCG_OPF_INT) {
2826                 col += ne_fprintf(f, " %s_i%d ",
2827                                   def->name,
2828                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2829             } else if (def->flags & TCG_OPF_VECTOR) {
2830                 col += ne_fprintf(f, "%s v%d,e%d,",
2831                                   def->name,
2832                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2833                                   8 << TCGOP_VECE(op));
2834             } else {
2835                 col += ne_fprintf(f, " %s ", def->name);
2836             }
2837 
2838             nb_oargs = def->nb_oargs;
2839             nb_iargs = def->nb_iargs;
2840             nb_cargs = def->nb_cargs;
2841 
2842             k = 0;
2843             for (i = 0; i < nb_oargs; i++) {
2844                 const char *sep = k ? "," : "";
2845                 col += ne_fprintf(f, "%s%s", sep,
2846                                   tcg_get_arg_str(s, buf, sizeof(buf),
2847                                                   op->args[k++]));
2848             }
2849             for (i = 0; i < nb_iargs; i++) {
2850                 const char *sep = k ? "," : "";
2851                 col += ne_fprintf(f, "%s%s", sep,
2852                                   tcg_get_arg_str(s, buf, sizeof(buf),
2853                                                   op->args[k++]));
2854             }
2855             switch (c) {
2856             case INDEX_op_brcond_i32:
2857             case INDEX_op_setcond_i32:
2858             case INDEX_op_negsetcond_i32:
2859             case INDEX_op_movcond_i32:
2860             case INDEX_op_brcond2_i32:
2861             case INDEX_op_setcond2_i32:
2862             case INDEX_op_brcond_i64:
2863             case INDEX_op_setcond_i64:
2864             case INDEX_op_negsetcond_i64:
2865             case INDEX_op_movcond_i64:
2866             case INDEX_op_cmp_vec:
2867             case INDEX_op_cmpsel_vec:
2868                 if (op->args[k] < ARRAY_SIZE(cond_name)
2869                     && cond_name[op->args[k]]) {
2870                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2871                 } else {
2872                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2873                 }
2874                 i = 1;
2875                 break;
2876             case INDEX_op_qemu_ld_i32:
2877             case INDEX_op_qemu_st_i32:
2878             case INDEX_op_qemu_st8_i32:
2879             case INDEX_op_qemu_ld_i64:
2880             case INDEX_op_qemu_st_i64:
2881             case INDEX_op_qemu_ld_i128:
2882             case INDEX_op_qemu_st_i128:
2883                 {
2884                     const char *s_al, *s_op, *s_at;
2885                     MemOpIdx oi = op->args[k++];
2886                     MemOp mop = get_memop(oi);
2887                     unsigned ix = get_mmuidx(oi);
2888 
2889                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2890                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2891                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2892                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2893 
2894                     /* If all fields are accounted for, print symbolically. */
2895                     if (!mop && s_al && s_op && s_at) {
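                    /* E.g. ",w16+al+leul,1" for MO_ATOM_WITHIN16 | MO_ALIGN | MO_LEUL, mmu idx 1. */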
2896                         col += ne_fprintf(f, ",%s%s%s,%u",
2897                                           s_at, s_al, s_op, ix);
2898                     } else {
2899                         mop = get_memop(oi);
2900                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2901                     }
2902                     i = 1;
2903                 }
2904                 break;
2905             case INDEX_op_bswap16_i32:
2906             case INDEX_op_bswap16_i64:
2907             case INDEX_op_bswap32_i32:
2908             case INDEX_op_bswap32_i64:
2909             case INDEX_op_bswap64_i64:
2910                 {
2911                     TCGArg flags = op->args[k];
2912                     const char *name = NULL;
2913 
2914                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2915                         name = bswap_flag_name[flags];
2916                     }
2917                     if (name) {
2918                         col += ne_fprintf(f, ",%s", name);
2919                     } else {
2920                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2921                     }
2922                     i = k = 1;
2923                 }
2924                 break;
2925 #ifdef CONFIG_PLUGIN
2926             case INDEX_op_plugin_cb:
2927                 {
2928                     TCGArg from = op->args[k++];
2929                     const char *name = NULL;
2930 
2931                     if (from < ARRAY_SIZE(plugin_from_name)) {
2932                         name = plugin_from_name[from];
2933                     }
2934                     if (name) {
2935                         col += ne_fprintf(f, "%s", name);
2936                     } else {
2937                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2938                     }
2939                     i = 1;
2940                 }
2941                 break;
2942 #endif
2943             default:
2944                 i = 0;
2945                 break;
2946             }
2947             switch (c) {
2948             case INDEX_op_set_label:
2949             case INDEX_op_br:
2950             case INDEX_op_brcond_i32:
2951             case INDEX_op_brcond_i64:
2952             case INDEX_op_brcond2_i32:
2953                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2954                                   arg_label(op->args[k])->id);
2955                 i++, k++;
2956                 break;
2957             case INDEX_op_mb:
2958                 {
2959                     TCGBar membar = op->args[k];
2960                     const char *b_op, *m_op;
2961 
2962                     switch (membar & TCG_BAR_SC) {
2963                     case 0:
2964                         b_op = "none";
2965                         break;
2966                     case TCG_BAR_LDAQ:
2967                         b_op = "acq";
2968                         break;
2969                     case TCG_BAR_STRL:
2970                         b_op = "rel";
2971                         break;
2972                     case TCG_BAR_SC:
2973                         b_op = "seq";
2974                         break;
2975                     default:
2976                         g_assert_not_reached();
2977                     }
2978 
2979                     switch (membar & TCG_MO_ALL) {
2980                     case 0:
2981                         m_op = "none";
2982                         break;
2983                     case TCG_MO_LD_LD:
2984                         m_op = "rr";
2985                         break;
2986                     case TCG_MO_LD_ST:
2987                         m_op = "rw";
2988                         break;
2989                     case TCG_MO_ST_LD:
2990                         m_op = "wr";
2991                         break;
2992                     case TCG_MO_ST_ST:
2993                         m_op = "ww";
2994                         break;
2995                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2996                         m_op = "rr+rw";
2997                         break;
2998                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2999                         m_op = "rr+wr";
3000                         break;
3001                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3002                         m_op = "rr+ww";
3003                         break;
3004                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3005                         m_op = "rw+wr";
3006                         break;
3007                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3008                         m_op = "rw+ww";
3009                         break;
3010                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3011                         m_op = "wr+ww";
3012                         break;
3013                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3014                         m_op = "rr+rw+wr";
3015                         break;
3016                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3017                         m_op = "rr+rw+ww";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3020                         m_op = "rr+wr+ww";
3021                         break;
3022                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3023                         m_op = "rw+wr+ww";
3024                         break;
3025                     case TCG_MO_ALL:
3026                         m_op = "all";
3027                         break;
3028                     default:
3029                         g_assert_not_reached();
3030                     }
3031 
3032                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3033                     i++, k++;
3034                 }
3035                 break;
3036             default:
3037                 break;
3038             }
3039             for (; i < nb_cargs; i++, k++) {
3040                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3041                                   op->args[k]);
3042             }
3043         }
3044 
3045         if (have_prefs || op->life) {
3046             for (; col < 40; ++col) {
3047                 putc(' ', f);
3048             }
3049         }
3050 
3051         if (op->life) {
3052             unsigned life = op->life;
3053 
3054             if (life & (SYNC_ARG * 3)) {
3055                 ne_fprintf(f, "  sync:");
3056                 for (i = 0; i < 2; ++i) {
3057                     if (life & (SYNC_ARG << i)) {
3058                         ne_fprintf(f, " %d", i);
3059                     }
3060                 }
3061             }
3062             life /= DEAD_ARG;
3063             if (life) {
3064                 ne_fprintf(f, "  dead:");
3065                 for (i = 0; life; ++i, life >>= 1) {
3066                     if (life & 1) {
3067                         ne_fprintf(f, " %d", i);
3068                     }
3069                 }
3070             }
3071         }
3072 
3073         if (have_prefs) {
3074             for (i = 0; i < nb_oargs; ++i) {
3075                 TCGRegSet set = output_pref(op, i);
3076 
3077                 if (i == 0) {
3078                     ne_fprintf(f, "  pref=");
3079                 } else {
3080                     ne_fprintf(f, ",");
3081                 }
3082                 if (set == 0) {
3083                     ne_fprintf(f, "none");
3084                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3085                     ne_fprintf(f, "all");
3086 #ifdef CONFIG_DEBUG_TCG
3087                 } else if (tcg_regset_single(set)) {
3088                     TCGReg reg = tcg_regset_first(set);
3089                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3090 #endif
3091                 } else if (TCG_TARGET_NB_REGS <= 32) {
3092                     ne_fprintf(f, "0x%x", (uint32_t)set);
3093                 } else {
3094                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3095                 }
3096             }
3097         }
3098 
3099         putc('\n', f);
3100     }
3101 }
3102 
3103 /* We give higher priority to constraints with fewer registers. */
3104 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3105 {
3106     int n;
3107 
3108     arg_ct += k;
3109     n = ctpop64(arg_ct->regs);
3110 
3111     /*
3112      * Sort single-register constraints first; this includes output
3113      * aliases, which must exactly match the already-allocated input.
3114      */
3115     if (n == 1 || arg_ct->oalias) {
3116         return INT_MAX;
3117     }
3118 
3119     /*
3120      * Sort register pairs next: the first member, then the second immediately after.
3121      * Arbitrarily sort multiple pairs by the index of the first reg;
3122      * there shouldn't be many pairs.
3123      */
3124     switch (arg_ct->pair) {
3125     case 1:
3126     case 3:
3127         return (k + 1) * 2;
3128     case 2:
3129         return (arg_ct->pair_index + 1) * 2 - 1;
3130     }
3131 
3132     /* Finally, sort by decreasing register count. */
3133     assert(n > 1);
3134     return -n;
3135 }
3136 
3137 /* Sort from highest priority to lowest. */
3138 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3139 {
3140     int i, j;
3141 
3142     for (i = 0; i < n; i++) {
3143         a[start + i].sort_index = start + i;
3144     }
3145     if (n <= 1) {
3146         return;
3147     }
3148     for (i = 0; i < n - 1; i++) {
3149         for (j = i + 1; j < n; j++) {
3150             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3151             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3152             if (p1 < p2) {
3153                 int tmp = a[start + i].sort_index;
3154                 a[start + i].sort_index = a[start + j].sort_index;
3155                 a[start + j].sort_index = tmp;
3156             }
3157         }
3158     }
3159 }
3160 
3161 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3162 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3163 
3164 static void process_constraint_sets(void)
3165 {
3166     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3167         const TCGConstraintSet *tdefs = &constraint_sets[c];
3168         TCGArgConstraint *args_ct = all_cts[c];
3169         int nb_oargs = tdefs->nb_oargs;
3170         int nb_iargs = tdefs->nb_iargs;
3171         int nb_args = nb_oargs + nb_iargs;
3172         bool saw_alias_pair = false;
3173 
3174         for (int i = 0; i < nb_args; i++) {
3175             const char *ct_str = tdefs->args_ct_str[i];
3176             bool input_p = i >= nb_oargs;
3177             int o;
3178 
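            /*
             * Constraint letters: a digit aliases this input to that
             * output; '&' requests a fresh output register (newreg);
             * 'p'/'m' pair the argument with the previous one (the
             * next/previous register); remaining letters come from
             * tcg-target-con-str.h ('i' = TCG_CT_CONST, etc.).
             */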
3179             switch (*ct_str) {
3180             case '0' ... '9':
3181                 o = *ct_str - '0';
3182                 tcg_debug_assert(input_p);
3183                 tcg_debug_assert(o < nb_oargs);
3184                 tcg_debug_assert(args_ct[o].regs != 0);
3185                 tcg_debug_assert(!args_ct[o].oalias);
3186                 args_ct[i] = args_ct[o];
3187                 /* The output sets oalias.  */
3188                 args_ct[o].oalias = 1;
3189                 args_ct[o].alias_index = i;
3190                 /* The input sets ialias. */
3191                 args_ct[i].ialias = 1;
3192                 args_ct[i].alias_index = o;
3193                 if (args_ct[i].pair) {
3194                     saw_alias_pair = true;
3195                 }
3196                 tcg_debug_assert(ct_str[1] == '\0');
3197                 continue;
3198 
3199             case '&':
3200                 tcg_debug_assert(!input_p);
3201                 args_ct[i].newreg = true;
3202                 ct_str++;
3203                 break;
3204 
3205             case 'p': /* plus */
3206                 /* Allocate to the register after the previous. */
3207                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3208                 o = i - 1;
3209                 tcg_debug_assert(!args_ct[o].pair);
3210                 tcg_debug_assert(!args_ct[o].ct);
3211                 args_ct[i] = (TCGArgConstraint){
3212                     .pair = 2,
3213                     .pair_index = o,
3214                     .regs = args_ct[o].regs << 1,
3215                     .newreg = args_ct[o].newreg,
3216                 };
3217                 args_ct[o].pair = 1;
3218                 args_ct[o].pair_index = i;
3219                 tcg_debug_assert(ct_str[1] == '\0');
3220                 continue;
3221 
3222             case 'm': /* minus */
3223                 /* Allocate to the register before the previous. */
3224                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3225                 o = i - 1;
3226                 tcg_debug_assert(!args_ct[o].pair);
3227                 tcg_debug_assert(!args_ct[o].ct);
3228                 args_ct[i] = (TCGArgConstraint){
3229                     .pair = 1,
3230                     .pair_index = o,
3231                     .regs = args_ct[o].regs >> 1,
3232                     .newreg = args_ct[o].newreg,
3233                 };
3234                 args_ct[o].pair = 2;
3235                 args_ct[o].pair_index = i;
3236                 tcg_debug_assert(ct_str[1] == '\0');
3237                 continue;
3238             }
3239 
3240             do {
3241                 switch (*ct_str) {
3242                 case 'i':
3243                     args_ct[i].ct |= TCG_CT_CONST;
3244                     break;
3245 #ifdef TCG_REG_ZERO
3246                 case 'z':
3247                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3248                     break;
3249 #endif
3250 
3251                 /* Include all of the target-specific constraints. */
3252 
3253 #undef CONST
3254 #define CONST(CASE, MASK) \
3255     case CASE: args_ct[i].ct |= MASK; break;
3256 #define REGS(CASE, MASK) \
3257     case CASE: args_ct[i].regs |= MASK; break;
3258 
3259 #include "tcg-target-con-str.h"
3260 
3261 #undef REGS
3262 #undef CONST
3263                 default:
3264                 case '0' ... '9':
3265                 case '&':
3266                 case 'p':
3267                 case 'm':
3268                     /* Typo in TCGConstraintSet constraint. */
3269                     g_assert_not_reached();
3270                 }
3271             } while (*++ct_str != '\0');
3272         }
3273 
3274         /*
3275          * Fix up output pairs that are aliased with inputs.
3276          * When we created the alias, we copied pair from the output.
3277          * There are three cases:
3278          *    (1a) Pairs of inputs alias pairs of outputs.
3279          *    (1b) One input aliases the first of a pair of outputs.
3280          *    (2)  One input aliases the second of a pair of outputs.
3281          *
3282          * Case 1a is handled by making sure that the pair_index'es are
3283          * properly updated so that they appear the same as a pair of inputs.
3284          *
3285          * Case 1b is handled by setting the pair_index of the input to
3286          * itself, simply so it doesn't point to an unrelated argument.
3287          * Since we don't encounter the "second" during the input allocation
3288          * phase, nothing happens with the second half of the input pair.
3289          *
3290          * Case 2 is handled by setting the second input to pair=3, the
3291          * first output to pair=3, and the pair_index'es to match.
3292          */
3293         if (saw_alias_pair) {
3294             for (int i = nb_oargs; i < nb_args; i++) {
3295                 int o, o2, i2;
3296 
3297                 /*
3298                  * Since [0-9pm] must be alone in the constraint string,
3299                  * the only way they can both be set is if the pair comes
3300                  * from the output alias.
3301                  */
3302                 if (!args_ct[i].ialias) {
3303                     continue;
3304                 }
3305                 switch (args_ct[i].pair) {
3306                 case 0:
3307                     break;
3308                 case 1:
3309                     o = args_ct[i].alias_index;
3310                     o2 = args_ct[o].pair_index;
3311                     tcg_debug_assert(args_ct[o].pair == 1);
3312                     tcg_debug_assert(args_ct[o2].pair == 2);
3313                     if (args_ct[o2].oalias) {
3314                         /* Case 1a */
3315                         i2 = args_ct[o2].alias_index;
3316                         tcg_debug_assert(args_ct[i2].pair == 2);
3317                         args_ct[i2].pair_index = i;
3318                         args_ct[i].pair_index = i2;
3319                     } else {
3320                         /* Case 1b */
3321                         args_ct[i].pair_index = i;
3322                     }
3323                     break;
3324                 case 2:
3325                     o = args_ct[i].alias_index;
3326                     o2 = args_ct[o].pair_index;
3327                     tcg_debug_assert(args_ct[o].pair == 2);
3328                     tcg_debug_assert(args_ct[o2].pair == 1);
3329                     if (args_ct[o2].oalias) {
3330                         /* Case 1a */
3331                         i2 = args_ct[o2].alias_index;
3332                         tcg_debug_assert(args_ct[i2].pair == 1);
3333                         args_ct[i2].pair_index = i;
3334                         args_ct[i].pair_index = i2;
3335                     } else {
3336                         /* Case 2 */
3337                         args_ct[i].pair = 3;
3338                         args_ct[o2].pair = 3;
3339                         args_ct[i].pair_index = o2;
3340                         args_ct[o2].pair_index = i;
3341                     }
3342                     break;
3343                 default:
3344                     g_assert_not_reached();
3345                 }
3346             }
3347         }
3348 
3349         /* sort the constraints (XXX: this is just a heuristic) */
3350         sort_constraints(args_ct, 0, nb_oargs);
3351         sort_constraints(args_ct, nb_oargs, nb_iargs);
3352     }
3353 }
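
/*
 * Editor's illustration of case 2 above, for a hypothetical set
 * "o0, o1, i0" where o0/o1 form a register pair (o0 first) and i0
 * carries the constraint "1" (alias of o1): i0 initially copies
 * pair == 2 from o1, and the fixup then sets i0.pair = o0.pair = 3
 * and points their pair_index fields at each other, recording that
 * i0 and o0 must be allocated as the second and first half of one
 * register pair.
 */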
3354 
3355 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3356 {
3357     TCGOpcode opc = op->opc;
3358     TCGType type = TCGOP_TYPE(op);
3359     unsigned flags = TCGOP_FLAGS(op);
3360     const TCGOpDef *def = &tcg_op_defs[opc];
3361     const TCGOutOp *outop = all_outop[opc];
3362     TCGConstraintSetIndex con_set;
3363 
3364     if (def->flags & TCG_OPF_NOT_PRESENT) {
3365         return empty_cts;
3366     }
3367 
3368     if (outop) {
3369         con_set = outop->static_constraint;
3370         if (con_set == C_Dynamic) {
3371             con_set = outop->dynamic_constraint(type, flags);
3372         }
3373     } else {
3374         con_set = tcg_target_op_def(opc, type, flags);
3375     }
3376     tcg_debug_assert(con_set >= 0);
3377     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3378 
3379     /* The constraint arguments must match TCGOpcode arguments. */
3380     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3381     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3382 
3383     return all_cts[con_set];
3384 }
3385 
3386 static void remove_label_use(TCGOp *op, int idx)
3387 {
3388     TCGLabel *label = arg_label(op->args[idx]);
3389     TCGLabelUse *use;
3390 
3391     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3392         if (use->op == op) {
3393             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3394             return;
3395         }
3396     }
3397     g_assert_not_reached();
3398 }
3399 
3400 void tcg_op_remove(TCGContext *s, TCGOp *op)
3401 {
3402     switch (op->opc) {
3403     case INDEX_op_br:
3404         remove_label_use(op, 0);
3405         break;
3406     case INDEX_op_brcond_i32:
3407     case INDEX_op_brcond_i64:
3408         remove_label_use(op, 3);
3409         break;
3410     case INDEX_op_brcond2_i32:
3411         remove_label_use(op, 5);
3412         break;
3413     default:
3414         break;
3415     }
3416 
3417     QTAILQ_REMOVE(&s->ops, op, link);
3418     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3419     s->nb_ops--;
3420 }
3421 
3422 void tcg_remove_ops_after(TCGOp *op)
3423 {
3424     TCGContext *s = tcg_ctx;
3425 
3426     while (true) {
3427         TCGOp *last = tcg_last_op();
3428         if (last == op) {
3429             return;
3430         }
3431         tcg_op_remove(s, last);
3432     }
3433 }
3434 
3435 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3436 {
3437     TCGContext *s = tcg_ctx;
3438     TCGOp *op = NULL;
3439 
3440     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3441         QTAILQ_FOREACH(op, &s->free_ops, link) {
3442             if (nargs <= op->nargs) {
3443                 QTAILQ_REMOVE(&s->free_ops, op, link);
3444                 nargs = op->nargs;
3445                 goto found;
3446             }
3447         }
3448     }
3449 
3450     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3451     nargs = MAX(4, nargs);
3452     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3453 
3454  found:
3455     memset(op, 0, offsetof(TCGOp, link));
3456     op->opc = opc;
3457     op->nargs = nargs;
3458 
3459     /* Check for bitfield overflow. */
3460     tcg_debug_assert(op->nargs == nargs);
3461 
3462     s->nb_ops++;
3463     return op;
3464 }
3465 
3466 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3467 {
3468     TCGOp *op = tcg_op_alloc(opc, nargs);
3469 
3470     if (tcg_ctx->emit_before_op) {
3471         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3472     } else {
3473         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3474     }
3475     return op;
3476 }
3477 
3478 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3479                             TCGOpcode opc, TCGType type, unsigned nargs)
3480 {
3481     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3482 
3483     TCGOP_TYPE(new_op) = type;
3484     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3485     return new_op;
3486 }
3487 
3488 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3489                            TCGOpcode opc, TCGType type, unsigned nargs)
3490 {
3491     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3492 
3493     TCGOP_TYPE(new_op) = type;
3494     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3495     return new_op;
3496 }
3497 
3498 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3499 {
3500     TCGLabelUse *u;
3501 
3502     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3503         TCGOp *op = u->op;
3504         switch (op->opc) {
3505         case INDEX_op_br:
3506             op->args[0] = label_arg(to);
3507             break;
3508         case INDEX_op_brcond_i32:
3509         case INDEX_op_brcond_i64:
3510             op->args[3] = label_arg(to);
3511             break;
3512         case INDEX_op_brcond2_i32:
3513             op->args[5] = label_arg(to);
3514             break;
3515         default:
3516             g_assert_not_reached();
3517         }
3518     }
3519 
3520     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3521 }
3522 
3523 /* Reachability analysis: remove unreachable code.  */
3524 static void __attribute__((noinline))
3525 reachable_code_pass(TCGContext *s)
3526 {
3527     TCGOp *op, *op_next, *op_prev;
3528     bool dead = false;
3529 
3530     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3531         bool remove = dead;
3532         TCGLabel *label;
3533 
3534         switch (op->opc) {
3535         case INDEX_op_set_label:
3536             label = arg_label(op->args[0]);
3537 
3538             /*
3539              * Note that the first op in the TB is always a load,
3540              * so there is always something before a label.
3541              */
3542             op_prev = QTAILQ_PREV(op, link);
3543 
3544             /*
3545              * If we find two sequential labels, move all branches to
3546              * reference the second label and remove the first label.
3547              * Do this before the branch-to-next optimization, so that the
3548              * middle label is out of the way.
3549              */
3550             if (op_prev->opc == INDEX_op_set_label) {
3551                 move_label_uses(label, arg_label(op_prev->args[0]));
3552                 tcg_op_remove(s, op_prev);
3553                 op_prev = QTAILQ_PREV(op, link);
3554             }
3555 
3556             /*
3557              * Optimization can fold conditional branches to unconditional.
3558              * If we find a label which is preceded by an unconditional
3559              * branch to next, remove the branch.  We couldn't do this when
3560              * processing the branch because any dead code between the branch
3561              * and label had not yet been removed.
3562              */
3563             if (op_prev->opc == INDEX_op_br &&
3564                 label == arg_label(op_prev->args[0])) {
3565                 tcg_op_remove(s, op_prev);
3566                 /* Fall through means insns become live again.  */
3567                 dead = false;
3568             }
3569 
3570             if (QSIMPLEQ_EMPTY(&label->branches)) {
3571                 /*
3572                  * While there is an occasional backward branch, virtually
3573                  * all branches generated by the translators are forward.
3574                  * Which means that generally we will have already removed
3575                  * all the references to this label that will ever exist,
3576                  * and there is little to be gained by iterating.
3577                  */
3578                 remove = true;
3579             } else {
3580                 /* Once we see a label, insns become live again.  */
3581                 dead = false;
3582                 remove = false;
3583             }
3584             break;
3585 
3586         case INDEX_op_br:
3587         case INDEX_op_exit_tb:
3588         case INDEX_op_goto_ptr:
3589             /* Unconditional branches; everything following is dead.  */
3590             dead = true;
3591             break;
3592 
3593         case INDEX_op_call:
3594             /* Notice noreturn helper calls, raising exceptions.  */
3595             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3596                 dead = true;
3597             }
3598             break;
3599 
3600         case INDEX_op_insn_start:
3601             /* Never remove -- we need to keep these for unwind.  */
3602             remove = false;
3603             break;
3604 
3605         default:
3606             break;
3607         }
3608 
3609         if (remove) {
3610             tcg_op_remove(s, op);
3611         }
3612     }
3613 }
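
/*
 * Editor's illustration: given the op sequence
 *     br $L0 ; mov t0, t1 ; set_label $L0
 * the mov is dead (it follows an unconditional branch), the branch
 * targets the label that immediately follows it, and the label is
 * then left with no uses, so the pass removes all three ops.
 */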
3614 
3615 #define TS_DEAD  1
3616 #define TS_MEM   2
3617 
3618 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3619 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
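
/*
 * Editor's note: during the backward scan of liveness_pass_1, TS_DEAD
 * set on a temp roughly means that no later op reads its current value
 * (so a write here is dead), and TS_MEM means that a copy of the value
 * must exist in memory (so a write here needs a sync).  E.g. after
 * la_func_end() every global is TS_DEAD | TS_MEM: its register value
 * is unread past the TB, but its canonical memory slot must be
 * up to date.
 */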
3620 
3621 /* For liveness_pass_1, the register preferences for a given temp.  */
3622 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3623 {
3624     return ts->state_ptr;
3625 }
3626 
3627 /* For liveness_pass_1, reset the preferences for a given temp to the
3628  * maximal regset for its type.
3629  */
3630 static inline void la_reset_pref(TCGTemp *ts)
3631 {
3632     *la_temp_pref(ts)
3633         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3634 }
3635 
3636 /* liveness analysis: end of function: all temps are dead, and globals
3637    should be in memory. */
3638 static void la_func_end(TCGContext *s, int ng, int nt)
3639 {
3640     int i;
3641 
3642     for (i = 0; i < ng; ++i) {
3643         s->temps[i].state = TS_DEAD | TS_MEM;
3644         la_reset_pref(&s->temps[i]);
3645     }
3646     for (i = ng; i < nt; ++i) {
3647         s->temps[i].state = TS_DEAD;
3648         la_reset_pref(&s->temps[i]);
3649     }
3650 }
3651 
3652 /* liveness analysis: end of basic block: all temps are dead, globals
3653    and local temps should be in memory. */
3654 static void la_bb_end(TCGContext *s, int ng, int nt)
3655 {
3656     int i;
3657 
3658     for (i = 0; i < nt; ++i) {
3659         TCGTemp *ts = &s->temps[i];
3660         int state;
3661 
3662         switch (ts->kind) {
3663         case TEMP_FIXED:
3664         case TEMP_GLOBAL:
3665         case TEMP_TB:
3666             state = TS_DEAD | TS_MEM;
3667             break;
3668         case TEMP_EBB:
3669         case TEMP_CONST:
3670             state = TS_DEAD;
3671             break;
3672         default:
3673             g_assert_not_reached();
3674         }
3675         ts->state = state;
3676         la_reset_pref(ts);
3677     }
3678 }
3679 
3680 /* liveness analysis: sync globals back to memory.  */
3681 static void la_global_sync(TCGContext *s, int ng)
3682 {
3683     int i;
3684 
3685     for (i = 0; i < ng; ++i) {
3686         int state = s->temps[i].state;
3687         s->temps[i].state = state | TS_MEM;
3688         if (state == TS_DEAD) {
3689             /* If the global was previously dead, reset prefs.  */
3690             la_reset_pref(&s->temps[i]);
3691         }
3692     }
3693 }
3694 
3695 /*
3696  * liveness analysis: conditional branch: all temps are dead unless
3697  * explicitly live-across-conditional-branch; globals and local temps
3698  * should be synced.
3699  */
3700 static void la_bb_sync(TCGContext *s, int ng, int nt)
3701 {
3702     la_global_sync(s, ng);
3703 
3704     for (int i = ng; i < nt; ++i) {
3705         TCGTemp *ts = &s->temps[i];
3706         int state;
3707 
3708         switch (ts->kind) {
3709         case TEMP_TB:
3710             state = ts->state;
3711             ts->state = state | TS_MEM;
3712             if (state != TS_DEAD) {
3713                 continue;
3714             }
3715             break;
3716         case TEMP_EBB:
3717         case TEMP_CONST:
3718             continue;
3719         default:
3720             g_assert_not_reached();
3721         }
3722         la_reset_pref(&s->temps[i]);
3723     }
3724 }
3725 
3726 /* liveness analysis: sync globals back to memory and kill.  */
3727 static void la_global_kill(TCGContext *s, int ng)
3728 {
3729     int i;
3730 
3731     for (i = 0; i < ng; i++) {
3732         s->temps[i].state = TS_DEAD | TS_MEM;
3733         la_reset_pref(&s->temps[i]);
3734     }
3735 }
3736 
3737 /* liveness analysis: note live globals crossing calls.  */
3738 static void la_cross_call(TCGContext *s, int nt)
3739 {
3740     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3741     int i;
3742 
3743     for (i = 0; i < nt; i++) {
3744         TCGTemp *ts = &s->temps[i];
3745         if (!(ts->state & TS_DEAD)) {
3746             TCGRegSet *pset = la_temp_pref(ts);
3747             TCGRegSet set = *pset;
3748 
3749             set &= mask;
3750             /* If the combination is not possible, restart.  */
3751             if (set == 0) {
3752                 set = tcg_target_available_regs[ts->type] & mask;
3753             }
3754             *pset = set;
3755         }
3756     }
3757 }
3758 
3759 /*
3760  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3761  * to TEMP_EBB, if possible.
3762  */
3763 static void __attribute__((noinline))
3764 liveness_pass_0(TCGContext *s)
3765 {
3766     void * const multiple_ebb = (void *)(uintptr_t)-1;
3767     int nb_temps = s->nb_temps;
3768     TCGOp *op, *ebb;
3769 
3770     for (int i = s->nb_globals; i < nb_temps; ++i) {
3771         s->temps[i].state_ptr = NULL;
3772     }
3773 
3774     /*
3775      * Represent each EBB by the op at which it begins.  In the case of
3776      * the first EBB, this is the first op, otherwise it is a label.
3777      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3778      * within a single EBB, else MULTIPLE_EBB.
3779      */
3780     ebb = QTAILQ_FIRST(&s->ops);
3781     QTAILQ_FOREACH(op, &s->ops, link) {
3782         const TCGOpDef *def;
3783         int nb_oargs, nb_iargs;
3784 
3785         switch (op->opc) {
3786         case INDEX_op_set_label:
3787             ebb = op;
3788             continue;
3789         case INDEX_op_discard:
3790             continue;
3791         case INDEX_op_call:
3792             nb_oargs = TCGOP_CALLO(op);
3793             nb_iargs = TCGOP_CALLI(op);
3794             break;
3795         default:
3796             def = &tcg_op_defs[op->opc];
3797             nb_oargs = def->nb_oargs;
3798             nb_iargs = def->nb_iargs;
3799             break;
3800         }
3801 
3802         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3803             TCGTemp *ts = arg_temp(op->args[i]);
3804 
3805             if (ts->kind != TEMP_TB) {
3806                 continue;
3807             }
3808             if (ts->state_ptr == NULL) {
3809                 ts->state_ptr = ebb;
3810             } else if (ts->state_ptr != ebb) {
3811                 ts->state_ptr = multiple_ebb;
3812             }
3813         }
3814     }
3815 
3816     /*
3817      * For TEMP_TB that turned out not to be used beyond one EBB,
3818      * reduce the liveness to TEMP_EBB.
3819      */
3820     for (int i = s->nb_globals; i < nb_temps; ++i) {
3821         TCGTemp *ts = &s->temps[i];
3822         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3823             ts->kind = TEMP_EBB;
3824         }
3825     }
3826 }
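
/*
 * Editor's illustration: a TEMP_TB written and read only between two
 * labels, e.g.
 *     set_label $L1 ; mov t5, t0 ; add t1, t5, t2 ; br $L2
 * sees the same ebb (the $L1 set_label op) at every use, so its
 * state_ptr never becomes multiple_ebb and it is demoted to TEMP_EBB,
 * letting liveness treat it as dead at the end of that EBB.
 */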
3827 
3828 /* Liveness analysis: update the opc_arg_life array to tell if a
3829    given input argument is dead. Instructions updating dead
3830    temporaries are removed. */
3831 static void __attribute__((noinline))
3832 liveness_pass_1(TCGContext *s)
3833 {
3834     int nb_globals = s->nb_globals;
3835     int nb_temps = s->nb_temps;
3836     TCGOp *op, *op_prev;
3837     TCGRegSet *prefs;
3838     int i;
3839 
3840     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3841     for (i = 0; i < nb_temps; ++i) {
3842         s->temps[i].state_ptr = prefs + i;
3843     }
3844 
3845     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3846     la_func_end(s, nb_globals, nb_temps);
3847 
3848     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3849         int nb_iargs, nb_oargs;
3850         TCGOpcode opc_new, opc_new2;
3851         TCGLifeData arg_life = 0;
3852         TCGTemp *ts;
3853         TCGOpcode opc = op->opc;
3854         const TCGOpDef *def = &tcg_op_defs[opc];
3855         const TCGArgConstraint *args_ct;
3856 
3857         switch (opc) {
3858         case INDEX_op_call:
3859             {
3860                 const TCGHelperInfo *info = tcg_call_info(op);
3861                 int call_flags = tcg_call_flags(op);
3862 
3863                 nb_oargs = TCGOP_CALLO(op);
3864                 nb_iargs = TCGOP_CALLI(op);
3865 
3866                 /* pure functions can be removed if their result is unused */
3867                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3868                     for (i = 0; i < nb_oargs; i++) {
3869                         ts = arg_temp(op->args[i]);
3870                         if (ts->state != TS_DEAD) {
3871                             goto do_not_remove_call;
3872                         }
3873                     }
3874                     goto do_remove;
3875                 }
3876             do_not_remove_call:
3877 
3878                 /* Output args are dead.  */
3879                 for (i = 0; i < nb_oargs; i++) {
3880                     ts = arg_temp(op->args[i]);
3881                     if (ts->state & TS_DEAD) {
3882                         arg_life |= DEAD_ARG << i;
3883                     }
3884                     if (ts->state & TS_MEM) {
3885                         arg_life |= SYNC_ARG << i;
3886                     }
3887                     ts->state = TS_DEAD;
3888                     la_reset_pref(ts);
3889                 }
3890 
3891                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3892                 memset(op->output_pref, 0, sizeof(op->output_pref));
3893 
3894                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3895                                     TCG_CALL_NO_READ_GLOBALS))) {
3896                     la_global_kill(s, nb_globals);
3897                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3898                     la_global_sync(s, nb_globals);
3899                 }
3900 
3901                 /* Record arguments that die in this helper.  */
3902                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3903                     ts = arg_temp(op->args[i]);
3904                     if (ts->state & TS_DEAD) {
3905                         arg_life |= DEAD_ARG << i;
3906                     }
3907                 }
3908 
3909                 /* For all live registers, remove call-clobbered prefs.  */
3910                 la_cross_call(s, nb_temps);
3911 
3912                 /*
3913                  * Input arguments are live for preceding opcodes.
3914                  *
3915                  * For those arguments that die, and will be allocated in
3916                  * registers, clear the register set for that arg, to be
3917                  * filled in below.  For args that will be on the stack,
3918                  * reset to any available reg.  Process arguments in reverse
3919                  * order so that if a temp is used more than once, the stack
3920                  * reset to max happens before the register reset to 0.
3921                  */
3922                 for (i = nb_iargs - 1; i >= 0; i--) {
3923                     const TCGCallArgumentLoc *loc = &info->in[i];
3924                     ts = arg_temp(op->args[nb_oargs + i]);
3925 
3926                     if (ts->state & TS_DEAD) {
3927                         switch (loc->kind) {
3928                         case TCG_CALL_ARG_NORMAL:
3929                         case TCG_CALL_ARG_EXTEND_U:
3930                         case TCG_CALL_ARG_EXTEND_S:
3931                             if (arg_slot_reg_p(loc->arg_slot)) {
3932                                 *la_temp_pref(ts) = 0;
3933                                 break;
3934                             }
3935                             /* fall through */
3936                         default:
3937                             *la_temp_pref(ts) =
3938                                 tcg_target_available_regs[ts->type];
3939                             break;
3940                         }
3941                         ts->state &= ~TS_DEAD;
3942                     }
3943                 }
3944 
3945                 /*
3946                  * For each input argument, add its input register to prefs.
3947                  * If a temp is used once, this produces a single set bit;
3948                  * if a temp is used multiple times, this produces a set.
3949                  */
3950                 for (i = 0; i < nb_iargs; i++) {
3951                     const TCGCallArgumentLoc *loc = &info->in[i];
3952                     ts = arg_temp(op->args[nb_oargs + i]);
3953 
3954                     switch (loc->kind) {
3955                     case TCG_CALL_ARG_NORMAL:
3956                     case TCG_CALL_ARG_EXTEND_U:
3957                     case TCG_CALL_ARG_EXTEND_S:
3958                         if (arg_slot_reg_p(loc->arg_slot)) {
3959                             tcg_regset_set_reg(*la_temp_pref(ts),
3960                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3961                         }
3962                         break;
3963                     default:
3964                         break;
3965                     }
3966                 }
3967             }
3968             break;
3969         case INDEX_op_insn_start:
3970             break;
3971         case INDEX_op_discard:
3972             /* mark the temporary as dead */
3973             ts = arg_temp(op->args[0]);
3974             ts->state = TS_DEAD;
3975             la_reset_pref(ts);
3976             break;
3977 
3978         case INDEX_op_add2_i32:
3979         case INDEX_op_add2_i64:
3980             opc_new = INDEX_op_add;
3981             goto do_addsub2;
3982         case INDEX_op_sub2_i32:
3983         case INDEX_op_sub2_i64:
3984             opc_new = INDEX_op_sub;
3985         do_addsub2:
3986             nb_iargs = 4;
3987             nb_oargs = 2;
3988             /* Test if the high part of the operation is dead, but not
3989                the low part.  The result can be optimized to a simple
3990                add or sub.  This happens often for an x86_64 guest when
3991                the CPU mode is set to 32-bit.  */
3992             if (arg_temp(op->args[1])->state == TS_DEAD) {
3993                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3994                     goto do_remove;
3995                 }
3996                 /* Replace the opcode and adjust the args in place,
3997                    leaving 3 unused args at the end.  */
3998                 op->opc = opc = opc_new;
3999                 op->args[1] = op->args[2];
4000                 op->args[2] = op->args[4];
4001                 /* Fall through and mark the single-word operation live.  */
4002                 nb_iargs = 2;
4003                 nb_oargs = 1;
4004             }
4005             goto do_not_remove;
4006 
4007         case INDEX_op_muls2_i32:
4008         case INDEX_op_muls2_i64:
4009             opc_new = INDEX_op_mul;
4010             opc_new2 = INDEX_op_mulsh;
4011             goto do_mul2;
4012         case INDEX_op_mulu2_i32:
4013         case INDEX_op_mulu2_i64:
4014             opc_new = INDEX_op_mul;
4015             opc_new2 = INDEX_op_muluh;
4016         do_mul2:
4017             nb_iargs = 2;
4018             nb_oargs = 2;
4019             if (arg_temp(op->args[1])->state == TS_DEAD) {
4020                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4021                     /* Both parts of the operation are dead.  */
4022                     goto do_remove;
4023                 }
4024                 /* The high part of the operation is dead; generate the low. */
4025                 op->opc = opc = opc_new;
4026                 op->args[1] = op->args[2];
4027                 op->args[2] = op->args[3];
4028             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4029                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4030                 /* The low part of the operation is dead; generate the high. */
4031                 op->opc = opc = opc_new2;
4032                 op->args[0] = op->args[1];
4033                 op->args[1] = op->args[2];
4034                 op->args[2] = op->args[3];
4035             } else {
4036                 goto do_not_remove;
4037             }
4038             /* Mark the single-word operation live.  */
4039             nb_oargs = 1;
4040             goto do_not_remove;
4041 
4042         default:
4043             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4044             nb_iargs = def->nb_iargs;
4045             nb_oargs = def->nb_oargs;
4046 
4047             /* Test if the operation can be removed because all
4048                its outputs are dead. We assume that nb_oargs == 0
4049                implies side effects.  */
4050             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4051                 for (i = 0; i < nb_oargs; i++) {
4052                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4053                         goto do_not_remove;
4054                     }
4055                 }
4056                 goto do_remove;
4057             }
4058             goto do_not_remove;
4059 
4060         do_remove:
4061             tcg_op_remove(s, op);
4062             break;
4063 
4064         do_not_remove:
4065             for (i = 0; i < nb_oargs; i++) {
4066                 ts = arg_temp(op->args[i]);
4067 
4068                 /* Remember the preference of the uses that followed.  */
4069                 if (i < ARRAY_SIZE(op->output_pref)) {
4070                     op->output_pref[i] = *la_temp_pref(ts);
4071                 }
4072 
4073                 /* Output args are dead.  */
4074                 if (ts->state & TS_DEAD) {
4075                     arg_life |= DEAD_ARG << i;
4076                 }
4077                 if (ts->state & TS_MEM) {
4078                     arg_life |= SYNC_ARG << i;
4079                 }
4080                 ts->state = TS_DEAD;
4081                 la_reset_pref(ts);
4082             }
4083 
4084             /* If end of basic block, update.  */
4085             if (def->flags & TCG_OPF_BB_EXIT) {
4086                 la_func_end(s, nb_globals, nb_temps);
4087             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4088                 la_bb_sync(s, nb_globals, nb_temps);
4089             } else if (def->flags & TCG_OPF_BB_END) {
4090                 la_bb_end(s, nb_globals, nb_temps);
4091             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4092                 la_global_sync(s, nb_globals);
4093                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4094                     la_cross_call(s, nb_temps);
4095                 }
4096             }
4097 
4098             /* Record arguments that die in this opcode.  */
4099             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4100                 ts = arg_temp(op->args[i]);
4101                 if (ts->state & TS_DEAD) {
4102                     arg_life |= DEAD_ARG << i;
4103                 }
4104             }
4105 
4106             /* Input arguments are live for preceding opcodes.  */
4107             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4108                 ts = arg_temp(op->args[i]);
4109                 if (ts->state & TS_DEAD) {
4110                     /* For operands that were dead, initially allow
4111                        all regs for the type.  */
4112                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4113                     ts->state &= ~TS_DEAD;
4114                 }
4115             }
4116 
4117             /* Incorporate constraints for this operand.  */
4118             switch (opc) {
4119             case INDEX_op_mov:
4120                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4121                    have proper constraints.  That said, special-case
4122                    moves to propagate preferences backward.  */
4123                 if (IS_DEAD_ARG(1)) {
4124                     *la_temp_pref(arg_temp(op->args[0]))
4125                         = *la_temp_pref(arg_temp(op->args[1]));
4126                 }
4127                 break;
4128 
4129             default:
4130                 args_ct = opcode_args_ct(op);
4131                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4132                     const TCGArgConstraint *ct = &args_ct[i];
4133                     TCGRegSet set, *pset;
4134 
4135                     ts = arg_temp(op->args[i]);
4136                     pset = la_temp_pref(ts);
4137                     set = *pset;
4138 
4139                     set &= ct->regs;
4140                     if (ct->ialias) {
4141                         set &= output_pref(op, ct->alias_index);
4142                     }
4143                     /* If the combination is not possible, restart.  */
4144                     if (set == 0) {
4145                         set = ct->regs;
4146                     }
4147                     *pset = set;
4148                 }
4149                 break;
4150             }
4151             break;
4152         }
4153         op->life = arg_life;
4154     }
4155 }
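
/*
 * Editor's illustration of the do_addsub2 narrowing above: for
 *     add2_i32 lo, hi, al, ah, bl, bh
 * with hi dead but lo live, the op is rewritten in place to
 *     add lo, al, bl
 * (args[1] = args[2], args[2] = args[4]) and only the low-part
 * inputs remain live.
 */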
4156 
4157 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4158 static bool __attribute__((noinline))
4159 liveness_pass_2(TCGContext *s)
4160 {
4161     int nb_globals = s->nb_globals;
4162     int nb_temps, i;
4163     bool changes = false;
4164     TCGOp *op, *op_next;
4165 
4166     /* Create a temporary for each indirect global.  */
4167     for (i = 0; i < nb_globals; ++i) {
4168         TCGTemp *its = &s->temps[i];
4169         if (its->indirect_reg) {
4170             TCGTemp *dts = tcg_temp_alloc(s);
4171             dts->type = its->type;
4172             dts->base_type = its->base_type;
4173             dts->temp_subindex = its->temp_subindex;
4174             dts->kind = TEMP_EBB;
4175             its->state_ptr = dts;
4176         } else {
4177             its->state_ptr = NULL;
4178         }
4179         /* All globals begin dead.  */
4180         its->state = TS_DEAD;
4181     }
4182     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4183         TCGTemp *its = &s->temps[i];
4184         its->state_ptr = NULL;
4185         its->state = TS_DEAD;
4186     }
4187 
4188     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4189         TCGOpcode opc = op->opc;
4190         const TCGOpDef *def = &tcg_op_defs[opc];
4191         TCGLifeData arg_life = op->life;
4192         int nb_iargs, nb_oargs, call_flags;
4193         TCGTemp *arg_ts, *dir_ts;
4194 
4195         if (opc == INDEX_op_call) {
4196             nb_oargs = TCGOP_CALLO(op);
4197             nb_iargs = TCGOP_CALLI(op);
4198             call_flags = tcg_call_flags(op);
4199         } else {
4200             nb_iargs = def->nb_iargs;
4201             nb_oargs = def->nb_oargs;
4202 
4203             /* Set flags similar to those that calls require.  */
4204             if (def->flags & TCG_OPF_COND_BRANCH) {
4205                 /* Like reading globals: sync_globals */
4206                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4207             } else if (def->flags & TCG_OPF_BB_END) {
4208                 /* Like writing globals: save_globals */
4209                 call_flags = 0;
4210             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4211                 /* Like reading globals: sync_globals */
4212                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4213             } else {
4214                 /* No effect on globals.  */
4215                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4216                               TCG_CALL_NO_WRITE_GLOBALS);
4217             }
4218         }
4219 
4220         /* Make sure that input arguments are available.  */
4221         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4222             arg_ts = arg_temp(op->args[i]);
4223             dir_ts = arg_ts->state_ptr;
4224             if (dir_ts && arg_ts->state == TS_DEAD) {
4225                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4226                                   ? INDEX_op_ld_i32
4227                                   : INDEX_op_ld_i64);
4228                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4229                                                   arg_ts->type, 3);
4230 
4231                 lop->args[0] = temp_arg(dir_ts);
4232                 lop->args[1] = temp_arg(arg_ts->mem_base);
4233                 lop->args[2] = arg_ts->mem_offset;
4234 
4235                 /* Loaded, but synced with memory.  */
4236                 arg_ts->state = TS_MEM;
4237             }
4238         }
4239 
4240         /* Perform input replacement, and mark inputs that became dead.
4241            No action is required except keeping temp_state up to date
4242            so that we reload when needed.  */
4243         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4244             arg_ts = arg_temp(op->args[i]);
4245             dir_ts = arg_ts->state_ptr;
4246             if (dir_ts) {
4247                 op->args[i] = temp_arg(dir_ts);
4248                 changes = true;
4249                 if (IS_DEAD_ARG(i)) {
4250                     arg_ts->state = TS_DEAD;
4251                 }
4252             }
4253         }
4254 
4255         /* Liveness analysis should ensure that the following are
4256            all correct, for call sites and basic block end points.  */
4257         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4258             /* Nothing to do */
4259         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4260             for (i = 0; i < nb_globals; ++i) {
4261                 /* Liveness should see that globals are synced back,
4262                    that is, either TS_DEAD or TS_MEM.  */
4263                 arg_ts = &s->temps[i];
4264                 tcg_debug_assert(arg_ts->state_ptr == 0
4265                                  || arg_ts->state != 0);
4266             }
4267         } else {
4268             for (i = 0; i < nb_globals; ++i) {
4269                 /* Liveness should see that globals are saved back,
4270                    that is, TS_DEAD, waiting to be reloaded.  */
4271                 arg_ts = &s->temps[i];
4272                 tcg_debug_assert(arg_ts->state_ptr == 0
4273                                  || arg_ts->state == TS_DEAD);
4274             }
4275         }
4276 
4277         /* Outputs become available.  */
4278         if (opc == INDEX_op_mov) {
4279             arg_ts = arg_temp(op->args[0]);
4280             dir_ts = arg_ts->state_ptr;
4281             if (dir_ts) {
4282                 op->args[0] = temp_arg(dir_ts);
4283                 changes = true;
4284 
4285                 /* The output is now live and modified.  */
4286                 arg_ts->state = 0;
4287 
4288                 if (NEED_SYNC_ARG(0)) {
4289                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4290                                       ? INDEX_op_st_i32
4291                                       : INDEX_op_st_i64);
4292                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4293                                                      arg_ts->type, 3);
4294                     TCGTemp *out_ts = dir_ts;
4295 
4296                     if (IS_DEAD_ARG(0)) {
4297                         out_ts = arg_temp(op->args[1]);
4298                         arg_ts->state = TS_DEAD;
4299                         tcg_op_remove(s, op);
4300                     } else {
4301                         arg_ts->state = TS_MEM;
4302                     }
4303 
4304                     sop->args[0] = temp_arg(out_ts);
4305                     sop->args[1] = temp_arg(arg_ts->mem_base);
4306                     sop->args[2] = arg_ts->mem_offset;
4307                 } else {
4308                     tcg_debug_assert(!IS_DEAD_ARG(0));
4309                 }
4310             }
4311         } else {
4312             for (i = 0; i < nb_oargs; i++) {
4313                 arg_ts = arg_temp(op->args[i]);
4314                 dir_ts = arg_ts->state_ptr;
4315                 if (!dir_ts) {
4316                     continue;
4317                 }
4318                 op->args[i] = temp_arg(dir_ts);
4319                 changes = true;
4320 
4321                 /* The output is now live and modified.  */
4322                 arg_ts->state = 0;
4323 
4324                 /* Sync outputs upon their last write.  */
4325                 if (NEED_SYNC_ARG(i)) {
4326                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4327                                       ? INDEX_op_st_i32
4328                                       : INDEX_op_st_i64);
4329                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4330                                                      arg_ts->type, 3);
4331 
4332                     sop->args[0] = temp_arg(dir_ts);
4333                     sop->args[1] = temp_arg(arg_ts->mem_base);
4334                     sop->args[2] = arg_ts->mem_offset;
4335 
4336                     arg_ts->state = TS_MEM;
4337                 }
4338                 /* Drop outputs that are dead.  */
4339                 if (IS_DEAD_ARG(i)) {
4340                     arg_ts->state = TS_DEAD;
4341                 }
4342             }
4343         }
4344     }
4345 
4346     return changes;
4347 }
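
/*
 * Editor's illustration: for an indirect I32 global g (state_ptr -> d)
 * an op such as
 *     add g, g, t0
 * becomes
 *     ld_i32 d, base, off ; add d, d, t0 ; st_i32 d, base, off
 * where the load is inserted only if g was TS_DEAD on entry and the
 * store only if liveness set SYNC_ARG on the output.
 */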
4348 
4349 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4350 {
4351     intptr_t off;
4352     int size, align;
4353 
4354     /* When allocating an object, look at the full type. */
4355     size = tcg_type_size(ts->base_type);
4356     switch (ts->base_type) {
4357     case TCG_TYPE_I32:
4358         align = 4;
4359         break;
4360     case TCG_TYPE_I64:
4361     case TCG_TYPE_V64:
4362         align = 8;
4363         break;
4364     case TCG_TYPE_I128:
4365     case TCG_TYPE_V128:
4366     case TCG_TYPE_V256:
4367         /*
4368          * Note that we do not require aligned storage for V256,
4369          * and that we provide alignment for I128 to match V128,
4370          * even if that's above what the host ABI requires.
4371          */
4372         align = 16;
4373         break;
4374     default:
4375         g_assert_not_reached();
4376     }
4377 
4378     /*
4379      * Assume the stack is sufficiently aligned.
4380      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4381      * and do not require 16 byte vector alignment.  This seems slightly
4382      * easier than fully parameterizing the above switch statement.
4383      */
4384     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4385     off = ROUND_UP(s->current_frame_offset, align);
4386 
4387     /* If we've exhausted the stack frame, restart with a smaller TB. */
4388     if (off + size > s->frame_end) {
4389         tcg_raise_tb_overflow(s);
4390     }
4391     s->current_frame_offset = off + size;
4392 #if defined(__sparc__)
4393     off += TCG_TARGET_STACK_BIAS;
4394 #endif
4395 
4396     /* If the object was subdivided, assign memory to all the parts. */
4397     if (ts->base_type != ts->type) {
4398         int part_size = tcg_type_size(ts->type);
4399         int part_count = size / part_size;
4400 
4401         /*
4402          * Each part is allocated sequentially in tcg_temp_new_internal.
4403          * Jump back to the first part by subtracting the current index.
4404          */
4405         ts -= ts->temp_subindex;
4406         for (int i = 0; i < part_count; ++i) {
4407             ts[i].mem_offset = off + i * part_size;
4408             ts[i].mem_base = s->frame_temp;
4409             ts[i].mem_allocated = 1;
4410         }
4411     } else {
4412         ts->mem_offset = off;
4413         ts->mem_base = s->frame_temp;
4414         ts->mem_allocated = 1;
4415     }
4416 }
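
/*
 * Editor's worked example: with current_frame_offset == 20 and a new
 * TCG_TYPE_I64 temp, align == MIN(TCG_TARGET_STACK_ALIGN, 8); on a
 * host with 16-byte stack alignment, off == ROUND_UP(20, 8) == 24 and
 * current_frame_offset advances to 32.
 */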
4417 
4418 /* Assign @reg to @ts, and update reg_to_temp[]. */
4419 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4420 {
4421     if (ts->val_type == TEMP_VAL_REG) {
4422         TCGReg old = ts->reg;
4423         tcg_debug_assert(s->reg_to_temp[old] == ts);
4424         if (old == reg) {
4425             return;
4426         }
4427         s->reg_to_temp[old] = NULL;
4428     }
4429     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4430     s->reg_to_temp[reg] = ts;
4431     ts->val_type = TEMP_VAL_REG;
4432     ts->reg = reg;
4433 }
4434 
4435 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4436 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4437 {
4438     tcg_debug_assert(type != TEMP_VAL_REG);
4439     if (ts->val_type == TEMP_VAL_REG) {
4440         TCGReg reg = ts->reg;
4441         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4442         s->reg_to_temp[reg] = NULL;
4443     }
4444     ts->val_type = type;
4445 }
4446 
4447 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4448 
4449 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4450    mark it free; otherwise mark it dead.  */
4451 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4452 {
4453     TCGTempVal new_type;
4454 
4455     switch (ts->kind) {
4456     case TEMP_FIXED:
4457         return;
4458     case TEMP_GLOBAL:
4459     case TEMP_TB:
4460         new_type = TEMP_VAL_MEM;
4461         break;
4462     case TEMP_EBB:
4463         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4464         break;
4465     case TEMP_CONST:
4466         new_type = TEMP_VAL_CONST;
4467         break;
4468     default:
4469         g_assert_not_reached();
4470     }
4471     set_temp_val_nonreg(s, ts, new_type);
4472 }
4473 
4474 /* Mark a temporary as dead.  */
4475 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4476 {
4477     temp_free_or_dead(s, ts, 1);
4478 }
4479 
4480 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4481    register needs to be allocated to store a constant.  If 'free_or_dead'
4482    is non-zero, subsequently release the temporary; if it is positive, the
4483    temp is dead; if it is negative, the temp is free.  */
4484 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4485                       TCGRegSet preferred_regs, int free_or_dead)
4486 {
4487     if (!temp_readonly(ts) && !ts->mem_coherent) {
4488         if (!ts->mem_allocated) {
4489             temp_allocate_frame(s, ts);
4490         }
4491         switch (ts->val_type) {
4492         case TEMP_VAL_CONST:
4493             /* If we're going to free the temp immediately, then we won't
4494                require it later in a register, so attempt to store the
4495                constant to memory directly.  */
4496             if (free_or_dead
4497                 && tcg_out_sti(s, ts->type, ts->val,
4498                                ts->mem_base->reg, ts->mem_offset)) {
4499                 break;
4500             }
4501             temp_load(s, ts, tcg_target_available_regs[ts->type],
4502                       allocated_regs, preferred_regs);
4503             /* fallthrough */
4504 
4505         case TEMP_VAL_REG:
4506             tcg_out_st(s, ts->type, ts->reg,
4507                        ts->mem_base->reg, ts->mem_offset);
4508             break;
4509 
4510         case TEMP_VAL_MEM:
4511             break;
4512 
4513         case TEMP_VAL_DEAD:
4514         default:
4515             g_assert_not_reached();
4516         }
4517         ts->mem_coherent = 1;
4518     }
4519     if (free_or_dead) {
4520         temp_free_or_dead(s, ts, free_or_dead);
4521     }
4522 }
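
/*
 * Editor's note: the TEMP_VAL_CONST shortcut above applies only when
 * the temp is being released; e.g. syncing a dying constant may use a
 * direct store-immediate (when tcg_out_sti succeeds) instead of first
 * materializing the constant in a register.
 */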
4523 
4524 /* free register 'reg' by spilling the corresponding temporary if necessary */
4525 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4526 {
4527     TCGTemp *ts = s->reg_to_temp[reg];
4528     if (ts != NULL) {
4529         temp_sync(s, ts, allocated_regs, 0, -1);
4530     }
4531 }
4532 
4533 /**
4534  * tcg_reg_alloc:
4535  * @required_regs: Set of registers in which we must allocate.
4536  * @allocated_regs: Set of registers which must be avoided.
4537  * @preferred_regs: Set of registers we should prefer.
4538  * @rev: True if we search the registers in "indirect" order.
4539  *
4540  * The allocated register must be in @required_regs & ~@allocated_regs,
4541  * but if we can put it in @preferred_regs we may save a move later.
4542  */
4543 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4544                             TCGRegSet allocated_regs,
4545                             TCGRegSet preferred_regs, bool rev)
4546 {
4547     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4548     TCGRegSet reg_ct[2];
4549     const int *order;
4550 
4551     reg_ct[1] = required_regs & ~allocated_regs;
4552     tcg_debug_assert(reg_ct[1] != 0);
4553     reg_ct[0] = reg_ct[1] & preferred_regs;
4554 
4555     /* Skip the preferred_regs option if it cannot be satisfied,
4556        or if the preference made no difference.  */
4557     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4558 
4559     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4560 
4561     /* Try free registers, preferences first.  */
4562     for (j = f; j < 2; j++) {
4563         TCGRegSet set = reg_ct[j];
4564 
4565         if (tcg_regset_single(set)) {
4566             /* One register in the set.  */
4567             TCGReg reg = tcg_regset_first(set);
4568             if (s->reg_to_temp[reg] == NULL) {
4569                 return reg;
4570             }
4571         } else {
4572             for (i = 0; i < n; i++) {
4573                 TCGReg reg = order[i];
4574                 if (s->reg_to_temp[reg] == NULL &&
4575                     tcg_regset_test_reg(set, reg)) {
4576                     return reg;
4577                 }
4578             }
4579         }
4580     }
4581 
4582     /* We must spill something.  */
4583     for (j = f; j < 2; j++) {
4584         TCGRegSet set = reg_ct[j];
4585 
4586         if (tcg_regset_single(set)) {
4587             /* One register in the set.  */
4588             TCGReg reg = tcg_regset_first(set);
4589             tcg_reg_free(s, reg, allocated_regs);
4590             return reg;
4591         } else {
4592             for (i = 0; i < n; i++) {
4593                 TCGReg reg = order[i];
4594                 if (tcg_regset_test_reg(set, reg)) {
4595                     tcg_reg_free(s, reg, allocated_regs);
4596                     return reg;
4597                 }
4598             }
4599         }
4600     }
4601 
4602     g_assert_not_reached();
4603 }
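
/*
 * Editor's illustration: with required = {r1, r2}, allocated = {} and
 * preferred = {r2} (register names hypothetical), reg_ct[0] = {r2}
 * and reg_ct[1] = {r1, r2}.  A free r2 is returned first; failing
 * that, a free r1; only when both hold live temps is r2 spilled,
 * since the preferred single-register set is also tried first in the
 * spill loop.
 */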
4604 
4605 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4606                                  TCGRegSet allocated_regs,
4607                                  TCGRegSet preferred_regs, bool rev)
4608 {
4609     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4610     TCGRegSet reg_ct[2];
4611     const int *order;
4612 
4613     /* Ensure that if I is a candidate, neither I nor I+1 is in allocated_regs. */
4614     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4615     tcg_debug_assert(reg_ct[1] != 0);
4616     reg_ct[0] = reg_ct[1] & preferred_regs;
4617 
4618     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4619 
4620     /*
4621      * Skip the preferred_regs option if it cannot be satisfied,
4622      * or if the preference made no difference.
4623      */
4624     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4625 
4626     /*
4627      * Minimize the number of flushes by looking for 2 free registers first,
4628      * then a single flush, then two flushes.
4629      */
4630     for (fmin = 2; fmin >= 0; fmin--) {
4631         for (j = k; j < 2; j++) {
4632             TCGRegSet set = reg_ct[j];
4633 
4634             for (i = 0; i < n; i++) {
4635                 TCGReg reg = order[i];
4636 
4637                 if (tcg_regset_test_reg(set, reg)) {
4638                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4639                     if (f >= fmin) {
4640                         tcg_reg_free(s, reg, allocated_regs);
4641                         tcg_reg_free(s, reg + 1, allocated_regs);
4642                         return reg;
4643                     }
4644                 }
4645             }
4646         }
4647     }
4648     g_assert_not_reached();
4649 }
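
/*
 * Editor's note on the masks above: if allocated_regs contains r3,
 * then (allocated_regs | allocated_regs >> 1) also contains r2, which
 * excludes both candidate pairs {r2,r3} and {r3,r4}, i.e. every pair
 * that would overlap r3.  The fmin loop then prefers a pair with both
 * registers free (no spill) over one spill, over two spills.
 */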
4650 
4651 /* Make sure the temporary is in a register.  If needed, allocate the register
4652    from DESIRED while avoiding ALLOCATED.  */
4653 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4654                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4655 {
4656     TCGReg reg;
4657 
4658     switch (ts->val_type) {
4659     case TEMP_VAL_REG:
4660         return;
4661     case TEMP_VAL_CONST:
4662         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4663                             preferred_regs, ts->indirect_base);
4664         if (ts->type <= TCG_TYPE_I64) {
4665             tcg_out_movi(s, ts->type, reg, ts->val);
4666         } else {
4667             uint64_t val = ts->val;
4668             MemOp vece = MO_64;
4669 
4670             /*
4671              * Find the minimal vector element that matches the constant.
4672              * The targets will, in general, have to do this search anyway,
4673              * so do it generically here.
4674              */
4675             if (val == dup_const(MO_8, val)) {
4676                 vece = MO_8;
4677             } else if (val == dup_const(MO_16, val)) {
4678                 vece = MO_16;
4679             } else if (val == dup_const(MO_32, val)) {
4680                 vece = MO_32;
4681             }
4682 
4683             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4684         }
4685         ts->mem_coherent = 0;
4686         break;
4687     case TEMP_VAL_MEM:
4688         if (!ts->mem_allocated) {
4689             temp_allocate_frame(s, ts);
4690         }
4691         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4692                             preferred_regs, ts->indirect_base);
4693         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4694         ts->mem_coherent = 1;
4695         break;
4696     case TEMP_VAL_DEAD:
4697     default:
4698         g_assert_not_reached();
4699     }
4700     set_temp_val_reg(s, ts, reg);
4701 }
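
/*
 * Worked example for the dup_const() search above (editor's note):
 * val = 0x7f7f7f7f7f7f7f7f equals dup_const(MO_8, val) and becomes a
 * byte splat (vece = MO_8), while val = 0x00ff00ff00ff00ff fails the
 * MO_8 test and first matches at MO_16.  Performing the scan here
 * hands every backend the smallest usable element size.
 */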
4702 
4703 /* Save a temporary to memory. 'allocated_regs' is used in case a
4704    temporary register needs to be allocated to store a constant.  */
4705 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4706 {
4707     /* The liveness analysis already ensures that globals are back
4708        in memory. Keep an tcg_debug_assert for safety. */
4709        in memory. Keep a tcg_debug_assert for safety. */
4710 }
4711 
4712 /* save globals to their canonical location and assume they can be
4713    modified by the following code. 'allocated_regs' is used in case a
4714    temporary register needs to be allocated to store a constant. */
4715 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4716 {
4717     int i, n;
4718 
4719     for (i = 0, n = s->nb_globals; i < n; i++) {
4720         temp_save(s, &s->temps[i], allocated_regs);
4721     }
4722 }
4723 
4724 /* sync globals to their canonical location and assume they can be
4725    read by the following code. 'allocated_regs' is used in case a
4726    temporary register needs to be allocated to store a constant. */
4727 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4728 {
4729     int i, n;
4730 
4731     for (i = 0, n = s->nb_globals; i < n; i++) {
4732         TCGTemp *ts = &s->temps[i];
4733         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4734                          || ts->kind == TEMP_FIXED
4735                          || ts->mem_coherent);
4736     }
4737 }
4738 
4739 /* at the end of a basic block, we assume all temporaries are dead and
4740    all globals are stored at their canonical location. */
4741 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4742 {
4743     int i;
4744 
4745     for (i = s->nb_globals; i < s->nb_temps; i++) {
4746         TCGTemp *ts = &s->temps[i];
4747 
4748         switch (ts->kind) {
4749         case TEMP_TB:
4750             temp_save(s, ts, allocated_regs);
4751             break;
4752         case TEMP_EBB:
4753             /* The liveness analysis already ensures that temps are dead.
4754                Keep a tcg_debug_assert for safety. */
4755             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4756             break;
4757         case TEMP_CONST:
4758             /* Similarly, we should have freed any allocated register. */
4759             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4760             break;
4761         default:
4762             g_assert_not_reached();
4763         }
4764     }
4765 
4766     save_globals(s, allocated_regs);
4767 }
4768 
4769 /*
4770  * At a conditional branch, we assume all temporaries are dead unless
4771  * explicitly live-across-conditional-branch; all globals and local
4772  * temps are synced to their location.
4773  */
4774 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4775 {
4776     sync_globals(s, allocated_regs);
4777 
4778     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4779         TCGTemp *ts = &s->temps[i];
4780         /*
4781          * The liveness analysis already ensures that temps are dead.
4782          * Keep tcg_debug_asserts for safety.
4783          */
4784         switch (ts->kind) {
4785         case TEMP_TB:
4786             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4787             break;
4788         case TEMP_EBB:
4789         case TEMP_CONST:
4790             break;
4791         default:
4792             g_assert_not_reached();
4793         }
4794     }
4795 }
4796 
4797 /*
4798  * Specialized code generation for INDEX_op_mov_* with a constant.
4799  */
4800 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4801                                   tcg_target_ulong val, TCGLifeData arg_life,
4802                                   TCGRegSet preferred_regs)
4803 {
4804     /* ENV should not be modified.  */
4805     tcg_debug_assert(!temp_readonly(ots));
4806 
4807     /* The movi is not explicitly generated here.  */
4808     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4809     ots->val = val;
4810     ots->mem_coherent = 0;
4811     if (NEED_SYNC_ARG(0)) {
4812         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4813     } else if (IS_DEAD_ARG(0)) {
4814         temp_dead(s, ots);
4815     }
4816 }
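
/*
 * Illustrative scenario (editor's addition): for "mov_i32 t0, $0x1234"
 * no host instruction is emitted above; t0 merely becomes
 * TEMP_VAL_CONST with val = 0x1234.  The constant is materialized
 * later, and only if actually needed, by temp_load() or temp_sync().
 */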
4817 
4818 /*
4819  * Specialized code generation for INDEX_op_mov_*.
4820  */
4821 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4822 {
4823     const TCGLifeData arg_life = op->life;
4824     TCGRegSet allocated_regs, preferred_regs;
4825     TCGTemp *ts, *ots;
4826     TCGType otype, itype;
4827     TCGReg oreg, ireg;
4828 
4829     allocated_regs = s->reserved_regs;
4830     preferred_regs = output_pref(op, 0);
4831     ots = arg_temp(op->args[0]);
4832     ts = arg_temp(op->args[1]);
4833 
4834     /* ENV should not be modified.  */
4835     tcg_debug_assert(!temp_readonly(ots));
4836 
4837     /* Note that otype != itype for no-op truncation.  */
4838     otype = ots->type;
4839     itype = ts->type;
4840 
4841     if (ts->val_type == TEMP_VAL_CONST) {
4842         /* propagate constant or generate sti */
4843         tcg_target_ulong val = ts->val;
4844         if (IS_DEAD_ARG(1)) {
4845             temp_dead(s, ts);
4846         }
4847         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4848         return;
4849     }
4850 
4851     /* If the source value is in memory, we're going to be forced
4852        to have it in a register in order to perform the copy.  Copy
4853        the SOURCE value into its own register first, so that we
4854        don't have to reload SOURCE the next time it is used. */
4855     if (ts->val_type == TEMP_VAL_MEM) {
4856         temp_load(s, ts, tcg_target_available_regs[itype],
4857                   allocated_regs, preferred_regs);
4858     }
4859     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4860     ireg = ts->reg;
4861 
4862     if (IS_DEAD_ARG(0)) {
4863         /* mov to a non-saved dead register makes no sense (even with
4864            liveness analysis disabled). */
4865         tcg_debug_assert(NEED_SYNC_ARG(0));
4866         if (!ots->mem_allocated) {
4867             temp_allocate_frame(s, ots);
4868         }
4869         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4870         if (IS_DEAD_ARG(1)) {
4871             temp_dead(s, ts);
4872         }
4873         temp_dead(s, ots);
4874         return;
4875     }
4876 
4877     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4878         /*
4879          * The mov can be suppressed.  Kill input first, so that it
4880          * is unlinked from reg_to_temp, then set the output to the
4881          * reg that we saved from the input.
4882          */
4883         temp_dead(s, ts);
4884         oreg = ireg;
4885     } else {
4886         if (ots->val_type == TEMP_VAL_REG) {
4887             oreg = ots->reg;
4888         } else {
4889             /* Make sure to not spill the input register during allocation. */
4890             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4891                                  allocated_regs | ((TCGRegSet)1 << ireg),
4892                                  preferred_regs, ots->indirect_base);
4893         }
4894         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4895             /*
4896              * Cross register class move not supported.
4897              * Store the source register into the destination slot
4898              * and leave the destination temp as TEMP_VAL_MEM.
4899              */
4900             assert(!temp_readonly(ots));
4901             if (!ts->mem_allocated) {
4902                 temp_allocate_frame(s, ots);
4903             }
4904             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4905             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4906             ots->mem_coherent = 1;
4907             return;
4908         }
4909     }
4910     set_temp_val_reg(s, ots, oreg);
4911     ots->mem_coherent = 0;
4912 
4913     if (NEED_SYNC_ARG(0)) {
4914         temp_sync(s, ots, allocated_regs, 0, 0);
4915     }
4916 }
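
/*
 * Illustrative scenario (editor's addition): for "mov_i32 t2, t1"
 * where t1 sits in a register and dies at this op, no host mov is
 * emitted: t1 is unlinked via temp_dead() and t2 adopts t1's register
 * through set_temp_val_reg().  A real tcg_out_mov() is produced only
 * when the input is TEMP_FIXED or remains live after the instruction.
 */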
4917 
4918 /*
4919  * Specialized code generation for INDEX_op_dup_vec.
4920  */
4921 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4922 {
4923     const TCGLifeData arg_life = op->life;
4924     TCGRegSet dup_out_regs, dup_in_regs;
4925     const TCGArgConstraint *dup_args_ct;
4926     TCGTemp *its, *ots;
4927     TCGType itype, vtype;
4928     unsigned vece;
4929     int lowpart_ofs;
4930     bool ok;
4931 
4932     ots = arg_temp(op->args[0]);
4933     its = arg_temp(op->args[1]);
4934 
4935     /* ENV should not be modified.  */
4936     tcg_debug_assert(!temp_readonly(ots));
4937 
4938     itype = its->type;
4939     vece = TCGOP_VECE(op);
4940     vtype = TCGOP_TYPE(op);
4941 
4942     if (its->val_type == TEMP_VAL_CONST) {
4943         /* Propagate constant via movi -> dupi.  */
4944         tcg_target_ulong val = its->val;
4945         if (IS_DEAD_ARG(1)) {
4946             temp_dead(s, its);
4947         }
4948         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4949         return;
4950     }
4951 
4952     dup_args_ct = opcode_args_ct(op);
4953     dup_out_regs = dup_args_ct[0].regs;
4954     dup_in_regs = dup_args_ct[1].regs;
4955 
4956     /* Allocate the output register now.  */
4957     if (ots->val_type != TEMP_VAL_REG) {
4958         TCGRegSet allocated_regs = s->reserved_regs;
4959         TCGReg oreg;
4960 
4961         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4962             /* Make sure to not spill the input register. */
4963             tcg_regset_set_reg(allocated_regs, its->reg);
4964         }
4965         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4966                              output_pref(op, 0), ots->indirect_base);
4967         set_temp_val_reg(s, ots, oreg);
4968     }
4969 
4970     switch (its->val_type) {
4971     case TEMP_VAL_REG:
4972         /*
4973          * The dup constraints must be broad, covering all possible VECE.
4974          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4975          * to fail, indicating that extra moves are required for that case.
4976          */
4977         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4978             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4979                 goto done;
4980             }
4981             /* Try again from memory or a vector input register.  */
4982         }
4983         if (!its->mem_coherent) {
4984             /*
4985              * The input register is not synced, and so an extra store
4986              * would be required to use memory.  Attempt an integer-vector
4987              * register move first.  We do not have a TCGRegSet for this.
4988              */
4989             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4990                 break;
4991             }
4992             /* Sync the temp back to its slot and load from there.  */
4993             temp_sync(s, its, s->reserved_regs, 0, 0);
4994         }
4995         /* fall through */
4996 
4997     case TEMP_VAL_MEM:
4998         lowpart_ofs = 0;
4999         if (HOST_BIG_ENDIAN) {
5000             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5001         }
5002         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5003                              its->mem_offset + lowpart_ofs)) {
5004             goto done;
5005         }
5006         /* Load the input into the destination vector register. */
5007         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5008         break;
5009 
5010     default:
5011         g_assert_not_reached();
5012     }
5013 
5014     /* We now have a vector input register, so dup must succeed. */
5015     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5016     tcg_debug_assert(ok);
5017 
5018  done:
5019     ots->mem_coherent = 0;
5020     if (IS_DEAD_ARG(1)) {
5021         temp_dead(s, its);
5022     }
5023     if (NEED_SYNC_ARG(0)) {
5024         temp_sync(s, ots, s->reserved_regs, 0, 0);
5025     }
5026     if (IS_DEAD_ARG(0)) {
5027         temp_dead(s, ots);
5028     }
5029 }
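
/*
 * Editor's note on the fallback chain above: a register input is
 * first tried as a direct tcg_out_dup_vec(); if its class does not
 * permit that and the temp is not memory-coherent, an integer-to-
 * vector tcg_out_mov() is attempted before paying for a sync;
 * otherwise the element is re-read with tcg_out_dupm_vec(), and the
 * final vector-to-vector dup is then required to succeed.
 */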
5030 
5031 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5032 {
5033     const TCGLifeData arg_life = op->life;
5034     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5035     TCGRegSet i_allocated_regs;
5036     TCGRegSet o_allocated_regs;
5037     int i, k, nb_iargs, nb_oargs;
5038     TCGReg reg;
5039     TCGArg arg;
5040     const TCGArgConstraint *args_ct;
5041     const TCGArgConstraint *arg_ct;
5042     TCGTemp *ts;
5043     TCGArg new_args[TCG_MAX_OP_ARGS];
5044     int const_args[TCG_MAX_OP_ARGS];
5045     TCGCond op_cond;
5046 
5047     nb_oargs = def->nb_oargs;
5048     nb_iargs = def->nb_iargs;
5049 
5050     /* copy constants */
5051     memcpy(new_args + nb_oargs + nb_iargs,
5052            op->args + nb_oargs + nb_iargs,
5053            sizeof(TCGArg) * def->nb_cargs);
5054 
5055     i_allocated_regs = s->reserved_regs;
5056     o_allocated_regs = s->reserved_regs;
5057 
5058     switch (op->opc) {
5059     case INDEX_op_brcond_i32:
5060     case INDEX_op_brcond_i64:
5061         op_cond = op->args[2];
5062         break;
5063     case INDEX_op_setcond_i32:
5064     case INDEX_op_setcond_i64:
5065     case INDEX_op_negsetcond_i32:
5066     case INDEX_op_negsetcond_i64:
5067     case INDEX_op_cmp_vec:
5068         op_cond = op->args[3];
5069         break;
5070     case INDEX_op_brcond2_i32:
5071         op_cond = op->args[4];
5072         break;
5073     case INDEX_op_movcond_i32:
5074     case INDEX_op_movcond_i64:
5075     case INDEX_op_setcond2_i32:
5076     case INDEX_op_cmpsel_vec:
5077         op_cond = op->args[5];
5078         break;
5079     default:
5080         /* No condition within opcode. */
5081         op_cond = TCG_COND_ALWAYS;
5082         break;
5083     }
5084 
5085     args_ct = opcode_args_ct(op);
5086 
5087     /* satisfy input constraints */
5088     for (k = 0; k < nb_iargs; k++) {
5089         TCGRegSet i_preferred_regs, i_required_regs;
5090         bool allocate_new_reg, copyto_new_reg;
5091         TCGTemp *ts2;
5092         int i1, i2;
5093 
5094         i = args_ct[nb_oargs + k].sort_index;
5095         arg = op->args[i];
5096         arg_ct = &args_ct[i];
5097         ts = arg_temp(arg);
5098 
5099         if (ts->val_type == TEMP_VAL_CONST) {
5100 #ifdef TCG_REG_ZERO
5101             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5102                 /* Hardware zero register: indicate register via non-const. */
5103                 const_args[i] = 0;
5104                 new_args[i] = TCG_REG_ZERO;
5105                 continue;
5106             }
5107 #endif
5108 
5109             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5110                                        op_cond, TCGOP_VECE(op))) {
5111                 /* constant is OK for instruction */
5112                 const_args[i] = 1;
5113                 new_args[i] = ts->val;
5114                 continue;
5115             }
5116         }
5117 
5118         reg = ts->reg;
5119         i_preferred_regs = 0;
5120         i_required_regs = arg_ct->regs;
5121         allocate_new_reg = false;
5122         copyto_new_reg = false;
5123 
5124         switch (arg_ct->pair) {
5125         case 0: /* not paired */
5126             if (arg_ct->ialias) {
5127                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5128 
5129                 /*
5130                  * If the input is readonly, then it cannot also be an
5131                  * output and aliased to itself.  If the input is not
5132                  * dead after the instruction, we must allocate a new
5133                  * register and move it.
5134                  */
5135                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5136                     || args_ct[arg_ct->alias_index].newreg) {
5137                     allocate_new_reg = true;
5138                 } else if (ts->val_type == TEMP_VAL_REG) {
5139                     /*
5140                      * Check if the current register has already been
5141                      * allocated for another input.
5142                      */
5143                     allocate_new_reg =
5144                         tcg_regset_test_reg(i_allocated_regs, reg);
5145                 }
5146             }
5147             if (!allocate_new_reg) {
5148                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5149                           i_preferred_regs);
5150                 reg = ts->reg;
5151                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5152             }
5153             if (allocate_new_reg) {
5154                 /*
5155                  * Allocate a new register matching the constraint
5156                  * and move the temporary register into it.
5157                  */
5158                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5159                           i_allocated_regs, 0);
5160                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5161                                     i_preferred_regs, ts->indirect_base);
5162                 copyto_new_reg = true;
5163             }
5164             break;
5165 
5166         case 1:
5167             /* First of an input pair; if i1 == i2, the second is an output. */
5168             i1 = i;
5169             i2 = arg_ct->pair_index;
5170             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5171 
5172             /*
5173              * It is easier to default to allocating a new pair
5174              * and to identify a few cases where it's not required.
5175              */
5176             if (arg_ct->ialias) {
5177                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5178                 if (IS_DEAD_ARG(i1) &&
5179                     IS_DEAD_ARG(i2) &&
5180                     !temp_readonly(ts) &&
5181                     ts->val_type == TEMP_VAL_REG &&
5182                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5183                     tcg_regset_test_reg(i_required_regs, reg) &&
5184                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5185                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5186                     (ts2
5187                      ? ts2->val_type == TEMP_VAL_REG &&
5188                        ts2->reg == reg + 1 &&
5189                        !temp_readonly(ts2)
5190                      : s->reg_to_temp[reg + 1] == NULL)) {
5191                     break;
5192                 }
5193             } else {
5194                 /* Without aliasing, the pair must also be an input. */
5195                 tcg_debug_assert(ts2);
5196                 if (ts->val_type == TEMP_VAL_REG &&
5197                     ts2->val_type == TEMP_VAL_REG &&
5198                     ts2->reg == reg + 1 &&
5199                     tcg_regset_test_reg(i_required_regs, reg)) {
5200                     break;
5201                 }
5202             }
5203             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5204                                      0, ts->indirect_base);
5205             goto do_pair;
5206 
5207         case 2: /* pair second */
5208             reg = new_args[arg_ct->pair_index] + 1;
5209             goto do_pair;
5210 
5211         case 3: /* ialias with second output, no first input */
5212             tcg_debug_assert(arg_ct->ialias);
5213             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5214 
5215             if (IS_DEAD_ARG(i) &&
5216                 !temp_readonly(ts) &&
5217                 ts->val_type == TEMP_VAL_REG &&
5218                 reg > 0 &&
5219                 s->reg_to_temp[reg - 1] == NULL &&
5220                 tcg_regset_test_reg(i_required_regs, reg) &&
5221                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5222                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5223                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5224                 break;
5225             }
5226             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5227                                      i_allocated_regs, 0,
5228                                      ts->indirect_base);
5229             tcg_regset_set_reg(i_allocated_regs, reg);
5230             reg += 1;
5231             goto do_pair;
5232 
5233         do_pair:
5234             /*
5235              * If an aliased input is not dead after the instruction,
5236              * we must allocate a new register and move it.
5237              */
5238             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5239                 TCGRegSet t_allocated_regs = i_allocated_regs;
5240 
5241                 /*
5242                  * Because of the alias, and the continued life, make sure
5243                  * that the temp is somewhere *other* than the reg pair,
5244                  * and we get a copy in reg.
5245                  */
5246                 tcg_regset_set_reg(t_allocated_regs, reg);
5247                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5248                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5249                     /* If ts was already in reg, copy it somewhere else. */
5250                     TCGReg nr;
5251                     bool ok;
5252 
5253                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5254                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5255                                        t_allocated_regs, 0, ts->indirect_base);
5256                     ok = tcg_out_mov(s, ts->type, nr, reg);
5257                     tcg_debug_assert(ok);
5258 
5259                     set_temp_val_reg(s, ts, nr);
5260                 } else {
5261                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5262                               t_allocated_regs, 0);
5263                     copyto_new_reg = true;
5264                 }
5265             } else {
5266                 /* Preferably allocate to reg, otherwise copy. */
5267                 i_required_regs = (TCGRegSet)1 << reg;
5268                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5269                           i_preferred_regs);
5270                 copyto_new_reg = ts->reg != reg;
5271             }
5272             break;
5273 
5274         default:
5275             g_assert_not_reached();
5276         }
5277 
5278         if (copyto_new_reg) {
5279             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5280                 /*
5281                  * Cross register class move not supported.  Sync the
5282                  * temp back to its slot and load from there.
5283                  */
5284                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5285                 tcg_out_ld(s, ts->type, reg,
5286                            ts->mem_base->reg, ts->mem_offset);
5287             }
5288         }
5289         new_args[i] = reg;
5290         const_args[i] = 0;
5291         tcg_regset_set_reg(i_allocated_regs, reg);
5292     }
5293 
5294     /* mark dead temporaries and free the associated registers */
5295     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5296         if (IS_DEAD_ARG(i)) {
5297             temp_dead(s, arg_temp(op->args[i]));
5298         }
5299     }
5300 
5301     if (def->flags & TCG_OPF_COND_BRANCH) {
5302         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5303     } else if (def->flags & TCG_OPF_BB_END) {
5304         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5305     } else {
5306         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5307             /* XXX: permit generic clobber register list? */
5308             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5309                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5310                     tcg_reg_free(s, i, i_allocated_regs);
5311                 }
5312             }
5313         }
5314         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5315             /* sync globals if the op has side effects and might trigger
5316                an exception. */
5317             sync_globals(s, i_allocated_regs);
5318         }
5319 
5320         /* satisfy the output constraints */
5321         for (k = 0; k < nb_oargs; k++) {
5322             i = args_ct[k].sort_index;
5323             arg = op->args[i];
5324             arg_ct = &args_ct[i];
5325             ts = arg_temp(arg);
5326 
5327             /* ENV should not be modified.  */
5328             tcg_debug_assert(!temp_readonly(ts));
5329 
5330             switch (arg_ct->pair) {
5331             case 0: /* not paired */
5332                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5333                     reg = new_args[arg_ct->alias_index];
5334                 } else if (arg_ct->newreg) {
5335                     reg = tcg_reg_alloc(s, arg_ct->regs,
5336                                         i_allocated_regs | o_allocated_regs,
5337                                         output_pref(op, k), ts->indirect_base);
5338                 } else {
5339                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5340                                         output_pref(op, k), ts->indirect_base);
5341                 }
5342                 break;
5343 
5344             case 1: /* first of pair */
5345                 if (arg_ct->oalias) {
5346                     reg = new_args[arg_ct->alias_index];
5347                 } else if (arg_ct->newreg) {
5348                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5349                                              i_allocated_regs | o_allocated_regs,
5350                                              output_pref(op, k),
5351                                              ts->indirect_base);
5352                 } else {
5353                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5354                                              output_pref(op, k),
5355                                              ts->indirect_base);
5356                 }
5357                 break;
5358 
5359             case 2: /* second of pair */
5360                 if (arg_ct->oalias) {
5361                     reg = new_args[arg_ct->alias_index];
5362                 } else {
5363                     reg = new_args[arg_ct->pair_index] + 1;
5364                 }
5365                 break;
5366 
5367             case 3: /* first of pair, aliasing with a second input */
5368                 tcg_debug_assert(!arg_ct->newreg);
5369                 reg = new_args[arg_ct->pair_index] - 1;
5370                 break;
5371 
5372             default:
5373                 g_assert_not_reached();
5374             }
5375             tcg_regset_set_reg(o_allocated_regs, reg);
5376             set_temp_val_reg(s, ts, reg);
5377             ts->mem_coherent = 0;
5378             new_args[i] = reg;
5379         }
5380     }
5381 
5382     /* emit instruction */
5383     TCGType type = TCGOP_TYPE(op);
5384     switch (op->opc) {
5385     case INDEX_op_ext_i32_i64:
5386         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5387         break;
5388     case INDEX_op_extu_i32_i64:
5389         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5390         break;
5391     case INDEX_op_extrl_i64_i32:
5392         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5393         break;
5394 
5395     case INDEX_op_add:
5396     case INDEX_op_and:
5397     case INDEX_op_andc:
5398     case INDEX_op_clz:
5399     case INDEX_op_ctz:
5400     case INDEX_op_divs:
5401     case INDEX_op_divu:
5402     case INDEX_op_eqv:
5403     case INDEX_op_mul:
5404     case INDEX_op_mulsh:
5405     case INDEX_op_muluh:
5406     case INDEX_op_nand:
5407     case INDEX_op_nor:
5408     case INDEX_op_or:
5409     case INDEX_op_orc:
5410     case INDEX_op_rems:
5411     case INDEX_op_remu:
5412     case INDEX_op_rotl:
5413     case INDEX_op_rotr:
5414     case INDEX_op_sar:
5415     case INDEX_op_shl:
5416     case INDEX_op_shr:
5417     case INDEX_op_xor:
5418         {
5419             const TCGOutOpBinary *out =
5420                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5421 
5422             /* Constants should never appear in the first source operand. */
5423             tcg_debug_assert(!const_args[1]);
5424             if (const_args[2]) {
5425                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5426             } else {
5427                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5428             }
5429         }
5430         break;
5431 
5432     case INDEX_op_sub:
5433         {
5434             const TCGOutOpSubtract *out = &outop_sub;
5435 
5436             /*
5437              * Constants should never appear in the second source operand.
5438              * These are folded to add with negative constant.
5439              */
5440             tcg_debug_assert(!const_args[2]);
5441             if (const_args[1]) {
5442                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5443             } else {
5444                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5445             }
5446         }
5447         break;
5448 
5449     case INDEX_op_ctpop:
5450     case INDEX_op_neg:
5451     case INDEX_op_not:
5452         {
5453             const TCGOutOpUnary *out =
5454                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5455 
5456             /* Constants should have been folded. */
5457             tcg_debug_assert(!const_args[1]);
5458             out->out_rr(s, type, new_args[0], new_args[1]);
5459         }
5460         break;
5461 
5462     case INDEX_op_divs2:
5463     case INDEX_op_divu2:
5464         {
5465             const TCGOutOpDivRem *out =
5466                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5467 
5468             /* Only used by x86 and s390x, which use matching constraints. */
5469             tcg_debug_assert(new_args[0] == new_args[2]);
5470             tcg_debug_assert(new_args[1] == new_args[3]);
5471             tcg_debug_assert(!const_args[4]);
5472             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5473         }
5474         break;
5475 
5476     default:
5477         if (def->flags & TCG_OPF_VECTOR) {
5478             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5479                            TCGOP_VECE(op), new_args, const_args);
5480         } else {
5481             tcg_out_op(s, op->opc, type, new_args, const_args);
5482         }
5483         break;
5484     }
5485 
5486     /* move the outputs in the correct register if needed */
5487     for (i = 0; i < nb_oargs; i++) {
5488         ts = arg_temp(op->args[i]);
5489 
5490         /* ENV should not be modified.  */
5491         tcg_debug_assert(!temp_readonly(ts));
5492 
5493         if (NEED_SYNC_ARG(i)) {
5494             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5495         } else if (IS_DEAD_ARG(i)) {
5496             temp_dead(s, ts);
5497         }
5498     }
5499 }
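
/*
 * Worked example (editor's note): for "add_i64 t0, t1, $5", if the
 * backend's tcg_target_const_match() accepts the immediate, the input
 * loop sets const_args[2] = 1 and new_args[2] = 5, and emission takes
 * the out->out_rri() path; if the immediate is rejected, $5 is loaded
 * into an allocated register and out->out_rrr() is used instead.
 */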
5500 
5501 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5502 {
5503     const TCGLifeData arg_life = op->life;
5504     TCGTemp *ots, *itsl, *itsh;
5505     TCGType vtype = TCGOP_TYPE(op);
5506 
5507     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5508     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5509     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5510 
5511     ots = arg_temp(op->args[0]);
5512     itsl = arg_temp(op->args[1]);
5513     itsh = arg_temp(op->args[2]);
5514 
5515     /* ENV should not be modified.  */
5516     tcg_debug_assert(!temp_readonly(ots));
5517 
5518     /* Allocate the output register now.  */
5519     if (ots->val_type != TEMP_VAL_REG) {
5520         TCGRegSet allocated_regs = s->reserved_regs;
5521         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5522         TCGReg oreg;
5523 
5524         /* Make sure to not spill the input registers. */
5525         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5526             tcg_regset_set_reg(allocated_regs, itsl->reg);
5527         }
5528         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5529             tcg_regset_set_reg(allocated_regs, itsh->reg);
5530         }
5531 
5532         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5533                              output_pref(op, 0), ots->indirect_base);
5534         set_temp_val_reg(s, ots, oreg);
5535     }
5536 
5537     /* Promote dup2 of immediates to dupi_vec. */
5538     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5539         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5540         MemOp vece = MO_64;
5541 
5542         if (val == dup_const(MO_8, val)) {
5543             vece = MO_8;
5544         } else if (val == dup_const(MO_16, val)) {
5545             vece = MO_16;
5546         } else if (val == dup_const(MO_32, val)) {
5547             vece = MO_32;
5548         }
5549 
5550         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5551         goto done;
5552     }
5553 
5554     /* If the two inputs form one 64-bit value, try dupm_vec. */
5555     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5556         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5557         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5558         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5559 
5560         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5561         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5562 
5563         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5564                              its->mem_base->reg, its->mem_offset)) {
5565             goto done;
5566         }
5567     }
5568 
5569     /* Fall back to generic expansion. */
5570     return false;
5571 
5572  done:
5573     ots->mem_coherent = 0;
5574     if (IS_DEAD_ARG(1)) {
5575         temp_dead(s, itsl);
5576     }
5577     if (IS_DEAD_ARG(2)) {
5578         temp_dead(s, itsh);
5579     }
5580     if (NEED_SYNC_ARG(0)) {
5581         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5582     } else if (IS_DEAD_ARG(0)) {
5583         temp_dead(s, ots);
5584     }
5585     return true;
5586 }
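
/*
 * Worked example (editor's note): dup2 of the constant halves
 * lo = 0x44332211 and hi = 0x88776655 forms
 * val = deposit64(lo, 32, 32, hi) = 0x8877665544332211, which matches
 * no smaller dup_const() pattern, so one MO_64 tcg_out_dupi_vec() is
 * emitted; lo = hi = 0x01010101 would instead collapse to MO_8.
 */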
5587 
5588 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5589                          TCGRegSet allocated_regs)
5590 {
5591     if (ts->val_type == TEMP_VAL_REG) {
5592         if (ts->reg != reg) {
5593             tcg_reg_free(s, reg, allocated_regs);
5594             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5595                 /*
5596                  * Cross register class move not supported.  Sync the
5597                  * temp back to its slot and load from there.
5598                  */
5599                 temp_sync(s, ts, allocated_regs, 0, 0);
5600                 tcg_out_ld(s, ts->type, reg,
5601                            ts->mem_base->reg, ts->mem_offset);
5602             }
5603         }
5604     } else {
5605         TCGRegSet arg_set = 0;
5606 
5607         tcg_reg_free(s, reg, allocated_regs);
5608         tcg_regset_set_reg(arg_set, reg);
5609         temp_load(s, ts, arg_set, allocated_regs, 0);
5610     }
5611 }
5612 
5613 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5614                          TCGRegSet allocated_regs)
5615 {
5616     /*
5617      * When the destination is on the stack, load up the temp and store.
5618      * If there are many call-saved registers, the temp might live to
5619      * see another use; otherwise it'll be discarded.
5620      */
5621     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5622     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5623                arg_slot_stk_ofs(arg_slot));
5624 }
5625 
5626 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5627                             TCGTemp *ts, TCGRegSet *allocated_regs)
5628 {
5629     if (arg_slot_reg_p(l->arg_slot)) {
5630         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5631         load_arg_reg(s, reg, ts, *allocated_regs);
5632         tcg_regset_set_reg(*allocated_regs, reg);
5633     } else {
5634         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5635     }
5636 }
5637 
5638 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5639                          intptr_t ref_off, TCGRegSet *allocated_regs)
5640 {
5641     TCGReg reg;
5642 
5643     if (arg_slot_reg_p(arg_slot)) {
5644         reg = tcg_target_call_iarg_regs[arg_slot];
5645         tcg_reg_free(s, reg, *allocated_regs);
5646         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5647         tcg_regset_set_reg(*allocated_regs, reg);
5648     } else {
5649         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5650                             *allocated_regs, 0, false);
5651         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5652         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5653                    arg_slot_stk_ofs(arg_slot));
5654     }
5655 }
5656 
5657 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5658 {
5659     const int nb_oargs = TCGOP_CALLO(op);
5660     const int nb_iargs = TCGOP_CALLI(op);
5661     const TCGLifeData arg_life = op->life;
5662     const TCGHelperInfo *info = tcg_call_info(op);
5663     TCGRegSet allocated_regs = s->reserved_regs;
5664     int i;
5665 
5666     /*
5667      * Move inputs into place in reverse order,
5668      * so that we place stacked arguments first.
5669      */
5670     for (i = nb_iargs - 1; i >= 0; --i) {
5671         const TCGCallArgumentLoc *loc = &info->in[i];
5672         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5673 
5674         switch (loc->kind) {
5675         case TCG_CALL_ARG_NORMAL:
5676         case TCG_CALL_ARG_EXTEND_U:
5677         case TCG_CALL_ARG_EXTEND_S:
5678             load_arg_normal(s, loc, ts, &allocated_regs);
5679             break;
5680         case TCG_CALL_ARG_BY_REF:
5681             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5682             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5683                          arg_slot_stk_ofs(loc->ref_slot),
5684                          &allocated_regs);
5685             break;
5686         case TCG_CALL_ARG_BY_REF_N:
5687             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5688             break;
5689         default:
5690             g_assert_not_reached();
5691         }
5692     }
5693 
5694     /* Mark dead temporaries and free the associated registers.  */
5695     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5696         if (IS_DEAD_ARG(i)) {
5697             temp_dead(s, arg_temp(op->args[i]));
5698         }
5699     }
5700 
5701     /* Clobber call registers.  */
5702     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5703         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5704             tcg_reg_free(s, i, allocated_regs);
5705         }
5706     }
5707 
5708     /*
5709      * Save globals if they might be written by the helper,
5710      * sync them if they might be read.
5711      */
5712     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5713         /* Nothing to do */
5714     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5715         sync_globals(s, allocated_regs);
5716     } else {
5717         save_globals(s, allocated_regs);
5718     }
5719 
5720     /*
5721      * If the ABI passes a pointer to the returned struct as the first
5722      * argument, load that now.  Pass a pointer to the output home slot.
5723      */
5724     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5725         TCGTemp *ts = arg_temp(op->args[0]);
5726 
5727         if (!ts->mem_allocated) {
5728             temp_allocate_frame(s, ts);
5729         }
5730         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5731     }
5732 
5733     tcg_out_call(s, tcg_call_func(op), info);
5734 
5735     /* Assign output registers and emit moves if needed.  */
5736     switch (info->out_kind) {
5737     case TCG_CALL_RET_NORMAL:
5738         for (i = 0; i < nb_oargs; i++) {
5739             TCGTemp *ts = arg_temp(op->args[i]);
5740             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5741 
5742             /* ENV should not be modified.  */
5743             tcg_debug_assert(!temp_readonly(ts));
5744 
5745             set_temp_val_reg(s, ts, reg);
5746             ts->mem_coherent = 0;
5747         }
5748         break;
5749 
5750     case TCG_CALL_RET_BY_VEC:
5751         {
5752             TCGTemp *ts = arg_temp(op->args[0]);
5753 
5754             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5755             tcg_debug_assert(ts->temp_subindex == 0);
5756             if (!ts->mem_allocated) {
5757                 temp_allocate_frame(s, ts);
5758             }
5759             tcg_out_st(s, TCG_TYPE_V128,
5760                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5761                        ts->mem_base->reg, ts->mem_offset);
5762         }
5763         /* fall through to mark all parts in memory */
5764 
5765     case TCG_CALL_RET_BY_REF:
5766         /* The callee has performed a write through the reference. */
5767         for (i = 0; i < nb_oargs; i++) {
5768             TCGTemp *ts = arg_temp(op->args[i]);
5769             ts->val_type = TEMP_VAL_MEM;
5770         }
5771         break;
5772 
5773     default:
5774         g_assert_not_reached();
5775     }
5776 
5777     /* Flush or discard output registers as needed. */
5778     for (i = 0; i < nb_oargs; i++) {
5779         TCGTemp *ts = arg_temp(op->args[i]);
5780         if (NEED_SYNC_ARG(i)) {
5781             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5782         } else if (IS_DEAD_ARG(i)) {
5783             temp_dead(s, ts);
5784         }
5785     }
5786 }
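
/*
 * Editor's note on the ordering above: placing inputs in reverse
 * writes the stack-resident arguments first, while the argument
 * registers are still free for temp_load() to use as scratch; once
 * load_arg_normal() pins a register in allocated_regs, later loads
 * must steer around it.
 */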
5787 
5788 /**
5789  * atom_and_align_for_opc:
5790  * @s: tcg context
5791  * @opc: memory operation code
5792  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5793  * @allow_two_ops: true if we are prepared to issue two operations
5794  *
5795  * Return the alignment and atomicity to use for the inline fast path
5796  * for the given memory operation.  The alignment may be larger than
5797  * that specified in @opc, and the correct alignment will be diagnosed
5798  * by the slow path helper.
5799  *
5800  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5801  * and issue two loads or stores for subalignment.
5802  */
5803 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5804                                            MemOp host_atom, bool allow_two_ops)
5805 {
5806     MemOp align = memop_alignment_bits(opc);
5807     MemOp size = opc & MO_SIZE;
5808     MemOp half = size ? size - 1 : 0;
5809     MemOp atom = opc & MO_ATOM_MASK;
5810     MemOp atmax;
5811 
5812     switch (atom) {
5813     case MO_ATOM_NONE:
5814         /* The operation requires no specific atomicity. */
5815         atmax = MO_8;
5816         break;
5817 
5818     case MO_ATOM_IFALIGN:
5819         atmax = size;
5820         break;
5821 
5822     case MO_ATOM_IFALIGN_PAIR:
5823         atmax = half;
5824         break;
5825 
5826     case MO_ATOM_WITHIN16:
5827         atmax = size;
5828         if (size == MO_128) {
5829             /* Misalignment implies !within16, and therefore no atomicity. */
5830         } else if (host_atom != MO_ATOM_WITHIN16) {
5831             /* The host does not implement within16, so require alignment. */
5832             align = MAX(align, size);
5833         }
5834         break;
5835 
5836     case MO_ATOM_WITHIN16_PAIR:
5837         atmax = size;
5838         /*
5839          * Misalignment implies !within16, and therefore half atomicity.
5840          * Any host prepared for two operations can implement this with
5841          * half alignment.
5842          */
5843         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5844             align = MAX(align, half);
5845         }
5846         break;
5847 
5848     case MO_ATOM_SUBALIGN:
5849         atmax = size;
5850         if (host_atom != MO_ATOM_SUBALIGN) {
5851             /* If unaligned but not odd, there are subobjects up to half. */
5852             if (allow_two_ops) {
5853                 align = MAX(align, half);
5854             } else {
5855                 align = MAX(align, size);
5856             }
5857         }
5858         break;
5859 
5860     default:
5861         g_assert_not_reached();
5862     }
5863 
5864     return (TCGAtomAlign){ .atom = atmax, .align = align };
5865 }
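
/*
 * Worked example (editor's note): an 8-byte load with MO_ATOM_WITHIN16
 * on a host providing only MO_ATOM_IFALIGN semantics returns
 * { .atom = MO_64, .align = MO_64 }: the host cannot promise
 * within-16-byte atomicity for a misaligned access, so the fast path
 * demands full alignment and defers the rest to the slow path.  A
 * 16-byte access is exempt because misalignment already implies
 * !within16 and so carries no atomicity obligation.
 */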
5866 
5867 /*
5868  * Similarly for qemu_ld/st slow path helpers.
5869  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5870  * using only the provided backend tcg_out_* functions.
5871  */
5872 
5873 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5874 {
5875     int ofs = arg_slot_stk_ofs(slot);
5876 
5877     /*
5878      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5879      * require extension to uint64_t, adjust the address for uint32_t.
5880      */
5881     if (HOST_BIG_ENDIAN &&
5882         TCG_TARGET_REG_BITS == 64 &&
5883         type == TCG_TYPE_I32) {
5884         ofs += 4;
5885     }
5886     return ofs;
5887 }
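
/*
 * Worked example (editor's note): on a 64-bit big-endian host, a
 * TCG_TYPE_I32 argument occupies the high-address half of its 8-byte
 * stack slot, so the "ofs += 4" above aims the 4-byte store at the
 * word the callee will actually read; little-endian hosts need no
 * adjustment.
 */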
5888 
5889 static void tcg_out_helper_load_slots(TCGContext *s,
5890                                       unsigned nmov, TCGMovExtend *mov,
5891                                       const TCGLdstHelperParam *parm)
5892 {
5893     unsigned i;
5894     TCGReg dst3;
5895 
5896     /*
5897      * Start from the end, storing to the stack first.
5898      * This frees those registers, so we need not consider overlap.
5899      */
5900     for (i = nmov; i-- > 0; ) {
5901         unsigned slot = mov[i].dst;
5902 
5903         if (arg_slot_reg_p(slot)) {
5904             goto found_reg;
5905         }
5906 
5907         TCGReg src = mov[i].src;
5908         TCGType dst_type = mov[i].dst_type;
5909         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5910 
5911         /* The argument is going onto the stack; extend into scratch. */
5912         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5913             tcg_debug_assert(parm->ntmp != 0);
5914             mov[i].dst = src = parm->tmp[0];
5915             tcg_out_movext1(s, &mov[i]);
5916         }
5917 
5918         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5919                    tcg_out_helper_stk_ofs(dst_type, slot));
5920     }
5921     return;
5922 
5923  found_reg:
5924     /*
5925      * The remaining arguments are in registers.
5926      * Convert slot numbers to argument registers.
5927      */
5928     nmov = i + 1;
5929     for (i = 0; i < nmov; ++i) {
5930         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5931     }
5932 
5933     switch (nmov) {
5934     case 4:
5935         /* The backend must have provided enough temps for the worst case. */
5936         tcg_debug_assert(parm->ntmp >= 2);
5937 
5938         dst3 = mov[3].dst;
5939         for (unsigned j = 0; j < 3; ++j) {
5940             if (dst3 == mov[j].src) {
5941                 /*
5942                  * Conflict. Copy the source to a temporary, perform the
5943                  * remaining moves, then the extension from our scratch
5944                  * on the way out.
5945                  */
5946                 TCGReg scratch = parm->tmp[1];
5947 
5948                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5949                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5950                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5951                 return;
5952             }
5953         }
5954 
5955         /* No conflicts: perform this move and continue. */
5956         tcg_out_movext1(s, &mov[3]);
5957         /* fall through */
5958 
5959     case 3:
5960         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5961                         parm->ntmp ? parm->tmp[0] : -1);
5962         break;
5963     case 2:
5964         tcg_out_movext2(s, mov, mov + 1,
5965                         parm->ntmp ? parm->tmp[0] : -1);
5966         break;
5967     case 1:
5968         tcg_out_movext1(s, mov);
5969         break;
5970     default:
5971         g_assert_not_reached();
5972     }
5973 }
5974 
5975 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5976                                     TCGType type, tcg_target_long imm,
5977                                     const TCGLdstHelperParam *parm)
5978 {
5979     if (arg_slot_reg_p(slot)) {
5980         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5981     } else {
5982         int ofs = tcg_out_helper_stk_ofs(type, slot);
5983         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5984             tcg_debug_assert(parm->ntmp != 0);
5985             tcg_out_movi(s, type, parm->tmp[0], imm);
5986             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5987         }
5988     }
5989 }
5990 
5991 static void tcg_out_helper_load_common_args(TCGContext *s,
5992                                             const TCGLabelQemuLdst *ldst,
5993                                             const TCGLdstHelperParam *parm,
5994                                             const TCGHelperInfo *info,
5995                                             unsigned next_arg)
5996 {
5997     TCGMovExtend ptr_mov = {
5998         .dst_type = TCG_TYPE_PTR,
5999         .src_type = TCG_TYPE_PTR,
6000         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6001     };
6002     const TCGCallArgumentLoc *loc = &info->in[0];
6003     TCGType type;
6004     unsigned slot;
6005     tcg_target_ulong imm;
6006 
6007     /*
6008      * Handle env, which is always first.
6009      */
6010     ptr_mov.dst = loc->arg_slot;
6011     ptr_mov.src = TCG_AREG0;
6012     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6013 
6014     /*
6015      * Handle oi.
6016      */
6017     imm = ldst->oi;
6018     loc = &info->in[next_arg];
6019     type = TCG_TYPE_I32;
6020     switch (loc->kind) {
6021     case TCG_CALL_ARG_NORMAL:
6022         break;
6023     case TCG_CALL_ARG_EXTEND_U:
6024     case TCG_CALL_ARG_EXTEND_S:
6025         /* No extension required for MemOpIdx. */
6026         tcg_debug_assert(imm <= INT32_MAX);
6027         type = TCG_TYPE_REG;
6028         break;
6029     default:
6030         g_assert_not_reached();
6031     }
6032     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6033     next_arg++;
6034 
6035     /*
6036      * Handle ra.
6037      */
6038     loc = &info->in[next_arg];
6039     slot = loc->arg_slot;
6040     if (parm->ra_gen) {
6041         int arg_reg = -1;
6042         TCGReg ra_reg;
6043 
6044         if (arg_slot_reg_p(slot)) {
6045             arg_reg = tcg_target_call_iarg_regs[slot];
6046         }
6047         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6048 
6049         ptr_mov.dst = slot;
6050         ptr_mov.src = ra_reg;
6051         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6052     } else {
6053         imm = (uintptr_t)ldst->raddr;
6054         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6055     }
6056 }
6057 
6058 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6059                                        const TCGCallArgumentLoc *loc,
6060                                        TCGType dst_type, TCGType src_type,
6061                                        TCGReg lo, TCGReg hi)
6062 {
6063     MemOp reg_mo;
6064 
6065     if (dst_type <= TCG_TYPE_REG) {
6066         MemOp src_ext;
6067 
6068         switch (loc->kind) {
6069         case TCG_CALL_ARG_NORMAL:
6070             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6071             break;
6072         case TCG_CALL_ARG_EXTEND_U:
6073             dst_type = TCG_TYPE_REG;
6074             src_ext = MO_UL;
6075             break;
6076         case TCG_CALL_ARG_EXTEND_S:
6077             dst_type = TCG_TYPE_REG;
6078             src_ext = MO_SL;
6079             break;
6080         default:
6081             g_assert_not_reached();
6082         }
6083 
6084         mov[0].dst = loc->arg_slot;
6085         mov[0].dst_type = dst_type;
6086         mov[0].src = lo;
6087         mov[0].src_type = src_type;
6088         mov[0].src_ext = src_ext;
6089         return 1;
6090     }
6091 
6092     if (TCG_TARGET_REG_BITS == 32) {
6093         assert(dst_type == TCG_TYPE_I64);
6094         reg_mo = MO_32;
6095     } else {
6096         assert(dst_type == TCG_TYPE_I128);
6097         reg_mo = MO_64;
6098     }
6099 
6100     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6101     mov[0].src = lo;
6102     mov[0].dst_type = TCG_TYPE_REG;
6103     mov[0].src_type = TCG_TYPE_REG;
6104     mov[0].src_ext = reg_mo;
6105 
6106     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6107     mov[1].src = hi;
6108     mov[1].dst_type = TCG_TYPE_REG;
6109     mov[1].src_type = TCG_TYPE_REG;
6110     mov[1].src_ext = reg_mo;
6111 
6112     return 2;
6113 }
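
/*
 * Worked example (editor's note): on a 32-bit big-endian host, an i64
 * argument assigned slots {n, n+1} wants the high part in slot n and
 * the low part in slot n+1; loc[HOST_BIG_ENDIAN].arg_slot selects
 * n+1 for mov[0] (lo) and loc[!HOST_BIG_ENDIAN].arg_slot selects n
 * for mov[1] (hi).
 */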
6114 
6115 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6116                                    const TCGLdstHelperParam *parm)
6117 {
6118     const TCGHelperInfo *info;
6119     const TCGCallArgumentLoc *loc;
6120     TCGMovExtend mov[2];
6121     unsigned next_arg, nmov;
6122     MemOp mop = get_memop(ldst->oi);
6123 
6124     switch (mop & MO_SIZE) {
6125     case MO_8:
6126     case MO_16:
6127     case MO_32:
6128         info = &info_helper_ld32_mmu;
6129         break;
6130     case MO_64:
6131         info = &info_helper_ld64_mmu;
6132         break;
6133     case MO_128:
6134         info = &info_helper_ld128_mmu;
6135         break;
6136     default:
6137         g_assert_not_reached();
6138     }
6139 
6140     /* Defer env argument. */
6141     next_arg = 1;
6142 
6143     loc = &info->in[next_arg];
6144     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6145         /*
6146          * 32-bit host with 32-bit guest: zero-extend the guest address
6147          * to 64 bits for the helper by storing the low part, then
6148          * loading a zero for the high part.
6149          */
6150         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6151                                TCG_TYPE_I32, TCG_TYPE_I32,
6152                                ldst->addr_reg, -1);
6153         tcg_out_helper_load_slots(s, 1, mov, parm);
6154 
6155         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6156                                 TCG_TYPE_I32, 0, parm);
6157         next_arg += 2;
6158     } else {
6159         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6160                                       ldst->addr_reg, -1);
6161         tcg_out_helper_load_slots(s, nmov, mov, parm);
6162         next_arg += nmov;
6163     }
6164 
6165     switch (info->out_kind) {
6166     case TCG_CALL_RET_NORMAL:
6167     case TCG_CALL_RET_BY_VEC:
6168         break;
6169     case TCG_CALL_RET_BY_REF:
6170         /*
6171          * The return reference is in the first argument slot.
6172          * We need memory in which to return: re-use the top of stack.
6173          */
6174         {
6175             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6176 
6177             if (arg_slot_reg_p(0)) {
6178                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6179                                  TCG_REG_CALL_STACK, ofs_slot0);
6180             } else {
6181                 tcg_debug_assert(parm->ntmp != 0);
6182                 tcg_out_addi_ptr(s, parm->tmp[0],
6183                                  TCG_REG_CALL_STACK, ofs_slot0);
6184                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6185                            TCG_REG_CALL_STACK, ofs_slot0);
6186             }
6187         }
6188         break;
6189     default:
6190         g_assert_not_reached();
6191     }
6192 
6193     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6194 }
6195 
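/*
 * Move the return value of a helper_ld*_mmu call into the data
 * register(s) recorded in @ldst, applying whatever extension the
 * helper was not asked to perform itself.
 */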
6196 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6197                                   bool load_sign,
6198                                   const TCGLdstHelperParam *parm)
6199 {
6200     MemOp mop = get_memop(ldst->oi);
6201     TCGMovExtend mov[2];
6202     int ofs_slot0;
6203 
6204     switch (ldst->type) {
6205     case TCG_TYPE_I64:
6206         if (TCG_TARGET_REG_BITS == 32) {
6207             break;
6208         }
6209         /* fall through */
6210 
6211     case TCG_TYPE_I32:
6212         mov[0].dst = ldst->datalo_reg;
6213         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6214         mov[0].dst_type = ldst->type;
6215         mov[0].src_type = TCG_TYPE_REG;
6216 
6217         /*
6218          * If load_sign is set, the helper was allowed to perform
6219          * the appropriate sign extension to tcg_target_ulong, and
6220          * all we need now is a plain move.
6221          *
6222          * If not, we expect the relevant extension instruction
6223          * to be no more expensive than a move, and we thus save
6224          * icache space by using only one of the two possible
6225          * helper functions.
6226          */
6227         if (load_sign || !(mop & MO_SIGN)) {
6228             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6229                 mov[0].src_ext = MO_32;
6230             } else {
6231                 mov[0].src_ext = MO_64;
6232             }
6233         } else {
6234             mov[0].src_ext = mop & MO_SSIZE;
6235         }
6236         tcg_out_movext1(s, mov);
6237         return;
6238 
6239     case TCG_TYPE_I128:
6240         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6241         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6242         switch (TCG_TARGET_CALL_RET_I128) {
6243         case TCG_CALL_RET_NORMAL:
6244             break;
6245         case TCG_CALL_RET_BY_VEC:
6246             tcg_out_st(s, TCG_TYPE_V128,
6247                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6248                        TCG_REG_CALL_STACK, ofs_slot0);
6249             /* fall through */
6250         case TCG_CALL_RET_BY_REF:
6251             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6252                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6253             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6254                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6255             return;
6256         default:
6257             g_assert_not_reached();
6258         }
6259         break;
6260 
6261     default:
6262         g_assert_not_reached();
6263     }
6264 
6265     mov[0].dst = ldst->datalo_reg;
6266     mov[0].src =
6267         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6268     mov[0].dst_type = TCG_TYPE_REG;
6269     mov[0].src_type = TCG_TYPE_REG;
6270     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6271 
6272     mov[1].dst = ldst->datahi_reg;
6273     mov[1].src =
6274         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6275     mov[1].dst_type = TCG_TYPE_REG;
6276     mov[1].src_type = TCG_TYPE_REG;
6277     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6278 
6279     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6280 }
6281 
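/*
 * Emit code to load the arguments of a helper_st*_mmu call, i.e.
 * (env, addr, data, oi, retaddr).  Relative to the ld helpers there
 * is an extra data argument, which for MO_128 may be passed by
 * reference on the stack.
 */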
6282 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6283                                    const TCGLdstHelperParam *parm)
6284 {
6285     const TCGHelperInfo *info;
6286     const TCGCallArgumentLoc *loc;
6287     TCGMovExtend mov[4];
6288     TCGType data_type;
6289     unsigned next_arg, nmov, n;
6290     MemOp mop = get_memop(ldst->oi);
6291 
6292     switch (mop & MO_SIZE) {
6293     case MO_8:
6294     case MO_16:
6295     case MO_32:
6296         info = &info_helper_st32_mmu;
6297         data_type = TCG_TYPE_I32;
6298         break;
6299     case MO_64:
6300         info = &info_helper_st64_mmu;
6301         data_type = TCG_TYPE_I64;
6302         break;
6303     case MO_128:
6304         info = &info_helper_st128_mmu;
6305         data_type = TCG_TYPE_I128;
6306         break;
6307     default:
6308         g_assert_not_reached();
6309     }
6310 
6311     /* Defer env argument. */
6312     next_arg = 1;
6313     nmov = 0;
6314 
6315     /* Handle addr argument. */
6316     loc = &info->in[next_arg];
6317     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6318     if (TCG_TARGET_REG_BITS == 32) {
6319         /*
6320          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6321          * to 64 bits for the helper by storing the low part.  Later,
6322          * after we have processed the register inputs, we will load a
6323          * zero for the high part.
6324          */
6325         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6326                                TCG_TYPE_I32, TCG_TYPE_I32,
6327                                ldst->addr_reg, -1);
6328         next_arg += 2;
6329         nmov += 1;
6330     } else {
6331         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6332                                    ldst->addr_reg, -1);
6333         next_arg += n;
6334         nmov += n;
6335     }
6336 
6337     /* Handle data argument. */
6338     loc = &info->in[next_arg];
6339     switch (loc->kind) {
6340     case TCG_CALL_ARG_NORMAL:
6341     case TCG_CALL_ARG_EXTEND_U:
6342     case TCG_CALL_ARG_EXTEND_S:
6343         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6344                                    ldst->datalo_reg, ldst->datahi_reg);
6345         next_arg += n;
6346         nmov += n;
6347         tcg_out_helper_load_slots(s, nmov, mov, parm);
6348         break;
6349 
6350     case TCG_CALL_ARG_BY_REF:
6351         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6352         tcg_debug_assert(data_type == TCG_TYPE_I128);
6353         tcg_out_st(s, TCG_TYPE_I64,
6354                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6355                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6356         tcg_out_st(s, TCG_TYPE_I64,
6357                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6358                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6359 
6360         tcg_out_helper_load_slots(s, nmov, mov, parm);
6361 
6362         if (arg_slot_reg_p(loc->arg_slot)) {
6363             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6364                              TCG_REG_CALL_STACK,
6365                              arg_slot_stk_ofs(loc->ref_slot));
6366         } else {
6367             tcg_debug_assert(parm->ntmp != 0);
6368             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6369                              arg_slot_stk_ofs(loc->ref_slot));
6370             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6371                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6372         }
6373         next_arg += 2;
6374         break;
6375 
6376     default:
6377         g_assert_not_reached();
6378     }
6379 
6380     if (TCG_TARGET_REG_BITS == 32) {
6381         /* Zero extend the address by loading a zero for the high part. */
6382         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6383         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6384     }
6385 
6386     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6387 }
6388 
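/*
 * Generate host code for @tb from the ops accumulated in @s.
 * Returns the number of bytes of code emitted, -1 if the code
 * buffer overflowed, or -2 if the TB exceeded internal limits
 * and must be retried with fewer guest instructions.
 */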
6389 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6390 {
6391     int i, start_words, num_insns;
6392     TCGOp *op;
6393 
6394     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6395                  && qemu_log_in_addr_range(pc_start))) {
6396         FILE *logfile = qemu_log_trylock();
6397         if (logfile) {
6398             fprintf(logfile, "OP:\n");
6399             tcg_dump_ops(s, logfile, false);
6400             fprintf(logfile, "\n");
6401             qemu_log_unlock(logfile);
6402         }
6403     }
6404 
6405 #ifdef CONFIG_DEBUG_TCG
6406     /* Ensure all labels referenced have been emitted.  */
6407     {
6408         TCGLabel *l;
6409         bool error = false;
6410 
6411         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6412             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6413                 qemu_log_mask(CPU_LOG_TB_OP,
6414                               "$L%d referenced but not present.\n", l->id);
6415                 error = true;
6416             }
6417         }
6418         assert(!error);
6419     }
6420 #endif
6421 
6422     /* Do not reuse any EBB that may be allocated within the TB. */
6423     tcg_temp_ebb_reset_freed(s);
6424 
6425     tcg_optimize(s);
6426 
6427     reachable_code_pass(s);
6428     liveness_pass_0(s);
6429     liveness_pass_1(s);
6430 
6431     if (s->nb_indirects > 0) {
6432         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6433                      && qemu_log_in_addr_range(pc_start))) {
6434             FILE *logfile = qemu_log_trylock();
6435             if (logfile) {
6436                 fprintf(logfile, "OP before indirect lowering:\n");
6437                 tcg_dump_ops(s, logfile, false);
6438                 fprintf(logfile, "\n");
6439                 qemu_log_unlock(logfile);
6440             }
6441         }
6442 
6443         /* Replace indirect temps with direct temps.  */
6444         if (liveness_pass_2(s)) {
6445             /* If changes were made, re-run liveness.  */
6446             liveness_pass_1(s);
6447         }
6448     }
6449 
6450     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6451                  && qemu_log_in_addr_range(pc_start))) {
6452         FILE *logfile = qemu_log_trylock();
6453         if (logfile) {
6454             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6455             tcg_dump_ops(s, logfile, true);
6456             fprintf(logfile, "\n");
6457             qemu_log_unlock(logfile);
6458         }
6459     }
6460 
6461     /* Initialize goto_tb jump offsets. */
6462     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6463     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6464     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6465     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6466 
6467     tcg_reg_alloc_start(s);
6468 
6469     /*
6470      * Reset the buffer pointers when restarting after overflow.
6471      * TODO: Move this into translate-all.c with the rest of the
6472      * buffer management.  Having only this done here is confusing.
6473      */
6474     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6475     s->code_ptr = s->code_buf;
6476     s->data_gen_ptr = NULL;
6477 
6478     QSIMPLEQ_INIT(&s->ldst_labels);
6479     s->pool_labels = NULL;
6480 
6481     start_words = s->insn_start_words;
6482     s->gen_insn_data =
6483         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6484 
6485     tcg_out_tb_start(s);
6486 
6487     num_insns = -1;
6488     QTAILQ_FOREACH(op, &s->ops, link) {
6489         TCGOpcode opc = op->opc;
6490 
6491         switch (opc) {
6492         case INDEX_op_mov:
6493         case INDEX_op_mov_vec:
6494             tcg_reg_alloc_mov(s, op);
6495             break;
6496         case INDEX_op_dup_vec:
6497             tcg_reg_alloc_dup(s, op);
6498             break;
6499         case INDEX_op_insn_start:
6500             if (num_insns >= 0) {
6501                 size_t off = tcg_current_code_size(s);
6502                 s->gen_insn_end_off[num_insns] = off;
6503                 /* Assert that we do not overflow our stored offset.  */
6504                 assert(s->gen_insn_end_off[num_insns] == off);
6505             }
6506             num_insns++;
6507             for (i = 0; i < start_words; ++i) {
6508                 s->gen_insn_data[num_insns * start_words + i] =
6509                     tcg_get_insn_start_param(op, i);
6510             }
6511             break;
6512         case INDEX_op_discard:
6513             temp_dead(s, arg_temp(op->args[0]));
6514             break;
6515         case INDEX_op_set_label:
6516             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6517             tcg_out_label(s, arg_label(op->args[0]));
6518             break;
6519         case INDEX_op_call:
6520             tcg_reg_alloc_call(s, op);
6521             break;
6522         case INDEX_op_exit_tb:
6523             tcg_out_exit_tb(s, op->args[0]);
6524             break;
6525         case INDEX_op_goto_tb:
6526             tcg_out_goto_tb(s, op->args[0]);
6527             break;
6528         case INDEX_op_dup2_vec:
6529             if (tcg_reg_alloc_dup2(s, op)) {
6530                 break;
6531             }
6532             /* fall through */
6533         default:
6534             /* Sanity check that we've not introduced any unhandled opcodes. */
6535             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6536                                               TCGOP_FLAGS(op)));
6537             /* Note: it would be much faster to have specialized
6538                register allocator functions for some common argument
6539                patterns.  */
6540             tcg_reg_alloc_op(s, op);
6541             break;
6542         }
6543         /* Test for (pending) buffer overflow.  The assumption is that any
6544            one operation beginning below the high water mark cannot overrun
6545            the buffer completely.  Thus we can test for overflow after
6546            generating code without having to check during generation.  */
6547         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6548             return -1;
6549         }
6550         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6551         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6552             return -2;
6553         }
6554     }
6555     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6556     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6557 
6558     /* Generate TB finalization at the end of block */
6559     i = tcg_out_ldst_finalize(s);
6560     if (i < 0) {
6561         return i;
6562     }
6563     i = tcg_out_pool_finalize(s);
6564     if (i < 0) {
6565         return i;
6566     }
6567     if (!tcg_resolve_relocs(s)) {
6568         return -2;
6569     }
6570 
6571 #ifndef CONFIG_TCG_INTERPRETER
6572     /* flush instruction cache */
6573     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6574                         (uintptr_t)s->code_buf,
6575                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6576 #endif
6577 
6578     return tcg_current_code_size(s);
6579 }
6580 
6581 #ifdef ELF_HOST_MACHINE
6582 /* In order to use this feature, the backend needs to do three things:
6583 
6584    (1) Define ELF_HOST_MACHINE to indicate both what value to
6585        put into the ELF image and to indicate support for the feature.
6586 
6587    (2) Define tcg_register_jit.  This should create a buffer containing
6588        the contents of a .debug_frame section that describes the post-
6589        prologue unwind info for the tcg machine.
6590 
6591    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6592 */
6593 
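/*
 * A backend's tcg_register_jit thus typically reduces to the
 * following (illustrative sketch only; the unwind data and its
 * name vary per backend):
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 *
 * where debug_frame is a static structure beginning with a
 * DebugFrameHeader, built in the backend's tcg-target.c.inc.
 */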
6594 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6595 typedef enum {
6596     JIT_NOACTION = 0,
6597     JIT_REGISTER_FN,
6598     JIT_UNREGISTER_FN
6599 } jit_actions_t;
6600 
6601 struct jit_code_entry {
6602     struct jit_code_entry *next_entry;
6603     struct jit_code_entry *prev_entry;
6604     const void *symfile_addr;
6605     uint64_t symfile_size;
6606 };
6607 
6608 struct jit_descriptor {
6609     uint32_t version;
6610     uint32_t action_flag;
6611     struct jit_code_entry *relevant_entry;
6612     struct jit_code_entry *first_entry;
6613 };
6614 
6615 void __jit_debug_register_code(void) __attribute__((noinline));
6616 void __jit_debug_register_code(void)
6617 {
6618     asm("");
6619 }
6620 
6621 /* Must statically initialize the version, because GDB may check
6622    the version before we can set it.  */
6623 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6624 
6625 /* End GDB interface.  */
6626 
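/*
 * Return the offset of @str within @strtab.  Note the search is
 * unbounded: @str must be present, which holds for all queries
 * made against the static img_template.str table below.
 */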
6627 static int find_string(const char *strtab, const char *str)
6628 {
6629     const char *p = strtab + 1;
6630 
6631     while (1) {
6632         if (strcmp(p, str) == 0) {
6633             return p - strtab;
6634         }
6635         p += strlen(p) + 1;
6636     }
6637 }
6638 
6639 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6640                                  const void *debug_frame,
6641                                  size_t debug_frame_size)
6642 {
6643     struct __attribute__((packed)) DebugInfo {
6644         uint32_t  len;
6645         uint16_t  version;
6646         uint32_t  abbrev;
6647         uint8_t   ptr_size;
6648         uint8_t   cu_die;
6649         uint16_t  cu_lang;
6650         uintptr_t cu_low_pc;
6651         uintptr_t cu_high_pc;
6652         uint8_t   fn_die;
6653         char      fn_name[16];
6654         uintptr_t fn_low_pc;
6655         uintptr_t fn_high_pc;
6656         uint8_t   cu_eoc;
6657     };
6658 
6659     struct ElfImage {
6660         ElfW(Ehdr) ehdr;
6661         ElfW(Phdr) phdr;
6662         ElfW(Shdr) shdr[7];
6663         ElfW(Sym)  sym[2];
6664         struct DebugInfo di;
6665         uint8_t    da[24];
6666         char       str[80];
6667     };
6668 
6669     struct ElfImage *img;
6670 
6671     static const struct ElfImage img_template = {
6672         .ehdr = {
6673             .e_ident[EI_MAG0] = ELFMAG0,
6674             .e_ident[EI_MAG1] = ELFMAG1,
6675             .e_ident[EI_MAG2] = ELFMAG2,
6676             .e_ident[EI_MAG3] = ELFMAG3,
6677             .e_ident[EI_CLASS] = ELF_CLASS,
6678             .e_ident[EI_DATA] = ELF_DATA,
6679             .e_ident[EI_VERSION] = EV_CURRENT,
6680             .e_type = ET_EXEC,
6681             .e_machine = ELF_HOST_MACHINE,
6682             .e_version = EV_CURRENT,
6683             .e_phoff = offsetof(struct ElfImage, phdr),
6684             .e_shoff = offsetof(struct ElfImage, shdr),
6685             .e_ehsize = sizeof(ElfW(Ehdr)),
6686             .e_phentsize = sizeof(ElfW(Phdr)),
6687             .e_phnum = 1,
6688             .e_shentsize = sizeof(ElfW(Shdr)),
6689             .e_shnum = ARRAY_SIZE(img->shdr),
6690             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6691 #ifdef ELF_HOST_FLAGS
6692             .e_flags = ELF_HOST_FLAGS,
6693 #endif
6694 #ifdef ELF_OSABI
6695             .e_ident[EI_OSABI] = ELF_OSABI,
6696 #endif
6697         },
6698         .phdr = {
6699             .p_type = PT_LOAD,
6700             .p_flags = PF_X,
6701         },
6702         .shdr = {
6703             [0] = { .sh_type = SHT_NULL },
6704             /* Trick: The contents of code_gen_buffer are not present in
6705                this fake ELF file; that got allocated elsewhere.  Therefore
6706                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6707                will not look for contents.  We can record any address.  */
6708             [1] = { /* .text */
6709                 .sh_type = SHT_NOBITS,
6710                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6711             },
6712             [2] = { /* .debug_info */
6713                 .sh_type = SHT_PROGBITS,
6714                 .sh_offset = offsetof(struct ElfImage, di),
6715                 .sh_size = sizeof(struct DebugInfo),
6716             },
6717             [3] = { /* .debug_abbrev */
6718                 .sh_type = SHT_PROGBITS,
6719                 .sh_offset = offsetof(struct ElfImage, da),
6720                 .sh_size = sizeof(img->da),
6721             },
6722             [4] = { /* .debug_frame */
6723                 .sh_type = SHT_PROGBITS,
6724                 .sh_offset = sizeof(struct ElfImage),
6725             },
6726             [5] = { /* .symtab */
6727                 .sh_type = SHT_SYMTAB,
6728                 .sh_offset = offsetof(struct ElfImage, sym),
6729                 .sh_size = sizeof(img->sym),
6730                 .sh_info = 1,
6731                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6732                 .sh_entsize = sizeof(ElfW(Sym)),
6733             },
6734             [6] = { /* .strtab */
6735                 .sh_type = SHT_STRTAB,
6736                 .sh_offset = offsetof(struct ElfImage, str),
6737                 .sh_size = sizeof(img->str),
6738             }
6739         },
6740         .sym = {
6741             [1] = { /* code_gen_buffer */
6742                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6743                 .st_shndx = 1,
6744             }
6745         },
6746         .di = {
6747             .len = sizeof(struct DebugInfo) - 4,
6748             .version = 2,
6749             .ptr_size = sizeof(void *),
6750             .cu_die = 1,
6751             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6752             .fn_die = 2,
6753             .fn_name = "code_gen_buffer"
6754         },
6755         .da = {
6756             1,          /* abbrev number (the cu) */
6757             0x11, 1,    /* DW_TAG_compile_unit, has children */
6758             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6759             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6760             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6761             0, 0,       /* end of abbrev */
6762             2,          /* abbrev number (the fn) */
6763             0x2e, 0,    /* DW_TAG_subprogram, no children */
6764             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6765             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6766             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6767             0, 0,       /* end of abbrev */
6768             0           /* no more abbrev */
6769         },
6770         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6771                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6772     };
6773 
6774     /* We only need a single jit entry; statically allocate it.  */
6775     static struct jit_code_entry one_entry;
6776 
6777     uintptr_t buf = (uintptr_t)buf_ptr;
6778     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6779     DebugFrameHeader *dfh;
6780 
6781     img = g_malloc(img_size);
6782     *img = img_template;
6783 
6784     img->phdr.p_vaddr = buf;
6785     img->phdr.p_paddr = buf;
6786     img->phdr.p_memsz = buf_size;
6787 
6788     img->shdr[1].sh_name = find_string(img->str, ".text");
6789     img->shdr[1].sh_addr = buf;
6790     img->shdr[1].sh_size = buf_size;
6791 
6792     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6793     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6794 
6795     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6796     img->shdr[4].sh_size = debug_frame_size;
6797 
6798     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6799     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6800 
6801     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6802     img->sym[1].st_value = buf;
6803     img->sym[1].st_size = buf_size;
6804 
6805     img->di.cu_low_pc = buf;
6806     img->di.cu_high_pc = buf + buf_size;
6807     img->di.fn_low_pc = buf;
6808     img->di.fn_high_pc = buf + buf_size;
6809 
6810     dfh = (DebugFrameHeader *)(img + 1);
6811     memcpy(dfh, debug_frame, debug_frame_size);
6812     dfh->fde.func_start = buf;
6813     dfh->fde.func_len = buf_size;
6814 
6815 #ifdef DEBUG_JIT
6816     /* Enable this block to write the ELF image to a file for debugging;
6817        it can then be inspected with readelf, objdump, etc.  */
6818     {
6819         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6820         FILE *f = fopen(jit, "w+b");
6821         if (f) {
6822             if (fwrite(img, img_size, 1, f) != 1) {
6823                 /* Avoid stupid unused return value warning for fwrite.  */
6824             }
6825             fclose(f);
6826         }
6827     }
6828 #endif
6829 
6830     one_entry.symfile_addr = img;
6831     one_entry.symfile_size = img_size;
6832 
6833     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6834     __jit_debug_descriptor.relevant_entry = &one_entry;
6835     __jit_debug_descriptor.first_entry = &one_entry;
6836     __jit_debug_register_code();
6837 }
6838 #else
6839 /* No support for the feature.  Provide the entry point expected by exec.c,
6840    and implement the internal function we declared earlier.  */
6841 
6842 static void tcg_register_jit_int(const void *buf, size_t size,
6843                                  const void *debug_frame,
6844                                  size_t debug_frame_size)
6845 {
6846 }
6847 
6848 void tcg_register_jit(const void *buf, size_t buf_size)
6849 {
6850 }
6851 #endif /* ELF_HOST_MACHINE */
6852 
6853 #if !TCG_TARGET_MAYBE_vec
6854 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6855 {
6856     g_assert_not_reached();
6857 }
6858 #endif
6859