/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
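
/*
 * ELF_CLASS and ELF_DATA describe the host: they select the class and
 * byte order of the in-memory ELF image that is built below and handed
 * to GDB via the JIT debugging interface (see tcg_register_jit_int).
 */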

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
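
/*
 * There is no guest_base in system mode.  The statement expression
 * above makes any reachable use of the symbol a compile-time error
 * via qemu_build_not_reached(), while uses in dead code still
 * type-check as uintptr_t.
 */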

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
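
/*
 * Note for the tcg_outN/tcg_patchN helpers above: tcg_insn_unit is the
 * backend's minimum instruction granule.  When the value is wider than
 * one unit, it is emitted with memcpy and code_ptr advances by
 * sizeof(v) / TCG_TARGET_INSN_UNIT_SIZE units; e.g. tcg_out32 on a
 * one-byte-unit host advances code_ptr by four units.
 */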

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
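
/*
 * The offset computed above is negative: CPUNegativeOffsetState is
 * laid out immediately before the architectural state that env points
 * to, so backends address the TLB mask/table at a small negative
 * offset from env.  The QEMU_BUILD_BUG_ON against MIN_TLB_MASK_TABLE_OFS
 * below checks that this offset stays within reach of each backend's
 * addressing modes.
 */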

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
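
/*
 * Example: i1 = {dst=R0, src=R1} and i2 = {dst=R1, src=R0} form a swap.
 * If the backend implements tcg_out_xchg, the swap happens in place and
 * only the extensions remain to be emitted; otherwise src1 is bounced
 * through @scratch.
 */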

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
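
/*
 * This ordering (larger nlong first, then descending memcmp of data)
 * keeps entries with identical contents adjacent, so that
 * tcg_out_pool_finalize can deduplicate by comparing each entry only
 * against the previous one emitted.
 */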

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of the block.
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
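
/*
 * For example, a line "C_O1_I2(r, r, ri)" in tcg-target-con-set.h
 * expands here to the enumerator c_o1_i2_r_r_ri.  The same header is
 * included again below, under different macro definitions, to build
 * the matching constraint_sets[] entry; a third set of definitions
 * lets tcg_target_op_def() name the enumerator directly.
 */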

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
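
/*
 * In this second expansion, each line becomes { nb_oargs, nb_iargs,
 * constraint strings }; the "&" prefix marks an output that may not
 * alias any input (a "new" register).
 */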

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
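
/*
 * Note that TCGOutOpSubtract pairs out_rrr with out_rir (constant
 * minuend) rather than out_rri: subtraction of a constant is expressed
 * elsewhere in TCG as addition of the negated constant, so the only
 * immediate form a backend needs here is "constant minus register".
 */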

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise giving a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
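
/*
 * tcg_malloc_internal is only the slow path; the common case is the
 * inline tcg_malloc() wrapper in tcg.h, which bump-allocates from the
 * current chunk and falls back here when the chunk is exhausted.
 * tcg_pool_reset below rewinds the chunks for reuse and frees only the
 * oversized allocations.
 */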

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
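
/*
 * Argument slots are numbered uniformly across registers and stack:
 * slots below ARRAY_SIZE(tcg_target_call_iarg_regs) are registers, and
 * slot N beyond that maps to the stack word at
 * TCG_TARGET_CALL_STACK_OFFSET + (N - nreg) * sizeof(tcg_target_long),
 * as computed above.
 */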

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
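
/*
 * layout_arg_even services ABIs that pass 64-bit and wider values in
 * aligned register pairs (e.g. 32-bit Arm): rounding arg_slot up to an
 * even index skips the odd register so the pair starts aligned.
 */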

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
1368      * Allocate space from "ref_slot", which will be adjusted to
1369      * follow the parameters on the stack.
1370      */
1371     loc[0].ref_slot = cum->ref_slot;
1372 
1373     /*
1374      * Subsequent words also go into the reference slot, but
1375      * do not accumulate into the regular arguments.
1376      */
1377     for (int i = 1; i < n; ++i) {
1378         loc[i] = (TCGCallArgumentLoc){
1379             .kind = TCG_CALL_ARG_BY_REF_N,
1380             .arg_idx = cum->arg_idx,
1381             .tmp_subindex = i,
1382             .ref_slot = cum->ref_slot + i,
1383         };
1384     }
1385     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1386     cum->ref_slot += n;
1387 }
1388 
1389 static void init_call_layout(TCGHelperInfo *info)
1390 {
1391     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1392     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1393     unsigned typemask = info->typemask;
1394     unsigned typecode;
1395     TCGCumulativeArgs cum = { };
1396 
1397     /*
1398      * Parse and place any function return value.
1399      */
1400     typecode = typemask & 7;
1401     switch (typecode) {
1402     case dh_typecode_void:
1403         info->nr_out = 0;
1404         break;
1405     case dh_typecode_i32:
1406     case dh_typecode_s32:
1407     case dh_typecode_ptr:
1408         info->nr_out = 1;
1409         info->out_kind = TCG_CALL_RET_NORMAL;
1410         break;
1411     case dh_typecode_i64:
1412     case dh_typecode_s64:
1413         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1414         info->out_kind = TCG_CALL_RET_NORMAL;
1415         /* Query the last register now to trigger any assert early. */
1416         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1417         break;
1418     case dh_typecode_i128:
1419         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1420         info->out_kind = TCG_TARGET_CALL_RET_I128;
1421         switch (TCG_TARGET_CALL_RET_I128) {
1422         case TCG_CALL_RET_NORMAL:
1423             /* Query the last register now to trigger any assert early. */
1424             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1425             break;
1426         case TCG_CALL_RET_BY_VEC:
1427             /* Query the single register now to trigger any assert early. */
1428             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1429             break;
1430         case TCG_CALL_RET_BY_REF:
1431             /*
1432              * Allocate the first argument to the output.
1433              * We don't need to store this anywhere, just make it
1434              * unavailable for use in the input loop below.
1435              */
1436             cum.arg_slot = 1;
1437             break;
1438         default:
1439             qemu_build_not_reached();
1440         }
1441         break;
1442     default:
1443         g_assert_not_reached();
1444     }
1445 
1446     /*
1447      * Parse and place function arguments.
1448      */
1449     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1450         TCGCallArgumentKind kind;
1451         TCGType type;
1452 
1453         typecode = typemask & 7;
1454         switch (typecode) {
1455         case dh_typecode_i32:
1456         case dh_typecode_s32:
1457             type = TCG_TYPE_I32;
1458             break;
1459         case dh_typecode_i64:
1460         case dh_typecode_s64:
1461             type = TCG_TYPE_I64;
1462             break;
1463         case dh_typecode_ptr:
1464             type = TCG_TYPE_PTR;
1465             break;
1466         case dh_typecode_i128:
1467             type = TCG_TYPE_I128;
1468             break;
1469         default:
1470             g_assert_not_reached();
1471         }
1472 
1473         switch (type) {
1474         case TCG_TYPE_I32:
1475             switch (TCG_TARGET_CALL_ARG_I32) {
1476             case TCG_CALL_ARG_EVEN:
1477                 layout_arg_even(&cum);
1478                 /* fall through */
1479             case TCG_CALL_ARG_NORMAL:
1480                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1481                 break;
1482             case TCG_CALL_ARG_EXTEND:
1483                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1484                 layout_arg_1(&cum, info, kind);
1485                 break;
1486             default:
1487                 qemu_build_not_reached();
1488             }
1489             break;
1490 
1491         case TCG_TYPE_I64:
1492             switch (TCG_TARGET_CALL_ARG_I64) {
1493             case TCG_CALL_ARG_EVEN:
1494                 layout_arg_even(&cum);
1495                 /* fall through */
1496             case TCG_CALL_ARG_NORMAL:
1497                 if (TCG_TARGET_REG_BITS == 32) {
1498                     layout_arg_normal_n(&cum, info, 2);
1499                 } else {
1500                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1501                 }
1502                 break;
1503             default:
1504                 qemu_build_not_reached();
1505             }
1506             break;
1507 
1508         case TCG_TYPE_I128:
1509             switch (TCG_TARGET_CALL_ARG_I128) {
1510             case TCG_CALL_ARG_EVEN:
1511                 layout_arg_even(&cum);
1512                 /* fall through */
1513             case TCG_CALL_ARG_NORMAL:
1514                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1515                 break;
1516             case TCG_CALL_ARG_BY_REF:
1517                 layout_arg_by_ref(&cum, info);
1518                 break;
1519             default:
1520                 qemu_build_not_reached();
1521             }
1522             break;
1523 
1524         default:
1525             g_assert_not_reached();
1526         }
1527     }
1528     info->nr_in = cum.info_in_idx;
1529 
1530     /* Validate that we didn't overrun the input array. */
1531     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1532     /* Validate the backend has enough argument space. */
1533     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1534 
1535     /*
1536      * Relocate the "ref_slot" area to the end of the parameters.
1537      * Minimizing this stack offset helps code size for x86,
1538      * which has a signed 8-bit offset encoding.
1539      */
1540     if (cum.ref_slot != 0) {
1541         int ref_base = 0;
1542 
1543         if (cum.arg_slot > max_reg_slots) {
1544             int align = __alignof(Int128) / sizeof(tcg_target_long);
1545 
1546             ref_base = cum.arg_slot - max_reg_slots;
1547             if (align > 1) {
1548                 ref_base = ROUND_UP(ref_base, align);
1549             }
1550         }
1551         assert(ref_base + cum.ref_slot <= max_stk_slots);
1552         ref_base += max_reg_slots;
1553 
1554         if (ref_base != 0) {
1555             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1556                 TCGCallArgumentLoc *loc = &info->in[i];
1557                 switch (loc->kind) {
1558                 case TCG_CALL_ARG_BY_REF:
1559                 case TCG_CALL_ARG_BY_REF_N:
1560                     loc->ref_slot += ref_base;
1561                     break;
1562                 default:
1563                     break;
1564                 }
1565             }
1566         }
1567     }
1568 }
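
/*
 * Worked example of the ref_slot relocation above (illustrative values,
 * not taken from any particular host): with max_reg_slots = 6 and
 * cum.arg_slot = 8, two stack slots already hold direct arguments, so
 * ref_base starts at 2 and is rounded up to __alignof(Int128) in slots
 * (2 on a 64-bit host), leaving ref_base = 2.  Adding back max_reg_slots
 * gives 8, so each TCG_CALL_ARG_BY_REF{,_N} location has its ref_slot
 * offset by 8, placing the by-reference copies immediately after the
 * direct stack arguments.
 */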
1569 
1570 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1571 static void process_constraint_sets(void);
1572 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1573                                             TCGReg reg, const char *name);
1574 
1575 static void tcg_context_init(unsigned max_threads)
1576 {
1577     TCGContext *s = &tcg_init_ctx;
1578     int n, i;
1579     TCGTemp *ts;
1580 
1581     memset(s, 0, sizeof(*s));
1582     s->nb_globals = 0;
1583 
1584     init_call_layout(&info_helper_ld32_mmu);
1585     init_call_layout(&info_helper_ld64_mmu);
1586     init_call_layout(&info_helper_ld128_mmu);
1587     init_call_layout(&info_helper_st32_mmu);
1588     init_call_layout(&info_helper_st64_mmu);
1589     init_call_layout(&info_helper_st128_mmu);
1590 
1591     tcg_target_init(s);
1592     process_constraint_sets();
1593 
1594     /* Reverse the order of the saved registers, assuming they're all at
1595        the start of tcg_target_reg_alloc_order.  */
1596     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1597         int r = tcg_target_reg_alloc_order[n];
1598         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1599             break;
1600         }
1601     }
1602     for (i = 0; i < n; ++i) {
1603         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1604     }
1605     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1606         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1607     }
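
    /*
     * Illustrative example (hypothetical register set): if the allocation
     * order were { s0, s1, s2, t0, t1 } with the t* registers
     * call-clobbered, then n == 3 above and indirect_reg_alloc_order
     * becomes { s2, s1, s0, t0, t1 }: the callee-saved prefix reversed,
     * the clobbered suffix unchanged.
     */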
1608 
1609     tcg_ctx = s;
1610     /*
1611      * In user-mode we simply share the init context among threads, since we
1612      * use a single region. See the documentation of tcg_region_init()
1613      * for the reasoning behind this.
1614      * In system-mode we will have at most max_threads TCG threads.
1615      */
1616 #ifdef CONFIG_USER_ONLY
1617     tcg_ctxs = &tcg_ctx;
1618     tcg_cur_ctxs = 1;
1619     tcg_max_ctxs = 1;
1620 #else
1621     tcg_max_ctxs = max_threads;
1622     tcg_ctxs = g_new0(TCGContext *, max_threads);
1623 #endif
1624 
1625     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1626     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1627     tcg_env = temp_tcgv_ptr(ts);
1628 }
1629 
1630 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1631 {
1632     tcg_context_init(max_threads);
1633     tcg_region_init(tb_size, splitwx, max_threads);
1634 }
1635 
1636 /*
1637  * Allocate TBs right before their corresponding translated code, making
1638  * sure that TBs and code are on different cache lines.
1639  */
1640 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1641 {
1642     uintptr_t align = qemu_icache_linesize;
1643     TranslationBlock *tb;
1644     void *next;
1645 
1646  retry:
1647     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1648     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1649 
1650     if (unlikely(next > s->code_gen_highwater)) {
1651         if (tcg_region_alloc(s)) {
1652             return NULL;
1653         }
1654         goto retry;
1655     }
1656     qatomic_set(&s->code_gen_ptr, next);
1657     return tb;
1658 }
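
/*
 * Illustrative arithmetic for the allocation above: with a 64-byte icache
 * line, code_gen_ptr == 0x1010 rounds up to tb == 0x1040, and next is
 * ROUND_UP(0x1040 + sizeof(TranslationBlock), 64), so the translated code
 * that follows begins on a fresh cache line, separate from the TB
 * descriptor it describes.
 */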
1659 
1660 void tcg_prologue_init(void)
1661 {
1662     TCGContext *s = tcg_ctx;
1663     size_t prologue_size;
1664 
1665     s->code_ptr = s->code_gen_ptr;
1666     s->code_buf = s->code_gen_ptr;
1667     s->data_gen_ptr = NULL;
1668 
1669 #ifndef CONFIG_TCG_INTERPRETER
1670     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1671 #endif
1672 
1673     s->pool_labels = NULL;
1674 
1675     qemu_thread_jit_write();
1676     /* Generate the prologue.  */
1677     tcg_target_qemu_prologue(s);
1678 
1679     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1680     {
1681         int result = tcg_out_pool_finalize(s);
1682         tcg_debug_assert(result == 0);
1683     }
1684 
1685     prologue_size = tcg_current_code_size(s);
1686     perf_report_prologue(s->code_gen_ptr, prologue_size);
1687 
1688 #ifndef CONFIG_TCG_INTERPRETER
1689     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1690                         (uintptr_t)s->code_buf, prologue_size);
1691 #endif
1692 
1693     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1694         FILE *logfile = qemu_log_trylock();
1695         if (logfile) {
1696             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1697             if (s->data_gen_ptr) {
1698                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1699                 size_t data_size = prologue_size - code_size;
1700                 size_t i;
1701 
1702                 disas(logfile, s->code_gen_ptr, code_size);
1703 
1704                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1705                     if (sizeof(tcg_target_ulong) == 8) {
1706                         fprintf(logfile,
1707                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1708                                 (uintptr_t)s->data_gen_ptr + i,
1709                                 *(uint64_t *)(s->data_gen_ptr + i));
1710                     } else {
1711                         fprintf(logfile,
1712                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1713                                 (uintptr_t)s->data_gen_ptr + i,
1714                                 *(uint32_t *)(s->data_gen_ptr + i));
1715                     }
1716                 }
1717             } else {
1718                 disas(logfile, s->code_gen_ptr, prologue_size);
1719             }
1720             fprintf(logfile, "\n");
1721             qemu_log_unlock(logfile);
1722         }
1723     }
1724 
1725 #ifndef CONFIG_TCG_INTERPRETER
1726     /*
1727      * Assert that goto_ptr is implemented completely, setting an epilogue.
1728      * For tci, we use NULL as the signal to return from the interpreter,
1729      * so skip this check.
1730      */
1731     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1732 #endif
1733 
1734     tcg_region_prologue_set(s);
1735 }
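
/*
 * Note on the split-wx handling above: with separate RW/RX mappings, the
 * prologue is written through the writable view (s->code_buf) while
 * execution uses the RX alias returned by tcg_splitwx_to_rx(); that is
 * why flush_idcache_range() receives both addresses.
 */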
1736 
1737 void tcg_func_start(TCGContext *s)
1738 {
1739     tcg_pool_reset(s);
1740     s->nb_temps = s->nb_globals;
1741 
1742     /* No temps have been previously allocated for size or locality.  */
1743     tcg_temp_ebb_reset_freed(s);
1744 
1745     /* No constant temps have been previously allocated. */
1746     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1747         if (s->const_table[i]) {
1748             g_hash_table_remove_all(s->const_table[i]);
1749         }
1750     }
1751 
1752     s->nb_ops = 0;
1753     s->nb_labels = 0;
1754     s->current_frame_offset = s->frame_start;
1755 
1756 #ifdef CONFIG_DEBUG_TCG
1757     s->goto_tb_issue_mask = 0;
1758 #endif
1759 
1760     QTAILQ_INIT(&s->ops);
1761     QTAILQ_INIT(&s->free_ops);
1762     s->emit_before_op = NULL;
1763     QSIMPLEQ_INIT(&s->labels);
1764 
1765     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1766     tcg_debug_assert(s->insn_start_words > 0);
1767 }
1768 
1769 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1770 {
1771     int n = s->nb_temps++;
1772 
1773     if (n >= TCG_MAX_TEMPS) {
1774         tcg_raise_tb_overflow(s);
1775     }
1776     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1777 }
1778 
1779 static TCGTemp *tcg_global_alloc(TCGContext *s)
1780 {
1781     TCGTemp *ts;
1782 
1783     tcg_debug_assert(s->nb_globals == s->nb_temps);
1784     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1785     s->nb_globals++;
1786     ts = tcg_temp_alloc(s);
1787     ts->kind = TEMP_GLOBAL;
1788 
1789     return ts;
1790 }
1791 
1792 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1793                                             TCGReg reg, const char *name)
1794 {
1795     TCGTemp *ts;
1796 
1797     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1798 
1799     ts = tcg_global_alloc(s);
1800     ts->base_type = type;
1801     ts->type = type;
1802     ts->kind = TEMP_FIXED;
1803     ts->reg = reg;
1804     ts->name = name;
1805     tcg_regset_set_reg(s->reserved_regs, reg);
1806 
1807     return ts;
1808 }
1809 
1810 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1811 {
1812     s->frame_start = start;
1813     s->frame_end = start + size;
1814     s->frame_temp
1815         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1816 }
1817 
1818 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1819                                             const char *name, TCGType type)
1820 {
1821     TCGContext *s = tcg_ctx;
1822     TCGTemp *base_ts = tcgv_ptr_temp(base);
1823     TCGTemp *ts = tcg_global_alloc(s);
1824     int indirect_reg = 0;
1825 
1826     switch (base_ts->kind) {
1827     case TEMP_FIXED:
1828         break;
1829     case TEMP_GLOBAL:
1830         /* We do not support double-indirect registers.  */
1831         tcg_debug_assert(!base_ts->indirect_reg);
1832         base_ts->indirect_base = 1;
1833         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1834                             ? 2 : 1);
1835         indirect_reg = 1;
1836         break;
1837     default:
1838         g_assert_not_reached();
1839     }
1840 
1841     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1842         TCGTemp *ts2 = tcg_global_alloc(s);
1843         char buf[64];
1844 
1845         ts->base_type = TCG_TYPE_I64;
1846         ts->type = TCG_TYPE_I32;
1847         ts->indirect_reg = indirect_reg;
1848         ts->mem_allocated = 1;
1849         ts->mem_base = base_ts;
1850         ts->mem_offset = offset;
1851         pstrcpy(buf, sizeof(buf), name);
1852         pstrcat(buf, sizeof(buf), "_0");
1853         ts->name = strdup(buf);
1854 
1855         tcg_debug_assert(ts2 == ts + 1);
1856         ts2->base_type = TCG_TYPE_I64;
1857         ts2->type = TCG_TYPE_I32;
1858         ts2->indirect_reg = indirect_reg;
1859         ts2->mem_allocated = 1;
1860         ts2->mem_base = base_ts;
1861         ts2->mem_offset = offset + 4;
1862         ts2->temp_subindex = 1;
1863         pstrcpy(buf, sizeof(buf), name);
1864         pstrcat(buf, sizeof(buf), "_1");
1865         ts2->name = strdup(buf);
1866     } else {
1867         ts->base_type = type;
1868         ts->type = type;
1869         ts->indirect_reg = indirect_reg;
1870         ts->mem_allocated = 1;
1871         ts->mem_base = base_ts;
1872         ts->mem_offset = offset;
1873         ts->name = name;
1874     }
1875     return ts;
1876 }
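
/*
 * Illustrative example (hypothetical name): on a 32-bit host, a
 * TCG_TYPE_I64 global "reg" is split by the code above into adjacent
 * TCG_TYPE_I32 temps "reg_0" at mem_offset and "reg_1" at
 * mem_offset + 4, with temp_subindex 0 and 1; the base_type of both
 * halves remains TCG_TYPE_I64.
 */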
1877 
1878 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1879 {
1880     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1881     return temp_tcgv_i32(ts);
1882 }
1883 
1884 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1885 {
1886     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1887     return temp_tcgv_i64(ts);
1888 }
1889 
1890 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1891 {
1892     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1893     return temp_tcgv_ptr(ts);
1894 }
1895 
1896 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1897 {
1898     TCGContext *s = tcg_ctx;
1899     TCGTemp *ts;
1900     int n;
1901 
1902     if (kind == TEMP_EBB) {
1903         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1904 
1905         if (idx < TCG_MAX_TEMPS) {
1906             /* There is already an available temp with the right type.  */
1907             clear_bit(idx, s->free_temps[type].l);
1908 
1909             ts = &s->temps[idx];
1910             ts->temp_allocated = 1;
1911             tcg_debug_assert(ts->base_type == type);
1912             tcg_debug_assert(ts->kind == kind);
1913             return ts;
1914         }
1915     } else {
1916         tcg_debug_assert(kind == TEMP_TB);
1917     }
1918 
1919     switch (type) {
1920     case TCG_TYPE_I32:
1921     case TCG_TYPE_V64:
1922     case TCG_TYPE_V128:
1923     case TCG_TYPE_V256:
1924         n = 1;
1925         break;
1926     case TCG_TYPE_I64:
1927         n = 64 / TCG_TARGET_REG_BITS;
1928         break;
1929     case TCG_TYPE_I128:
1930         n = 128 / TCG_TARGET_REG_BITS;
1931         break;
1932     default:
1933         g_assert_not_reached();
1934     }
1935 
1936     ts = tcg_temp_alloc(s);
1937     ts->base_type = type;
1938     ts->temp_allocated = 1;
1939     ts->kind = kind;
1940 
1941     if (n == 1) {
1942         ts->type = type;
1943     } else {
1944         ts->type = TCG_TYPE_REG;
1945 
1946         for (int i = 1; i < n; ++i) {
1947             TCGTemp *ts2 = tcg_temp_alloc(s);
1948 
1949             tcg_debug_assert(ts2 == ts + i);
1950             ts2->base_type = type;
1951             ts2->type = TCG_TYPE_REG;
1952             ts2->temp_allocated = 1;
1953             ts2->temp_subindex = i;
1954             ts2->kind = kind;
1955         }
1956     }
1957     return ts;
1958 }
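
/*
 * Example of the multi-part allocation above: a TCG_TYPE_I128 temp on a
 * 64-bit host allocates n = 128 / 64 = 2 adjacent TCGTemps of type
 * TCG_TYPE_REG (four on a 32-bit host), with temp_subindex running from
 * 0 upward while every piece keeps base_type == TCG_TYPE_I128.
 */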
1959 
1960 TCGv_i32 tcg_temp_new_i32(void)
1961 {
1962     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1963 }
1964 
1965 TCGv_i32 tcg_temp_ebb_new_i32(void)
1966 {
1967     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1968 }
1969 
1970 TCGv_i64 tcg_temp_new_i64(void)
1971 {
1972     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1973 }
1974 
1975 TCGv_i64 tcg_temp_ebb_new_i64(void)
1976 {
1977     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1978 }
1979 
1980 TCGv_ptr tcg_temp_new_ptr(void)
1981 {
1982     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1983 }
1984 
1985 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1986 {
1987     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1988 }
1989 
1990 TCGv_i128 tcg_temp_new_i128(void)
1991 {
1992     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1993 }
1994 
1995 TCGv_i128 tcg_temp_ebb_new_i128(void)
1996 {
1997     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1998 }
1999 
2000 TCGv_vec tcg_temp_new_vec(TCGType type)
2001 {
2002     TCGTemp *t;
2003 
2004 #ifdef CONFIG_DEBUG_TCG
2005     switch (type) {
2006     case TCG_TYPE_V64:
2007         assert(TCG_TARGET_HAS_v64);
2008         break;
2009     case TCG_TYPE_V128:
2010         assert(TCG_TARGET_HAS_v128);
2011         break;
2012     case TCG_TYPE_V256:
2013         assert(TCG_TARGET_HAS_v256);
2014         break;
2015     default:
2016         g_assert_not_reached();
2017     }
2018 #endif
2019 
2020     t = tcg_temp_new_internal(type, TEMP_EBB);
2021     return temp_tcgv_vec(t);
2022 }
2023 
2024 /* Create a new temp of the same type as an existing temp.  */
2025 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2026 {
2027     TCGTemp *t = tcgv_vec_temp(match);
2028 
2029     tcg_debug_assert(t->temp_allocated != 0);
2030 
2031     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2032     return temp_tcgv_vec(t);
2033 }
2034 
2035 void tcg_temp_free_internal(TCGTemp *ts)
2036 {
2037     TCGContext *s = tcg_ctx;
2038 
2039     switch (ts->kind) {
2040     case TEMP_CONST:
2041     case TEMP_TB:
2042         /* Silently ignore free. */
2043         break;
2044     case TEMP_EBB:
2045         tcg_debug_assert(ts->temp_allocated != 0);
2046         ts->temp_allocated = 0;
2047         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2048         break;
2049     default:
2050         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2051         g_assert_not_reached();
2052     }
2053 }
2054 
2055 void tcg_temp_free_i32(TCGv_i32 arg)
2056 {
2057     tcg_temp_free_internal(tcgv_i32_temp(arg));
2058 }
2059 
2060 void tcg_temp_free_i64(TCGv_i64 arg)
2061 {
2062     tcg_temp_free_internal(tcgv_i64_temp(arg));
2063 }
2064 
2065 void tcg_temp_free_i128(TCGv_i128 arg)
2066 {
2067     tcg_temp_free_internal(tcgv_i128_temp(arg));
2068 }
2069 
2070 void tcg_temp_free_ptr(TCGv_ptr arg)
2071 {
2072     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2073 }
2074 
2075 void tcg_temp_free_vec(TCGv_vec arg)
2076 {
2077     tcg_temp_free_internal(tcgv_vec_temp(arg));
2078 }
2079 
2080 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2081 {
2082     TCGContext *s = tcg_ctx;
2083     GHashTable *h = s->const_table[type];
2084     TCGTemp *ts;
2085 
2086     if (h == NULL) {
2087         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2088         s->const_table[type] = h;
2089     }
2090 
2091     ts = g_hash_table_lookup(h, &val);
2092     if (ts == NULL) {
2093         int64_t *val_ptr;
2094 
2095         ts = tcg_temp_alloc(s);
2096 
2097         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2098             TCGTemp *ts2 = tcg_temp_alloc(s);
2099 
2100             tcg_debug_assert(ts2 == ts + 1);
2101 
2102             ts->base_type = TCG_TYPE_I64;
2103             ts->type = TCG_TYPE_I32;
2104             ts->kind = TEMP_CONST;
2105             ts->temp_allocated = 1;
2106 
2107             ts2->base_type = TCG_TYPE_I64;
2108             ts2->type = TCG_TYPE_I32;
2109             ts2->kind = TEMP_CONST;
2110             ts2->temp_allocated = 1;
2111             ts2->temp_subindex = 1;
2112 
2113             /*
2114              * Retain the full value of the 64-bit constant in the low
2115              * part, so that the hash table works.  Actual uses will
2116              * truncate the value to the low part.
2117              */
2118             ts[HOST_BIG_ENDIAN].val = val;
2119             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2120             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2121         } else {
2122             ts->base_type = type;
2123             ts->type = type;
2124             ts->kind = TEMP_CONST;
2125             ts->temp_allocated = 1;
2126             ts->val = val;
2127             val_ptr = &ts->val;
2128         }
2129         g_hash_table_insert(h, val_ptr, ts);
2130     }
2131 
2132     return ts;
2133 }
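
/*
 * Illustrative example of the 32-bit split above: for
 * val == 0x1122334455667788 on a little-endian 32-bit host, ts[0].val
 * keeps the full 64-bit value (so the hash lookup by &val works) and
 * ts[1].val holds the high part 0x11223344; on a big-endian host the
 * two roles are swapped.
 */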
2134 
2135 TCGv_i32 tcg_constant_i32(int32_t val)
2136 {
2137     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2138 }
2139 
2140 TCGv_i64 tcg_constant_i64(int64_t val)
2141 {
2142     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2143 }
2144 
2145 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2146 {
2147     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2148 }
2149 
2150 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2151 {
2152     val = dup_const(vece, val);
2153     return temp_tcgv_vec(tcg_constant_internal(type, val));
2154 }
2155 
2156 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2157 {
2158     TCGTemp *t = tcgv_vec_temp(match);
2159 
2160     tcg_debug_assert(t->temp_allocated != 0);
2161     return tcg_constant_vec(t->base_type, vece, val);
2162 }
2163 
2164 #ifdef CONFIG_DEBUG_TCG
2165 size_t temp_idx(TCGTemp *ts)
2166 {
2167     ptrdiff_t n = ts - tcg_ctx->temps;
2168     assert(n >= 0 && n < tcg_ctx->nb_temps);
2169     return n;
2170 }
2171 
2172 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2173 {
2174     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2175 
2176     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2177     assert(o % sizeof(TCGTemp) == 0);
2178 
2179     return (void *)tcg_ctx + (uintptr_t)v;
2180 }
2181 #endif /* CONFIG_DEBUG_TCG */
2182 
2183 /*
2184  * Return true if OP may appear in the opcode stream with TYPE.
2185  * Test the runtime variable that controls each opcode.
2186  */
2187 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2188 {
2189     bool has_type;
2190 
2191     switch (type) {
2192     case TCG_TYPE_I32:
2193         has_type = true;
2194         break;
2195     case TCG_TYPE_I64:
2196         has_type = TCG_TARGET_REG_BITS == 64;
2197         break;
2198     case TCG_TYPE_V64:
2199         has_type = TCG_TARGET_HAS_v64;
2200         break;
2201     case TCG_TYPE_V128:
2202         has_type = TCG_TARGET_HAS_v128;
2203         break;
2204     case TCG_TYPE_V256:
2205         has_type = TCG_TARGET_HAS_v256;
2206         break;
2207     default:
2208         has_type = false;
2209         break;
2210     }
2211 
2212     switch (op) {
2213     case INDEX_op_discard:
2214     case INDEX_op_set_label:
2215     case INDEX_op_call:
2216     case INDEX_op_br:
2217     case INDEX_op_mb:
2218     case INDEX_op_insn_start:
2219     case INDEX_op_exit_tb:
2220     case INDEX_op_goto_tb:
2221     case INDEX_op_goto_ptr:
2222     case INDEX_op_qemu_ld_i32:
2223     case INDEX_op_qemu_st_i32:
2224     case INDEX_op_qemu_ld_i64:
2225     case INDEX_op_qemu_st_i64:
2226         return true;
2227 
2228     case INDEX_op_qemu_st8_i32:
2229         return TCG_TARGET_HAS_qemu_st8_i32;
2230 
2231     case INDEX_op_qemu_ld_i128:
2232     case INDEX_op_qemu_st_i128:
2233         return TCG_TARGET_HAS_qemu_ldst_i128;
2234 
2235     case INDEX_op_add:
2236     case INDEX_op_and:
2237     case INDEX_op_mov:
2238     case INDEX_op_or:
2239     case INDEX_op_xor:
2240         return has_type;
2241 
2242     case INDEX_op_setcond_i32:
2243     case INDEX_op_brcond_i32:
2244     case INDEX_op_movcond_i32:
2245     case INDEX_op_ld8u_i32:
2246     case INDEX_op_ld8s_i32:
2247     case INDEX_op_ld16u_i32:
2248     case INDEX_op_ld16s_i32:
2249     case INDEX_op_ld_i32:
2250     case INDEX_op_st8_i32:
2251     case INDEX_op_st16_i32:
2252     case INDEX_op_st_i32:
2253     case INDEX_op_shl_i32:
2254     case INDEX_op_shr_i32:
2255     case INDEX_op_sar_i32:
2256     case INDEX_op_extract_i32:
2257     case INDEX_op_sextract_i32:
2258     case INDEX_op_deposit_i32:
2259         return true;
2260 
2261     case INDEX_op_negsetcond_i32:
2262         return TCG_TARGET_HAS_negsetcond_i32;
2263     case INDEX_op_div_i32:
2264     case INDEX_op_divu_i32:
2265         return TCG_TARGET_HAS_div_i32;
2266     case INDEX_op_rem_i32:
2267     case INDEX_op_remu_i32:
2268         return TCG_TARGET_HAS_rem_i32;
2269     case INDEX_op_div2_i32:
2270     case INDEX_op_divu2_i32:
2271         return TCG_TARGET_HAS_div2_i32;
2272     case INDEX_op_rotl_i32:
2273     case INDEX_op_rotr_i32:
2274         return TCG_TARGET_HAS_rot_i32;
2275     case INDEX_op_extract2_i32:
2276         return TCG_TARGET_HAS_extract2_i32;
2277     case INDEX_op_add2_i32:
2278         return TCG_TARGET_HAS_add2_i32;
2279     case INDEX_op_sub2_i32:
2280         return TCG_TARGET_HAS_sub2_i32;
2281     case INDEX_op_mulu2_i32:
2282         return TCG_TARGET_HAS_mulu2_i32;
2283     case INDEX_op_muls2_i32:
2284         return TCG_TARGET_HAS_muls2_i32;
2285     case INDEX_op_bswap16_i32:
2286         return TCG_TARGET_HAS_bswap16_i32;
2287     case INDEX_op_bswap32_i32:
2288         return TCG_TARGET_HAS_bswap32_i32;
2289     case INDEX_op_clz_i32:
2290         return TCG_TARGET_HAS_clz_i32;
2291     case INDEX_op_ctz_i32:
2292         return TCG_TARGET_HAS_ctz_i32;
2293     case INDEX_op_ctpop_i32:
2294         return TCG_TARGET_HAS_ctpop_i32;
2295 
2296     case INDEX_op_brcond2_i32:
2297     case INDEX_op_setcond2_i32:
2298         return TCG_TARGET_REG_BITS == 32;
2299 
2300     case INDEX_op_setcond_i64:
2301     case INDEX_op_brcond_i64:
2302     case INDEX_op_movcond_i64:
2303     case INDEX_op_ld8u_i64:
2304     case INDEX_op_ld8s_i64:
2305     case INDEX_op_ld16u_i64:
2306     case INDEX_op_ld16s_i64:
2307     case INDEX_op_ld32u_i64:
2308     case INDEX_op_ld32s_i64:
2309     case INDEX_op_ld_i64:
2310     case INDEX_op_st8_i64:
2311     case INDEX_op_st16_i64:
2312     case INDEX_op_st32_i64:
2313     case INDEX_op_st_i64:
2314     case INDEX_op_shl_i64:
2315     case INDEX_op_shr_i64:
2316     case INDEX_op_sar_i64:
2317     case INDEX_op_ext_i32_i64:
2318     case INDEX_op_extu_i32_i64:
2319     case INDEX_op_extract_i64:
2320     case INDEX_op_sextract_i64:
2321     case INDEX_op_deposit_i64:
2322         return TCG_TARGET_REG_BITS == 64;
2323 
2324     case INDEX_op_negsetcond_i64:
2325         return TCG_TARGET_HAS_negsetcond_i64;
2326     case INDEX_op_div_i64:
2327     case INDEX_op_divu_i64:
2328         return TCG_TARGET_HAS_div_i64;
2329     case INDEX_op_rem_i64:
2330     case INDEX_op_remu_i64:
2331         return TCG_TARGET_HAS_rem_i64;
2332     case INDEX_op_div2_i64:
2333     case INDEX_op_divu2_i64:
2334         return TCG_TARGET_HAS_div2_i64;
2335     case INDEX_op_rotl_i64:
2336     case INDEX_op_rotr_i64:
2337         return TCG_TARGET_HAS_rot_i64;
2338     case INDEX_op_extract2_i64:
2339         return TCG_TARGET_HAS_extract2_i64;
2340     case INDEX_op_extrl_i64_i32:
2341     case INDEX_op_extrh_i64_i32:
2342         return TCG_TARGET_HAS_extr_i64_i32;
2343     case INDEX_op_bswap16_i64:
2344         return TCG_TARGET_HAS_bswap16_i64;
2345     case INDEX_op_bswap32_i64:
2346         return TCG_TARGET_HAS_bswap32_i64;
2347     case INDEX_op_bswap64_i64:
2348         return TCG_TARGET_HAS_bswap64_i64;
2349     case INDEX_op_clz_i64:
2350         return TCG_TARGET_HAS_clz_i64;
2351     case INDEX_op_ctz_i64:
2352         return TCG_TARGET_HAS_ctz_i64;
2353     case INDEX_op_ctpop_i64:
2354         return TCG_TARGET_HAS_ctpop_i64;
2355     case INDEX_op_add2_i64:
2356         return TCG_TARGET_HAS_add2_i64;
2357     case INDEX_op_sub2_i64:
2358         return TCG_TARGET_HAS_sub2_i64;
2359     case INDEX_op_mulu2_i64:
2360         return TCG_TARGET_HAS_mulu2_i64;
2361     case INDEX_op_muls2_i64:
2362         return TCG_TARGET_HAS_muls2_i64;
2363 
2364     case INDEX_op_mov_vec:
2365     case INDEX_op_dup_vec:
2366     case INDEX_op_dupm_vec:
2367     case INDEX_op_ld_vec:
2368     case INDEX_op_st_vec:
2369     case INDEX_op_add_vec:
2370     case INDEX_op_sub_vec:
2371     case INDEX_op_and_vec:
2372     case INDEX_op_or_vec:
2373     case INDEX_op_xor_vec:
2374     case INDEX_op_cmp_vec:
2375         return has_type;
2376     case INDEX_op_dup2_vec:
2377         return has_type && TCG_TARGET_REG_BITS == 32;
2378     case INDEX_op_not_vec:
2379         return has_type && TCG_TARGET_HAS_not_vec;
2380     case INDEX_op_neg_vec:
2381         return has_type && TCG_TARGET_HAS_neg_vec;
2382     case INDEX_op_abs_vec:
2383         return has_type && TCG_TARGET_HAS_abs_vec;
2384     case INDEX_op_andc_vec:
2385         return has_type && TCG_TARGET_HAS_andc_vec;
2386     case INDEX_op_orc_vec:
2387         return has_type && TCG_TARGET_HAS_orc_vec;
2388     case INDEX_op_nand_vec:
2389         return has_type && TCG_TARGET_HAS_nand_vec;
2390     case INDEX_op_nor_vec:
2391         return has_type && TCG_TARGET_HAS_nor_vec;
2392     case INDEX_op_eqv_vec:
2393         return has_type && TCG_TARGET_HAS_eqv_vec;
2394     case INDEX_op_mul_vec:
2395         return has_type && TCG_TARGET_HAS_mul_vec;
2396     case INDEX_op_shli_vec:
2397     case INDEX_op_shri_vec:
2398     case INDEX_op_sari_vec:
2399         return has_type && TCG_TARGET_HAS_shi_vec;
2400     case INDEX_op_shls_vec:
2401     case INDEX_op_shrs_vec:
2402     case INDEX_op_sars_vec:
2403         return has_type && TCG_TARGET_HAS_shs_vec;
2404     case INDEX_op_shlv_vec:
2405     case INDEX_op_shrv_vec:
2406     case INDEX_op_sarv_vec:
2407         return has_type && TCG_TARGET_HAS_shv_vec;
2408     case INDEX_op_rotli_vec:
2409         return has_type && TCG_TARGET_HAS_roti_vec;
2410     case INDEX_op_rotls_vec:
2411         return has_type && TCG_TARGET_HAS_rots_vec;
2412     case INDEX_op_rotlv_vec:
2413     case INDEX_op_rotrv_vec:
2414         return has_type && TCG_TARGET_HAS_rotv_vec;
2415     case INDEX_op_ssadd_vec:
2416     case INDEX_op_usadd_vec:
2417     case INDEX_op_sssub_vec:
2418     case INDEX_op_ussub_vec:
2419         return has_type && TCG_TARGET_HAS_sat_vec;
2420     case INDEX_op_smin_vec:
2421     case INDEX_op_umin_vec:
2422     case INDEX_op_smax_vec:
2423     case INDEX_op_umax_vec:
2424         return has_type && TCG_TARGET_HAS_minmax_vec;
2425     case INDEX_op_bitsel_vec:
2426         return has_type && TCG_TARGET_HAS_bitsel_vec;
2427     case INDEX_op_cmpsel_vec:
2428         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2429 
2430     default:
2431         if (op < INDEX_op_last_generic) {
2432             const TCGOutOp *outop;
2433             TCGConstraintSetIndex con_set;
2434 
2435             if (!has_type) {
2436                 return false;
2437             }
2438 
2439             outop = all_outop[op];
2440             tcg_debug_assert(outop != NULL);
2441 
2442             con_set = outop->static_constraint;
2443             if (con_set == C_Dynamic) {
2444                 con_set = outop->dynamic_constraint(type, flags);
2445             }
2446             if (con_set >= 0) {
2447                 return true;
2448             }
2449             tcg_debug_assert(con_set == C_NotImplemented);
2450             return false;
2451         }
2452         tcg_debug_assert(op < NB_OPS);
2453         return true;
2454 
2455     case INDEX_op_last_generic:
2456         g_assert_not_reached();
2457     }
2458 }
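
/*
 * Usage sketch (hypothetical caller, not a quote of any expander):
 * expansion code can probe for an optional opcode before emitting it,
 * e.g.
 *
 *     if (tcg_op_supported(INDEX_op_rotli_vec, type, 0)) {
 *         ... emit the rotate directly ...
 *     } else {
 *         ... expand via shli_vec and shri_vec ...
 *     }
 */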
2459 
2460 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2461 {
2462     unsigned width;
2463 
2464     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2465     width = (type == TCG_TYPE_I32 ? 32 : 64);
2466 
2467     tcg_debug_assert(ofs < width);
2468     tcg_debug_assert(len > 0);
2469     tcg_debug_assert(len <= width - ofs);
2470 
2471     return TCG_TARGET_deposit_valid(type, ofs, len);
2472 }
2473 
2474 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2475 
2476 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2477                           TCGTemp *ret, TCGTemp **args)
2478 {
2479     TCGv_i64 extend_free[MAX_CALL_IARGS];
2480     int n_extend = 0;
2481     TCGOp *op;
2482     int i, n, pi = 0, total_args;
2483 
2484     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2485         init_call_layout(info);
2486         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2487     }
2488 
2489     total_args = info->nr_out + info->nr_in + 2;
2490     op = tcg_op_alloc(INDEX_op_call, total_args);
2491 
2492 #ifdef CONFIG_PLUGIN
2493     /* Flag helpers that may affect guest state */
2494     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2495         tcg_ctx->plugin_insn->calls_helpers = true;
2496     }
2497 #endif
2498 
2499     TCGOP_CALLO(op) = n = info->nr_out;
2500     switch (n) {
2501     case 0:
2502         tcg_debug_assert(ret == NULL);
2503         break;
2504     case 1:
2505         tcg_debug_assert(ret != NULL);
2506         op->args[pi++] = temp_arg(ret);
2507         break;
2508     case 2:
2509     case 4:
2510         tcg_debug_assert(ret != NULL);
2511         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2512         tcg_debug_assert(ret->temp_subindex == 0);
2513         for (i = 0; i < n; ++i) {
2514             op->args[pi++] = temp_arg(ret + i);
2515         }
2516         break;
2517     default:
2518         g_assert_not_reached();
2519     }
2520 
2521     TCGOP_CALLI(op) = n = info->nr_in;
2522     for (i = 0; i < n; i++) {
2523         const TCGCallArgumentLoc *loc = &info->in[i];
2524         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2525 
2526         switch (loc->kind) {
2527         case TCG_CALL_ARG_NORMAL:
2528         case TCG_CALL_ARG_BY_REF:
2529         case TCG_CALL_ARG_BY_REF_N:
2530             op->args[pi++] = temp_arg(ts);
2531             break;
2532 
2533         case TCG_CALL_ARG_EXTEND_U:
2534         case TCG_CALL_ARG_EXTEND_S:
2535             {
2536                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2537                 TCGv_i32 orig = temp_tcgv_i32(ts);
2538 
2539                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2540                     tcg_gen_ext_i32_i64(temp, orig);
2541                 } else {
2542                     tcg_gen_extu_i32_i64(temp, orig);
2543                 }
2544                 op->args[pi++] = tcgv_i64_arg(temp);
2545                 extend_free[n_extend++] = temp;
2546             }
2547             break;
2548 
2549         default:
2550             g_assert_not_reached();
2551         }
2552     }
2553     op->args[pi++] = (uintptr_t)func;
2554     op->args[pi++] = (uintptr_t)info;
2555     tcg_debug_assert(pi == total_args);
2556 
2557     if (tcg_ctx->emit_before_op) {
2558         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2559     } else {
2560         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2561     }
2562 
2563     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2564     for (i = 0; i < n_extend; ++i) {
2565         tcg_temp_free_i64(extend_free[i]);
2566     }
2567 }
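
/*
 * Note on TCG_CALL_ARG_EXTEND above: when the host ABI requires 32-bit
 * arguments widened to a full register, each i32 input is copied through
 * a fresh EBB i64 temp via tcg_gen_ext_i32_i64/tcg_gen_extu_i32_i64,
 * that temp is passed in its place, and it is freed again once the call
 * op has been queued.
 */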
2568 
2569 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2570 {
2571     tcg_gen_callN(func, info, ret, NULL);
2572 }
2573 
2574 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2575 {
2576     tcg_gen_callN(func, info, ret, &t1);
2577 }
2578 
2579 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2580                    TCGTemp *t1, TCGTemp *t2)
2581 {
2582     TCGTemp *args[2] = { t1, t2 };
2583     tcg_gen_callN(func, info, ret, args);
2584 }
2585 
2586 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2587                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2588 {
2589     TCGTemp *args[3] = { t1, t2, t3 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2594                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2595 {
2596     TCGTemp *args[4] = { t1, t2, t3, t4 };
2597     tcg_gen_callN(func, info, ret, args);
2598 }
2599 
2600 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2601                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2602 {
2603     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
2606 
2607 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2608                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2609                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2610 {
2611     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2612     tcg_gen_callN(func, info, ret, args);
2613 }
2614 
2615 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2616                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2617                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2618 {
2619     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2620     tcg_gen_callN(func, info, ret, args);
2621 }
2622 
2623 static void tcg_reg_alloc_start(TCGContext *s)
2624 {
2625     int i, n;
2626 
2627     for (i = 0, n = s->nb_temps; i < n; i++) {
2628         TCGTemp *ts = &s->temps[i];
2629         TCGTempVal val = TEMP_VAL_MEM;
2630 
2631         switch (ts->kind) {
2632         case TEMP_CONST:
2633             val = TEMP_VAL_CONST;
2634             break;
2635         case TEMP_FIXED:
2636             val = TEMP_VAL_REG;
2637             break;
2638         case TEMP_GLOBAL:
2639             break;
2640         case TEMP_EBB:
2641             val = TEMP_VAL_DEAD;
2642             /* fall through */
2643         case TEMP_TB:
2644             ts->mem_allocated = 0;
2645             break;
2646         default:
2647             g_assert_not_reached();
2648         }
2649         ts->val_type = val;
2650     }
2651 
2652     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2653 }
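
/*
 * Summary of the initial placement above: constants begin as
 * TEMP_VAL_CONST and fixed temps as TEMP_VAL_REG; globals begin in
 * memory; EBB temps begin dead and TB temps begin as TEMP_VAL_MEM,
 * both with mem_allocated cleared so a frame slot is assigned lazily.
 */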
2654 
2655 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2656                                  TCGTemp *ts)
2657 {
2658     int idx = temp_idx(ts);
2659 
2660     switch (ts->kind) {
2661     case TEMP_FIXED:
2662     case TEMP_GLOBAL:
2663         pstrcpy(buf, buf_size, ts->name);
2664         break;
2665     case TEMP_TB:
2666         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2667         break;
2668     case TEMP_EBB:
2669         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2670         break;
2671     case TEMP_CONST:
2672         switch (ts->type) {
2673         case TCG_TYPE_I32:
2674             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2675             break;
2676 #if TCG_TARGET_REG_BITS > 32
2677         case TCG_TYPE_I64:
2678             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2679             break;
2680 #endif
2681         case TCG_TYPE_V64:
2682         case TCG_TYPE_V128:
2683         case TCG_TYPE_V256:
2684             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2685                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2686             break;
2687         default:
2688             g_assert_not_reached();
2689         }
2690         break;
2691     }
2692     return buf;
2693 }
2694 
2695 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2696                              int buf_size, TCGArg arg)
2697 {
2698     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2699 }
2700 
2701 static const char * const cond_name[] =
2702 {
2703     [TCG_COND_NEVER] = "never",
2704     [TCG_COND_ALWAYS] = "always",
2705     [TCG_COND_EQ] = "eq",
2706     [TCG_COND_NE] = "ne",
2707     [TCG_COND_LT] = "lt",
2708     [TCG_COND_GE] = "ge",
2709     [TCG_COND_LE] = "le",
2710     [TCG_COND_GT] = "gt",
2711     [TCG_COND_LTU] = "ltu",
2712     [TCG_COND_GEU] = "geu",
2713     [TCG_COND_LEU] = "leu",
2714     [TCG_COND_GTU] = "gtu",
2715     [TCG_COND_TSTEQ] = "tsteq",
2716     [TCG_COND_TSTNE] = "tstne",
2717 };
2718 
2719 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2720 {
2721     [MO_UB]   = "ub",
2722     [MO_SB]   = "sb",
2723     [MO_LEUW] = "leuw",
2724     [MO_LESW] = "lesw",
2725     [MO_LEUL] = "leul",
2726     [MO_LESL] = "lesl",
2727     [MO_LEUQ] = "leq",
2728     [MO_BEUW] = "beuw",
2729     [MO_BESW] = "besw",
2730     [MO_BEUL] = "beul",
2731     [MO_BESL] = "besl",
2732     [MO_BEUQ] = "beq",
2733     [MO_128 + MO_BE] = "beo",
2734     [MO_128 + MO_LE] = "leo",
2735 };
2736 
2737 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2738     [MO_UNALN >> MO_ASHIFT]    = "un+",
2739     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2740     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2741     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2742     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2743     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2744     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2745     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2746 };
2747 
2748 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2749     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2750     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2751     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2752     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2753     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2754     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2755 };
2756 
2757 static const char bswap_flag_name[][6] = {
2758     [TCG_BSWAP_IZ] = "iz",
2759     [TCG_BSWAP_OZ] = "oz",
2760     [TCG_BSWAP_OS] = "os",
2761     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2762     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2763 };
2764 
2765 #ifdef CONFIG_PLUGIN
2766 static const char * const plugin_from_name[] = {
2767     "from-tb",
2768     "from-insn",
2769     "after-insn",
2770     "after-tb",
2771 };
2772 #endif
2773 
2774 static inline bool tcg_regset_single(TCGRegSet d)
2775 {
2776     return (d & (d - 1)) == 0;
2777 }
2778 
2779 static inline TCGReg tcg_regset_first(TCGRegSet d)
2780 {
2781     if (TCG_TARGET_NB_REGS <= 32) {
2782         return ctz32(d);
2783     } else {
2784         return ctz64(d);
2785     }
2786 }
2787 
2788 /* Return only the number of characters output -- no error return. */
2789 #define ne_fprintf(...) \
2790     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2791 
2792 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2793 {
2794     char buf[128];
2795     TCGOp *op;
2796 
2797     QTAILQ_FOREACH(op, &s->ops, link) {
2798         int i, k, nb_oargs, nb_iargs, nb_cargs;
2799         const TCGOpDef *def;
2800         TCGOpcode c;
2801         int col = 0;
2802 
2803         c = op->opc;
2804         def = &tcg_op_defs[c];
2805 
2806         if (c == INDEX_op_insn_start) {
2807             nb_oargs = 0;
2808             col += ne_fprintf(f, "\n ----");
2809 
2810             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2811                 col += ne_fprintf(f, " %016" PRIx64,
2812                                   tcg_get_insn_start_param(op, i));
2813             }
2814         } else if (c == INDEX_op_call) {
2815             const TCGHelperInfo *info = tcg_call_info(op);
2816             void *func = tcg_call_func(op);
2817 
2818             /* variable number of arguments */
2819             nb_oargs = TCGOP_CALLO(op);
2820             nb_iargs = TCGOP_CALLI(op);
2821             nb_cargs = def->nb_cargs;
2822 
2823             col += ne_fprintf(f, " %s ", def->name);
2824 
2825             /*
2826              * Print the function name from TCGHelperInfo, if available.
2827              * Note that plugins have a template function for the info,
2828              * but the actual function pointer comes from the plugin.
2829              */
2830             if (func == info->func) {
2831                 col += ne_fprintf(f, "%s", info->name);
2832             } else {
2833                 col += ne_fprintf(f, "plugin(%p)", func);
2834             }
2835 
2836             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2837             for (i = 0; i < nb_oargs; i++) {
2838                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2839                                                             op->args[i]));
2840             }
2841             for (i = 0; i < nb_iargs; i++) {
2842                 TCGArg arg = op->args[nb_oargs + i];
2843                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2844                 col += ne_fprintf(f, ",%s", t);
2845             }
2846         } else {
2847             if (def->flags & TCG_OPF_INT) {
2848                 col += ne_fprintf(f, " %s_i%d ",
2849                                   def->name,
2850                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2851             } else if (def->flags & TCG_OPF_VECTOR) {
2852                 col += ne_fprintf(f, "%s v%d,e%d,",
2853                                   def->name,
2854                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2855                                   8 << TCGOP_VECE(op));
2856             } else {
2857                 col += ne_fprintf(f, " %s ", def->name);
2858             }
2859 
2860             nb_oargs = def->nb_oargs;
2861             nb_iargs = def->nb_iargs;
2862             nb_cargs = def->nb_cargs;
2863 
2864             k = 0;
2865             for (i = 0; i < nb_oargs; i++) {
2866                 const char *sep =  k ? "," : "";
2867                 col += ne_fprintf(f, "%s%s", sep,
2868                                   tcg_get_arg_str(s, buf, sizeof(buf),
2869                                                   op->args[k++]));
2870             }
2871             for (i = 0; i < nb_iargs; i++) {
2872                 const char *sep =  k ? "," : "";
2873                 col += ne_fprintf(f, "%s%s", sep,
2874                                   tcg_get_arg_str(s, buf, sizeof(buf),
2875                                                   op->args[k++]));
2876             }
2877             switch (c) {
2878             case INDEX_op_brcond_i32:
2879             case INDEX_op_setcond_i32:
2880             case INDEX_op_negsetcond_i32:
2881             case INDEX_op_movcond_i32:
2882             case INDEX_op_brcond2_i32:
2883             case INDEX_op_setcond2_i32:
2884             case INDEX_op_brcond_i64:
2885             case INDEX_op_setcond_i64:
2886             case INDEX_op_negsetcond_i64:
2887             case INDEX_op_movcond_i64:
2888             case INDEX_op_cmp_vec:
2889             case INDEX_op_cmpsel_vec:
2890                 if (op->args[k] < ARRAY_SIZE(cond_name)
2891                     && cond_name[op->args[k]]) {
2892                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2893                 } else {
2894                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2895                 }
2896                 i = 1;
2897                 break;
2898             case INDEX_op_qemu_ld_i32:
2899             case INDEX_op_qemu_st_i32:
2900             case INDEX_op_qemu_st8_i32:
2901             case INDEX_op_qemu_ld_i64:
2902             case INDEX_op_qemu_st_i64:
2903             case INDEX_op_qemu_ld_i128:
2904             case INDEX_op_qemu_st_i128:
2905                 {
2906                     const char *s_al, *s_op, *s_at;
2907                     MemOpIdx oi = op->args[k++];
2908                     MemOp mop = get_memop(oi);
2909                     unsigned ix = get_mmuidx(oi);
2910 
2911                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2912                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2913                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2914                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2915 
2916                     /* If all fields are accounted for, print symbolically. */
2917                     if (!mop && s_al && s_op && s_at) {
2918                         col += ne_fprintf(f, ",%s%s%s,%u",
2919                                           s_at, s_al, s_op, ix);
2920                     } else {
2921                         mop = get_memop(oi);
2922                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2923                     }
2924                     i = 1;
2925                 }
2926                 break;
2927             case INDEX_op_bswap16_i32:
2928             case INDEX_op_bswap16_i64:
2929             case INDEX_op_bswap32_i32:
2930             case INDEX_op_bswap32_i64:
2931             case INDEX_op_bswap64_i64:
2932                 {
2933                     TCGArg flags = op->args[k];
2934                     const char *name = NULL;
2935 
2936                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2937                         name = bswap_flag_name[flags];
2938                     }
2939                     if (name) {
2940                         col += ne_fprintf(f, ",%s", name);
2941                     } else {
2942                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2943                     }
2944                     i = k = 1;
2945                 }
2946                 break;
2947 #ifdef CONFIG_PLUGIN
2948             case INDEX_op_plugin_cb:
2949                 {
2950                     TCGArg from = op->args[k++];
2951                     const char *name = NULL;
2952 
2953                     if (from < ARRAY_SIZE(plugin_from_name)) {
2954                         name = plugin_from_name[from];
2955                     }
2956                     if (name) {
2957                         col += ne_fprintf(f, "%s", name);
2958                     } else {
2959                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2960                     }
2961                     i = 1;
2962                 }
2963                 break;
2964 #endif
2965             default:
2966                 i = 0;
2967                 break;
2968             }
2969             switch (c) {
2970             case INDEX_op_set_label:
2971             case INDEX_op_br:
2972             case INDEX_op_brcond_i32:
2973             case INDEX_op_brcond_i64:
2974             case INDEX_op_brcond2_i32:
2975                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2976                                   arg_label(op->args[k])->id);
2977                 i++, k++;
2978                 break;
2979             case INDEX_op_mb:
2980                 {
2981                     TCGBar membar = op->args[k];
2982                     const char *b_op, *m_op;
2983 
2984                     switch (membar & TCG_BAR_SC) {
2985                     case 0:
2986                         b_op = "none";
2987                         break;
2988                     case TCG_BAR_LDAQ:
2989                         b_op = "acq";
2990                         break;
2991                     case TCG_BAR_STRL:
2992                         b_op = "rel";
2993                         break;
2994                     case TCG_BAR_SC:
2995                         b_op = "seq";
2996                         break;
2997                     default:
2998                         g_assert_not_reached();
2999                     }
3000 
3001                     switch (membar & TCG_MO_ALL) {
3002                     case 0:
3003                         m_op = "none";
3004                         break;
3005                     case TCG_MO_LD_LD:
3006                         m_op = "rr";
3007                         break;
3008                     case TCG_MO_LD_ST:
3009                         m_op = "rw";
3010                         break;
3011                     case TCG_MO_ST_LD:
3012                         m_op = "wr";
3013                         break;
3014                     case TCG_MO_ST_ST:
3015                         m_op = "ww";
3016                         break;
3017                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3018                         m_op = "rr+rw";
3019                         break;
3020                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3021                         m_op = "rr+wr";
3022                         break;
3023                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3024                         m_op = "rr+ww";
3025                         break;
3026                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3027                         m_op = "rw+wr";
3028                         break;
3029                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3030                         m_op = "rw+ww";
3031                         break;
3032                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3033                         m_op = "wr+ww";
3034                         break;
3035                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3036                         m_op = "rr+rw+wr";
3037                         break;
3038                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3039                         m_op = "rr+rw+ww";
3040                         break;
3041                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3042                         m_op = "rr+wr+ww";
3043                         break;
3044                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3045                         m_op = "rw+wr+ww";
3046                         break;
3047                     case TCG_MO_ALL:
3048                         m_op = "all";
3049                         break;
3050                     default:
3051                         g_assert_not_reached();
3052                     }
3053 
3054                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3055                     i++, k++;
3056                 }
3057                 break;
3058             default:
3059                 break;
3060             }
3061             for (; i < nb_cargs; i++, k++) {
3062                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3063                                   op->args[k]);
3064             }
3065         }
3066 
3067         if (have_prefs || op->life) {
3068             for (; col < 40; ++col) {
3069                 putc(' ', f);
3070             }
3071         }
3072 
3073         if (op->life) {
3074             unsigned life = op->life;
3075 
3076             if (life & (SYNC_ARG * 3)) {
3077                 ne_fprintf(f, "  sync:");
3078                 for (i = 0; i < 2; ++i) {
3079                     if (life & (SYNC_ARG << i)) {
3080                         ne_fprintf(f, " %d", i);
3081                     }
3082                 }
3083             }
3084             life /= DEAD_ARG;
3085             if (life) {
3086                 ne_fprintf(f, "  dead:");
3087                 for (i = 0; life; ++i, life >>= 1) {
3088                     if (life & 1) {
3089                         ne_fprintf(f, " %d", i);
3090                     }
3091                 }
3092             }
3093         }
3094 
3095         if (have_prefs) {
3096             for (i = 0; i < nb_oargs; ++i) {
3097                 TCGRegSet set = output_pref(op, i);
3098 
3099                 if (i == 0) {
3100                     ne_fprintf(f, "  pref=");
3101                 } else {
3102                     ne_fprintf(f, ",");
3103                 }
3104                 if (set == 0) {
3105                     ne_fprintf(f, "none");
3106                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3107                     ne_fprintf(f, "all");
3108 #ifdef CONFIG_DEBUG_TCG
3109                 } else if (tcg_regset_single(set)) {
3110                     TCGReg reg = tcg_regset_first(set);
3111                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3112 #endif
3113                 } else if (TCG_TARGET_NB_REGS <= 32) {
3114                     ne_fprintf(f, "0x%x", (uint32_t)set);
3115                 } else {
3116                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3117                 }
3118             }
3119         }
3120 
3121         putc('\n', f);
3122     }
3123 }
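
/*
 * Illustrative rendering of the MemOpIdx argument above: an aligned
 * little-endian 32-bit load with mmu index 1 prints as ",al+leul,1",
 * where "al+" comes from alignment_name[], "leul" from ldst_name[],
 * and any atomicity prefix would come from atom_name[].
 */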
3124 
3125 /* We give more priority to constraints with fewer registers. */
3126 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3127 {
3128     int n;
3129 
3130     arg_ct += k;
3131     n = ctpop64(arg_ct->regs);
3132 
3133     /*
3134      * Sort single-register constraints first; this includes output
3135      * aliases, which must exactly match the already-allocated input.
3136      */
3137     if (n == 1 || arg_ct->oalias) {
3138         return INT_MAX;
3139     }
3140 
3141     /*
3142      * Sort register pairs next: the first member, then the second just after.
3143      * Arbitrarily sort multiple pairs by the index of the first reg;
3144      * there shouldn't be many pairs.
3145      */
3146     switch (arg_ct->pair) {
3147     case 1:
3148     case 3:
3149         return (k + 1) * 2;
3150     case 2:
3151         return (arg_ct->pair_index + 1) * 2 - 1;
3152     }
3153 
3154     /* Finally, sort by decreasing register count. */
3155     assert(n > 1);
3156     return -n;
3157 }
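
/*
 * Resulting order (descending priority): single-register constraints and
 * output aliases first (INT_MAX); then register pairs, each first member
 * at (k + 1) * 2 immediately above its partner at
 * (pair_index + 1) * 2 - 1; then the remaining constraints, those with
 * fewer allowable registers before those with more, so the tightest
 * constraints are allocated first.
 */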
3158 
3159 /* Sort from highest priority to lowest. */
3160 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3161 {
3162     int i, j;
3163 
3164     for (i = 0; i < n; i++) {
3165         a[start + i].sort_index = start + i;
3166     }
3167     if (n <= 1) {
3168         return;
3169     }
3170     for (i = 0; i < n - 1; i++) {
3171         for (j = i + 1; j < n; j++) {
3172             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3173             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3174             if (p1 < p2) {
3175                 int tmp = a[start + i].sort_index;
3176                 a[start + i].sort_index = a[start + j].sort_index;
3177                 a[start + j].sort_index = tmp;
3178             }
3179         }
3180     }
3181 }
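
/*
 * The selection sort above is O(n^2) in the number of arguments, but n
 * is bounded by TCG_MAX_OP_ARGS, so the quadratic cost is immaterial.
 */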
3182 
3183 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3184 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3185 
3186 static void process_constraint_sets(void)
3187 {
3188     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3189         const TCGConstraintSet *tdefs = &constraint_sets[c];
3190         TCGArgConstraint *args_ct = all_cts[c];
3191         int nb_oargs = tdefs->nb_oargs;
3192         int nb_iargs = tdefs->nb_iargs;
3193         int nb_args = nb_oargs + nb_iargs;
3194         bool saw_alias_pair = false;
3195 
3196         for (int i = 0; i < nb_args; i++) {
3197             const char *ct_str = tdefs->args_ct_str[i];
3198             bool input_p = i >= nb_oargs;
3199             int o;
3200 
3201             switch (*ct_str) {
3202             case '0' ... '9':
3203                 o = *ct_str - '0';
3204                 tcg_debug_assert(input_p);
3205                 tcg_debug_assert(o < nb_oargs);
3206                 tcg_debug_assert(args_ct[o].regs != 0);
3207                 tcg_debug_assert(!args_ct[o].oalias);
3208                 args_ct[i] = args_ct[o];
3209                 /* The output sets oalias.  */
3210                 args_ct[o].oalias = 1;
3211                 args_ct[o].alias_index = i;
3212                 /* The input sets ialias. */
3213                 args_ct[i].ialias = 1;
3214                 args_ct[i].alias_index = o;
3215                 if (args_ct[i].pair) {
3216                     saw_alias_pair = true;
3217                 }
3218                 tcg_debug_assert(ct_str[1] == '\0');
3219                 continue;
3220 
3221             case '&':
3222                 tcg_debug_assert(!input_p);
3223                 args_ct[i].newreg = true;
3224                 ct_str++;
3225                 break;
3226 
3227             case 'p': /* plus */
3228                 /* Allocate to the register after the previous. */
3229                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3230                 o = i - 1;
3231                 tcg_debug_assert(!args_ct[o].pair);
3232                 tcg_debug_assert(!args_ct[o].ct);
3233                 args_ct[i] = (TCGArgConstraint){
3234                     .pair = 2,
3235                     .pair_index = o,
3236                     .regs = args_ct[o].regs << 1,
3237                     .newreg = args_ct[o].newreg,
3238                 };
3239                 args_ct[o].pair = 1;
3240                 args_ct[o].pair_index = i;
3241                 tcg_debug_assert(ct_str[1] == '\0');
3242                 continue;
3243 
3244             case 'm': /* minus */
3245                 /* Allocate to the register before the previous. */
3246                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3247                 o = i - 1;
3248                 tcg_debug_assert(!args_ct[o].pair);
3249                 tcg_debug_assert(!args_ct[o].ct);
3250                 args_ct[i] = (TCGArgConstraint){
3251                     .pair = 1,
3252                     .pair_index = o,
3253                     .regs = args_ct[o].regs >> 1,
3254                     .newreg = args_ct[o].newreg,
3255                 };
3256                 args_ct[o].pair = 2;
3257                 args_ct[o].pair_index = i;
3258                 tcg_debug_assert(ct_str[1] == '\0');
3259                 continue;
3260             }
3261 
3262             do {
3263                 switch (*ct_str) {
3264                 case 'i':
3265                     args_ct[i].ct |= TCG_CT_CONST;
3266                     break;
3267 #ifdef TCG_REG_ZERO
3268                 case 'z':
3269                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3270                     break;
3271 #endif
3272 
3273                 /* Include all of the target-specific constraints. */
3274 
3275 #undef CONST
3276 #define CONST(CASE, MASK) \
3277     case CASE: args_ct[i].ct |= MASK; break;
3278 #define REGS(CASE, MASK) \
3279     case CASE: args_ct[i].regs |= MASK; break;
3280 
3281 #include "tcg-target-con-str.h"
3282 
3283 #undef REGS
3284 #undef CONST
3285                 default:
3286                 case '0' ... '9':
3287                 case '&':
3288                 case 'p':
3289                 case 'm':
3290                     /* Typo in TCGConstraintSet constraint. */
3291                     g_assert_not_reached();
3292                 }
3293             } while (*++ct_str != '\0');
3294         }
3295 
3296         /*
3297          * Fix up output pairs that are aliased with inputs.
3298          * When we created the alias, we copied pair from the output.
3299          * There are three cases:
3300          *    (1a) Pairs of inputs alias pairs of outputs.
3301          *    (1b) One input aliases the first of a pair of outputs.
3302          *    (2)  One input aliases the second of a pair of outputs.
3303          *
3304          * Case 1a is handled by making sure that the pair_index'es are
3305          * properly updated so that they appear the same as a pair of inputs.
3306          *
3307          * Case 1b is handled by setting the pair_index of the input to
3308          * itself, simply so it doesn't point to an unrelated argument.
3309          * Since we don't encounter the "second" during the input allocation
3310          * phase, nothing happens with the second half of the input pair.
3311          *
3312          * Case 2 is handled by setting the second input to pair=3, the
3313          * first output to pair=3, and the pair_index'es to match.
3314          */
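        /*
         * Worked example for case 2 (hypothetical constraints, not from
         * any real target): outputs { "r", "p" } and one input { "1" }.
         * Parsing sets args_ct[0].pair = 1 and args_ct[1].pair = 2, and
         * copying output 1 into the aliased input leaves args_ct[2]
         * with pair = 2 as well.  The loop below then rewrites
         * args_ct[2] and args_ct[0] to pair = 3, with their
         * pair_index'es pointing at each other.
         */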
3315         if (saw_alias_pair) {
3316             for (int i = nb_oargs; i < nb_args; i++) {
3317                 int o, o2, i2;
3318 
3319                 /*
3320                  * Since [0-9pm] must be alone in the constraint string,
3321                  * the only way they can both be set is if the pair comes
3322                  * from the output alias.
3323                  */
3324                 if (!args_ct[i].ialias) {
3325                     continue;
3326                 }
3327                 switch (args_ct[i].pair) {
3328                 case 0:
3329                     break;
3330                 case 1:
3331                     o = args_ct[i].alias_index;
3332                     o2 = args_ct[o].pair_index;
3333                     tcg_debug_assert(args_ct[o].pair == 1);
3334                     tcg_debug_assert(args_ct[o2].pair == 2);
3335                     if (args_ct[o2].oalias) {
3336                         /* Case 1a */
3337                         i2 = args_ct[o2].alias_index;
3338                         tcg_debug_assert(args_ct[i2].pair == 2);
3339                         args_ct[i2].pair_index = i;
3340                         args_ct[i].pair_index = i2;
3341                     } else {
3342                         /* Case 1b */
3343                         args_ct[i].pair_index = i;
3344                     }
3345                     break;
3346                 case 2:
3347                     o = args_ct[i].alias_index;
3348                     o2 = args_ct[o].pair_index;
3349                     tcg_debug_assert(args_ct[o].pair == 2);
3350                     tcg_debug_assert(args_ct[o2].pair == 1);
3351                     if (args_ct[o2].oalias) {
3352                         /* Case 1a */
3353                         i2 = args_ct[o2].alias_index;
3354                         tcg_debug_assert(args_ct[i2].pair == 1);
3355                         args_ct[i2].pair_index = i;
3356                         args_ct[i].pair_index = i2;
3357                     } else {
3358                         /* Case 2 */
3359                         args_ct[i].pair = 3;
3360                         args_ct[o2].pair = 3;
3361                         args_ct[i].pair_index = o2;
3362                         args_ct[o2].pair_index = i;
3363                     }
3364                     break;
3365                 default:
3366                     g_assert_not_reached();
3367                 }
3368             }
3369         }
3370 
3371         /* sort the constraints (XXX: this is just a heuristic) */
3372         sort_constraints(args_ct, 0, nb_oargs);
3373         sort_constraints(args_ct, nb_oargs, nb_iargs);
3374     }
3375 }
3376 
3377 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3378 {
3379     TCGOpcode opc = op->opc;
3380     TCGType type = TCGOP_TYPE(op);
3381     unsigned flags = TCGOP_FLAGS(op);
3382     const TCGOpDef *def = &tcg_op_defs[opc];
3383     const TCGOutOp *outop = all_outop[opc];
3384     TCGConstraintSetIndex con_set;
3385 
3386     if (def->flags & TCG_OPF_NOT_PRESENT) {
3387         return empty_cts;
3388     }
3389 
3390     if (outop) {
3391         con_set = outop->static_constraint;
3392         if (con_set == C_Dynamic) {
3393             con_set = outop->dynamic_constraint(type, flags);
3394         }
3395     } else {
3396         con_set = tcg_target_op_def(opc, type, flags);
3397     }
3398     tcg_debug_assert(con_set >= 0);
3399     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3400 
3401     /* The constraint arguments must match TCGOpcode arguments. */
3402     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3403     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3404 
3405     return all_cts[con_set];
3406 }
3407 
3408 static void remove_label_use(TCGOp *op, int idx)
3409 {
3410     TCGLabel *label = arg_label(op->args[idx]);
3411     TCGLabelUse *use;
3412 
3413     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3414         if (use->op == op) {
3415             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3416             return;
3417         }
3418     }
3419     g_assert_not_reached();
3420 }
3421 
3422 void tcg_op_remove(TCGContext *s, TCGOp *op)
3423 {
3424     switch (op->opc) {
3425     case INDEX_op_br:
3426         remove_label_use(op, 0);
3427         break;
3428     case INDEX_op_brcond_i32:
3429     case INDEX_op_brcond_i64:
3430         remove_label_use(op, 3);
3431         break;
3432     case INDEX_op_brcond2_i32:
3433         remove_label_use(op, 5);
3434         break;
3435     default:
3436         break;
3437     }
3438 
3439     QTAILQ_REMOVE(&s->ops, op, link);
3440     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3441     s->nb_ops--;
3442 }
3443 
3444 void tcg_remove_ops_after(TCGOp *op)
3445 {
3446     TCGContext *s = tcg_ctx;
3447 
3448     while (true) {
3449         TCGOp *last = tcg_last_op();
3450         if (last == op) {
3451             return;
3452         }
3453         tcg_op_remove(s, last);
3454     }
3455 }
3456 
3457 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3458 {
3459     TCGContext *s = tcg_ctx;
3460     TCGOp *op = NULL;
3461 
3462     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3463         QTAILQ_FOREACH(op, &s->free_ops, link) {
3464             if (nargs <= op->nargs) {
3465                 QTAILQ_REMOVE(&s->free_ops, op, link);
3466                 nargs = op->nargs;
3467                 goto found;
3468             }
3469         }
3470     }
3471 
3472     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3473     nargs = MAX(4, nargs);
3474     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3475 
3476  found:
3477     memset(op, 0, offsetof(TCGOp, link));
3478     op->opc = opc;
3479     op->nargs = nargs;
3480 
3481     /* Check for bitfield overflow. */
3482     tcg_debug_assert(op->nargs == nargs);
3483 
3484     s->nb_ops++;
3485     return op;
3486 }
3487 
3488 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3489 {
3490     TCGOp *op = tcg_op_alloc(opc, nargs);
3491 
3492     if (tcg_ctx->emit_before_op) {
3493         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3494     } else {
3495         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3496     }
3497     return op;
3498 }
3499 
3500 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3501                             TCGOpcode opc, TCGType type, unsigned nargs)
3502 {
3503     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3504 
3505     TCGOP_TYPE(new_op) = type;
3506     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3507     return new_op;
3508 }
3509 
3510 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3511                            TCGOpcode opc, TCGType type, unsigned nargs)
3512 {
3513     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3514 
3515     TCGOP_TYPE(new_op) = type;
3516     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3517     return new_op;
3518 }
3519 
3520 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3521 {
3522     TCGLabelUse *u;
3523 
3524     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3525         TCGOp *op = u->op;
3526         switch (op->opc) {
3527         case INDEX_op_br:
3528             op->args[0] = label_arg(to);
3529             break;
3530         case INDEX_op_brcond_i32:
3531         case INDEX_op_brcond_i64:
3532             op->args[3] = label_arg(to);
3533             break;
3534         case INDEX_op_brcond2_i32:
3535             op->args[5] = label_arg(to);
3536             break;
3537         default:
3538             g_assert_not_reached();
3539         }
3540     }
3541 
3542     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3543 }
3544 
3545 /* Reachability analysis: remove unreachable code.  */
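/*
 * Example: given "brcond ... -> L1; br -> L2; set_label L1;
 * set_label L2", the two adjacent labels are merged (L1's uses move
 * to L2 and L1 is removed), after which "br -> L2" is a branch to
 * the immediately following label and is removed as well.
 */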
3546 static void __attribute__((noinline))
3547 reachable_code_pass(TCGContext *s)
3548 {
3549     TCGOp *op, *op_next, *op_prev;
3550     bool dead = false;
3551 
3552     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3553         bool remove = dead;
3554         TCGLabel *label;
3555 
3556         switch (op->opc) {
3557         case INDEX_op_set_label:
3558             label = arg_label(op->args[0]);
3559 
3560             /*
3561              * Note that the first op in the TB is always a load,
3562              * so there is always something before a label.
3563              */
3564             op_prev = QTAILQ_PREV(op, link);
3565 
3566             /*
3567              * If we find two sequential labels, move all branches to
3568              * reference the second label and remove the first label.
3569              * Do this before branch to next optimization, so that the
3570              * middle label is out of the way.
3571              */
3572             if (op_prev->opc == INDEX_op_set_label) {
3573                 move_label_uses(label, arg_label(op_prev->args[0]));
3574                 tcg_op_remove(s, op_prev);
3575                 op_prev = QTAILQ_PREV(op, link);
3576             }
3577 
3578             /*
3579              * Optimization can fold conditional branches to unconditional.
3580              * If we find a label which is preceded by an unconditional
3581              * branch to next, remove the branch.  We couldn't do this when
3582              * processing the branch because any dead code between the branch
3583              * and label had not yet been removed.
3584              */
3585             if (op_prev->opc == INDEX_op_br &&
3586                 label == arg_label(op_prev->args[0])) {
3587                 tcg_op_remove(s, op_prev);
3588                 /* Fall through means insns become live again.  */
3589                 dead = false;
3590             }
3591 
3592             if (QSIMPLEQ_EMPTY(&label->branches)) {
3593                 /*
3594                  * While there is an occasional backward branch, virtually
3595                  * all branches generated by the translators are forward.
3596                  * Which means that generally we will have already removed
3597                  * all the references to the label that there will be, and
3598                  * there is little to be gained by iterating.
3599                  */
3600                 remove = true;
3601             } else {
3602                 /* Once we see a label, insns become live again.  */
3603                 dead = false;
3604                 remove = false;
3605             }
3606             break;
3607 
3608         case INDEX_op_br:
3609         case INDEX_op_exit_tb:
3610         case INDEX_op_goto_ptr:
3611             /* Unconditional branches; everything following is dead.  */
3612             dead = true;
3613             break;
3614 
3615         case INDEX_op_call:
3616             /* Notice noreturn helper calls, raising exceptions.  */
3617             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3618                 dead = true;
3619             }
3620             break;
3621 
3622         case INDEX_op_insn_start:
3623             /* Never remove -- we need to keep these for unwind.  */
3624             remove = false;
3625             break;
3626 
3627         default:
3628             break;
3629         }
3630 
3631         if (remove) {
3632             tcg_op_remove(s, op);
3633         }
3634     }
3635 }
3636 
3637 #define TS_DEAD  1
3638 #define TS_MEM   2
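
/*
 * Temp state bits for liveness: TS_DEAD means no later op needs the
 * value; TS_MEM means the value must also be up to date in its
 * canonical memory slot.  E.g. a global at the end of the function is
 * TS_DEAD | TS_MEM: dead in registers, but saved back to memory.
 */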
3639 
3640 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3641 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3642 
3643 /* For liveness_pass_1, the register preferences for a given temp.  */
3644 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3645 {
3646     return ts->state_ptr;
3647 }
3648 
3649 /* For liveness_pass_1, reset the preferences for a given temp to the
3650  * maximal regset for its type.
3651  */
3652 static inline void la_reset_pref(TCGTemp *ts)
3653 {
3654     *la_temp_pref(ts)
3655         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3656 }
3657 
3658 /* liveness analysis: end of function: all temps are dead, and globals
3659    should be in memory. */
3660 static void la_func_end(TCGContext *s, int ng, int nt)
3661 {
3662     int i;
3663 
3664     for (i = 0; i < ng; ++i) {
3665         s->temps[i].state = TS_DEAD | TS_MEM;
3666         la_reset_pref(&s->temps[i]);
3667     }
3668     for (i = ng; i < nt; ++i) {
3669         s->temps[i].state = TS_DEAD;
3670         la_reset_pref(&s->temps[i]);
3671     }
3672 }
3673 
3674 /* liveness analysis: end of basic block: all temps are dead, globals
3675    and local temps should be in memory. */
3676 static void la_bb_end(TCGContext *s, int ng, int nt)
3677 {
3678     int i;
3679 
3680     for (i = 0; i < nt; ++i) {
3681         TCGTemp *ts = &s->temps[i];
3682         int state;
3683 
3684         switch (ts->kind) {
3685         case TEMP_FIXED:
3686         case TEMP_GLOBAL:
3687         case TEMP_TB:
3688             state = TS_DEAD | TS_MEM;
3689             break;
3690         case TEMP_EBB:
3691         case TEMP_CONST:
3692             state = TS_DEAD;
3693             break;
3694         default:
3695             g_assert_not_reached();
3696         }
3697         ts->state = state;
3698         la_reset_pref(ts);
3699     }
3700 }
3701 
3702 /* liveness analysis: sync globals back to memory.  */
3703 static void la_global_sync(TCGContext *s, int ng)
3704 {
3705     int i;
3706 
3707     for (i = 0; i < ng; ++i) {
3708         int state = s->temps[i].state;
3709         s->temps[i].state = state | TS_MEM;
3710         if (state == TS_DEAD) {
3711             /* If the global was previously dead, reset prefs.  */
3712             la_reset_pref(&s->temps[i]);
3713         }
3714     }
3715 }
3716 
3717 /*
3718  * liveness analysis: conditional branch: all temps are dead unless
3719  * explicitly live-across-conditional-branch, globals and local temps
3720  * should be synced.
3721  */
3722 static void la_bb_sync(TCGContext *s, int ng, int nt)
3723 {
3724     la_global_sync(s, ng);
3725 
3726     for (int i = ng; i < nt; ++i) {
3727         TCGTemp *ts = &s->temps[i];
3728         int state;
3729 
3730         switch (ts->kind) {
3731         case TEMP_TB:
3732             state = ts->state;
3733             ts->state = state | TS_MEM;
3734             if (state != TS_DEAD) {
3735                 continue;
3736             }
3737             break;
3738         case TEMP_EBB:
3739         case TEMP_CONST:
3740             continue;
3741         default:
3742             g_assert_not_reached();
3743         }
3744         la_reset_pref(&s->temps[i]);
3745     }
3746 }
3747 
3748 /* liveness analysis: sync globals back to memory and kill.  */
3749 static void la_global_kill(TCGContext *s, int ng)
3750 {
3751     int i;
3752 
3753     for (i = 0; i < ng; i++) {
3754         s->temps[i].state = TS_DEAD | TS_MEM;
3755         la_reset_pref(&s->temps[i]);
3756     }
3757 }
3758 
3759 /* liveness analysis: note live globals crossing calls.  */
3760 static void la_cross_call(TCGContext *s, int nt)
3761 {
3762     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3763     int i;
3764 
3765     for (i = 0; i < nt; i++) {
3766         TCGTemp *ts = &s->temps[i];
3767         if (!(ts->state & TS_DEAD)) {
3768             TCGRegSet *pset = la_temp_pref(ts);
3769             TCGRegSet set = *pset;
3770 
3771             set &= mask;
3772             /* If the combination is not possible, restart.  */
3773             if (set == 0) {
3774                 set = tcg_target_available_regs[ts->type] & mask;
3775             }
3776             *pset = set;
3777         }
3778     }
3779 }
3780 
3781 /*
3782  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3783  * to TEMP_EBB, if possible.
3784  */
3785 static void __attribute__((noinline))
3786 liveness_pass_0(TCGContext *s)
3787 {
3788     void * const multiple_ebb = (void *)(uintptr_t)-1;
3789     int nb_temps = s->nb_temps;
3790     TCGOp *op, *ebb;
3791 
3792     for (int i = s->nb_globals; i < nb_temps; ++i) {
3793         s->temps[i].state_ptr = NULL;
3794     }
3795 
3796     /*
3797      * Represent each EBB by the op at which it begins.  In the case of
3798      * the first EBB, this is the first op, otherwise it is a label.
3799      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3800      * within a single EBB, else MULTIPLE_EBB.
3801      */
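    /*
     * E.g. a TEMP_TB referenced only between one pair of labels ends
     * with state_ptr at the op heading that EBB and is reduced to
     * TEMP_EBB below; one referenced on both sides of a label becomes
     * multiple_ebb and stays TEMP_TB.
     */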
3802     ebb = QTAILQ_FIRST(&s->ops);
3803     QTAILQ_FOREACH(op, &s->ops, link) {
3804         const TCGOpDef *def;
3805         int nb_oargs, nb_iargs;
3806 
3807         switch (op->opc) {
3808         case INDEX_op_set_label:
3809             ebb = op;
3810             continue;
3811         case INDEX_op_discard:
3812             continue;
3813         case INDEX_op_call:
3814             nb_oargs = TCGOP_CALLO(op);
3815             nb_iargs = TCGOP_CALLI(op);
3816             break;
3817         default:
3818             def = &tcg_op_defs[op->opc];
3819             nb_oargs = def->nb_oargs;
3820             nb_iargs = def->nb_iargs;
3821             break;
3822         }
3823 
3824         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3825             TCGTemp *ts = arg_temp(op->args[i]);
3826 
3827             if (ts->kind != TEMP_TB) {
3828                 continue;
3829             }
3830             if (ts->state_ptr == NULL) {
3831                 ts->state_ptr = ebb;
3832             } else if (ts->state_ptr != ebb) {
3833                 ts->state_ptr = multiple_ebb;
3834             }
3835         }
3836     }
3837 
3838     /*
3839      * For TEMP_TB that turned out not to be used beyond one EBB,
3840      * reduce the liveness to TEMP_EBB.
3841      */
3842     for (int i = s->nb_globals; i < nb_temps; ++i) {
3843         TCGTemp *ts = &s->temps[i];
3844         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3845             ts->kind = TEMP_EBB;
3846         }
3847     }
3848 }
3849 
3850 /* Liveness analysis: update each op's life mask to tell whether a
3851    given input argument is dead. Instructions updating dead
3852    temporaries are removed. */
3853 static void __attribute__((noinline))
3854 liveness_pass_1(TCGContext *s)
3855 {
3856     int nb_globals = s->nb_globals;
3857     int nb_temps = s->nb_temps;
3858     TCGOp *op, *op_prev;
3859     TCGRegSet *prefs;
3860     int i;
3861 
3862     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3863     for (i = 0; i < nb_temps; ++i) {
3864         s->temps[i].state_ptr = prefs + i;
3865     }
3866 
3867     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3868     la_func_end(s, nb_globals, nb_temps);
3869 
3870     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3871         int nb_iargs, nb_oargs;
3872         TCGOpcode opc_new, opc_new2;
3873         TCGLifeData arg_life = 0;
3874         TCGTemp *ts;
3875         TCGOpcode opc = op->opc;
3876         const TCGOpDef *def = &tcg_op_defs[opc];
3877         const TCGArgConstraint *args_ct;
3878 
3879         switch (opc) {
3880         case INDEX_op_call:
3881             {
3882                 const TCGHelperInfo *info = tcg_call_info(op);
3883                 int call_flags = tcg_call_flags(op);
3884 
3885                 nb_oargs = TCGOP_CALLO(op);
3886                 nb_iargs = TCGOP_CALLI(op);
3887 
3888                 /* pure functions can be removed if their result is unused */
3889                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3890                     for (i = 0; i < nb_oargs; i++) {
3891                         ts = arg_temp(op->args[i]);
3892                         if (ts->state != TS_DEAD) {
3893                             goto do_not_remove_call;
3894                         }
3895                     }
3896                     goto do_remove;
3897                 }
3898             do_not_remove_call:
3899 
3900                 /* Output args are dead.  */
3901                 for (i = 0; i < nb_oargs; i++) {
3902                     ts = arg_temp(op->args[i]);
3903                     if (ts->state & TS_DEAD) {
3904                         arg_life |= DEAD_ARG << i;
3905                     }
3906                     if (ts->state & TS_MEM) {
3907                         arg_life |= SYNC_ARG << i;
3908                     }
3909                     ts->state = TS_DEAD;
3910                     la_reset_pref(ts);
3911                 }
3912 
3913                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3914                 memset(op->output_pref, 0, sizeof(op->output_pref));
3915 
3916                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3917                                     TCG_CALL_NO_READ_GLOBALS))) {
3918                     la_global_kill(s, nb_globals);
3919                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3920                     la_global_sync(s, nb_globals);
3921                 }
3922 
3923                 /* Record arguments that die in this helper.  */
3924                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3925                     ts = arg_temp(op->args[i]);
3926                     if (ts->state & TS_DEAD) {
3927                         arg_life |= DEAD_ARG << i;
3928                     }
3929                 }
3930 
3931                 /* For all live registers, remove call-clobbered prefs.  */
3932                 la_cross_call(s, nb_temps);
3933 
3934                 /*
3935                  * Input arguments are live for preceding opcodes.
3936                  *
3937                  * For those arguments that die, and will be allocated in
3938                  * registers, clear the register set for that arg, to be
3939                  * filled in below.  For args that will be on the stack,
3940                  * reset to any available reg.  Process arguments in reverse
3941                  * order so that if a temp is used more than once, the stack
3942                  * reset to max happens before the register reset to 0.
3943                  */
3944                 for (i = nb_iargs - 1; i >= 0; i--) {
3945                     const TCGCallArgumentLoc *loc = &info->in[i];
3946                     ts = arg_temp(op->args[nb_oargs + i]);
3947 
3948                     if (ts->state & TS_DEAD) {
3949                         switch (loc->kind) {
3950                         case TCG_CALL_ARG_NORMAL:
3951                         case TCG_CALL_ARG_EXTEND_U:
3952                         case TCG_CALL_ARG_EXTEND_S:
3953                             if (arg_slot_reg_p(loc->arg_slot)) {
3954                                 *la_temp_pref(ts) = 0;
3955                                 break;
3956                             }
3957                             /* fall through */
3958                         default:
3959                             *la_temp_pref(ts) =
3960                                 tcg_target_available_regs[ts->type];
3961                             break;
3962                         }
3963                         ts->state &= ~TS_DEAD;
3964                     }
3965                 }
3966 
3967                 /*
3968                  * For each input argument, add its input register to prefs.
3969                  * If a temp is used once, this produces a single set bit;
3970                  * if a temp is used multiple times, this produces a set.
3971                  */
3972                 for (i = 0; i < nb_iargs; i++) {
3973                     const TCGCallArgumentLoc *loc = &info->in[i];
3974                     ts = arg_temp(op->args[nb_oargs + i]);
3975 
3976                     switch (loc->kind) {
3977                     case TCG_CALL_ARG_NORMAL:
3978                     case TCG_CALL_ARG_EXTEND_U:
3979                     case TCG_CALL_ARG_EXTEND_S:
3980                         if (arg_slot_reg_p(loc->arg_slot)) {
3981                             tcg_regset_set_reg(*la_temp_pref(ts),
3982                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3983                         }
3984                         break;
3985                     default:
3986                         break;
3987                     }
3988                 }
3989             }
3990             break;
3991         case INDEX_op_insn_start:
3992             break;
3993         case INDEX_op_discard:
3994             /* mark the temporary as dead */
3995             ts = arg_temp(op->args[0]);
3996             ts->state = TS_DEAD;
3997             la_reset_pref(ts);
3998             break;
3999 
4000         case INDEX_op_add2_i32:
4001         case INDEX_op_add2_i64:
4002             opc_new = INDEX_op_add;
4003             goto do_addsub2;
4004         case INDEX_op_sub2_i32:
4005         case INDEX_op_sub2_i64:
4006             opc_new = INDEX_op_sub;
4007         do_addsub2:
4008             nb_iargs = 4;
4009             nb_oargs = 2;
4010             /* Test if the high part of the operation is dead, but not
4011                the low part.  The result can be optimized to a simple
4012                add or sub.  This happens often for an x86_64 guest
4013                when the cpu mode is set to 32 bit.  */
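            /*
             * E.g. add2_i32 t0, t1, al, ah, bl, bh with t1 dead becomes
             * add t0, al, bl: args[1] gets the old args[2] and args[2]
             * the old args[4]; the remaining args are simply unused.
             */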
4014             if (arg_temp(op->args[1])->state == TS_DEAD) {
4015                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4016                     goto do_remove;
4017                 }
4018                 /* Replace the opcode and adjust the args in place,
4019                    leaving 3 unused args at the end.  */
4020                 op->opc = opc = opc_new;
4021                 op->args[1] = op->args[2];
4022                 op->args[2] = op->args[4];
4023                 /* Fall through and mark the single-word operation live.  */
4024                 nb_iargs = 2;
4025                 nb_oargs = 1;
4026             }
4027             goto do_not_remove;
4028 
4029         case INDEX_op_muls2_i32:
4030         case INDEX_op_muls2_i64:
4031             opc_new = INDEX_op_mul;
4032             opc_new2 = INDEX_op_mulsh;
4033             goto do_mul2;
4034         case INDEX_op_mulu2_i32:
4035         case INDEX_op_mulu2_i64:
4036             opc_new = INDEX_op_mul;
4037             opc_new2 = INDEX_op_muluh;
4038         do_mul2:
4039             nb_iargs = 2;
4040             nb_oargs = 2;
4041             if (arg_temp(op->args[1])->state == TS_DEAD) {
4042                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4043                     /* Both parts of the operation are dead.  */
4044                     goto do_remove;
4045                 }
4046                 /* The high part of the operation is dead; generate the low. */
4047                 op->opc = opc = opc_new;
4048                 op->args[1] = op->args[2];
4049                 op->args[2] = op->args[3];
4050             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4051                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4052                 /* The low part of the operation is dead; generate the high. */
4053                 op->opc = opc = opc_new2;
4054                 op->args[0] = op->args[1];
4055                 op->args[1] = op->args[2];
4056                 op->args[2] = op->args[3];
4057             } else {
4058                 goto do_not_remove;
4059             }
4060             /* Mark the single-word operation live.  */
4061             nb_oargs = 1;
4062             goto do_not_remove;
4063 
4064         default:
4065             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4066             nb_iargs = def->nb_iargs;
4067             nb_oargs = def->nb_oargs;
4068 
4069             /* Test if the operation can be removed because all
4070                its outputs are dead. We assume that nb_oargs == 0
4071                implies side effects */
4072             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4073                 for (i = 0; i < nb_oargs; i++) {
4074                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4075                         goto do_not_remove;
4076                     }
4077                 }
4078                 goto do_remove;
4079             }
4080             goto do_not_remove;
4081 
4082         do_remove:
4083             tcg_op_remove(s, op);
4084             break;
4085 
4086         do_not_remove:
4087             for (i = 0; i < nb_oargs; i++) {
4088                 ts = arg_temp(op->args[i]);
4089 
4090                 /* Remember the preference of the uses that followed.  */
4091                 if (i < ARRAY_SIZE(op->output_pref)) {
4092                     op->output_pref[i] = *la_temp_pref(ts);
4093                 }
4094 
4095                 /* Output args are dead.  */
4096                 if (ts->state & TS_DEAD) {
4097                     arg_life |= DEAD_ARG << i;
4098                 }
4099                 if (ts->state & TS_MEM) {
4100                     arg_life |= SYNC_ARG << i;
4101                 }
4102                 ts->state = TS_DEAD;
4103                 la_reset_pref(ts);
4104             }
4105 
4106             /* If end of basic block, update.  */
4107             if (def->flags & TCG_OPF_BB_EXIT) {
4108                 la_func_end(s, nb_globals, nb_temps);
4109             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4110                 la_bb_sync(s, nb_globals, nb_temps);
4111             } else if (def->flags & TCG_OPF_BB_END) {
4112                 la_bb_end(s, nb_globals, nb_temps);
4113             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4114                 la_global_sync(s, nb_globals);
4115                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4116                     la_cross_call(s, nb_temps);
4117                 }
4118             }
4119 
4120             /* Record arguments that die in this opcode.  */
4121             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4122                 ts = arg_temp(op->args[i]);
4123                 if (ts->state & TS_DEAD) {
4124                     arg_life |= DEAD_ARG << i;
4125                 }
4126             }
4127 
4128             /* Input arguments are live for preceding opcodes.  */
4129             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4130                 ts = arg_temp(op->args[i]);
4131                 if (ts->state & TS_DEAD) {
4132                     /* For operands that were dead, initially allow
4133                        all regs for the type.  */
4134                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4135                     ts->state &= ~TS_DEAD;
4136                 }
4137             }
4138 
4139             /* Incorporate constraints for this operand.  */
4140             switch (opc) {
4141             case INDEX_op_mov:
4142                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4143                    have proper constraints.  That said, special case
4144                    moves to propagate preferences backward.  */
4145                 if (IS_DEAD_ARG(1)) {
4146                     *la_temp_pref(arg_temp(op->args[0]))
4147                         = *la_temp_pref(arg_temp(op->args[1]));
4148                 }
4149                 break;
4150 
4151             default:
4152                 args_ct = opcode_args_ct(op);
4153                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4154                     const TCGArgConstraint *ct = &args_ct[i];
4155                     TCGRegSet set, *pset;
4156 
4157                     ts = arg_temp(op->args[i]);
4158                     pset = la_temp_pref(ts);
4159                     set = *pset;
4160 
4161                     set &= ct->regs;
4162                     if (ct->ialias) {
4163                         set &= output_pref(op, ct->alias_index);
4164                     }
4165                     /* If the combination is not possible, restart.  */
4166                     if (set == 0) {
4167                         set = ct->regs;
4168                     }
4169                     *pset = set;
4170                 }
4171                 break;
4172             }
4173             break;
4174         }
4175         op->life = arg_life;
4176     }
4177 }
4178 
4179 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
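/*
 * Roughly: each use of an indirect global is redirected to a direct
 * TEMP_EBB temp; a "ld" is inserted before a use when the direct temp
 * is stale, and a "st" is inserted after a write that liveness marked
 * with SYNC_ARG, so the canonical memory slot stays authoritative.
 */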
4180 static bool __attribute__((noinline))
4181 liveness_pass_2(TCGContext *s)
4182 {
4183     int nb_globals = s->nb_globals;
4184     int nb_temps, i;
4185     bool changes = false;
4186     TCGOp *op, *op_next;
4187 
4188     /* Create a temporary for each indirect global.  */
4189     for (i = 0; i < nb_globals; ++i) {
4190         TCGTemp *its = &s->temps[i];
4191         if (its->indirect_reg) {
4192             TCGTemp *dts = tcg_temp_alloc(s);
4193             dts->type = its->type;
4194             dts->base_type = its->base_type;
4195             dts->temp_subindex = its->temp_subindex;
4196             dts->kind = TEMP_EBB;
4197             its->state_ptr = dts;
4198         } else {
4199             its->state_ptr = NULL;
4200         }
4201         /* All globals begin dead.  */
4202         its->state = TS_DEAD;
4203     }
4204     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4205         TCGTemp *its = &s->temps[i];
4206         its->state_ptr = NULL;
4207         its->state = TS_DEAD;
4208     }
4209 
4210     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4211         TCGOpcode opc = op->opc;
4212         const TCGOpDef *def = &tcg_op_defs[opc];
4213         TCGLifeData arg_life = op->life;
4214         int nb_iargs, nb_oargs, call_flags;
4215         TCGTemp *arg_ts, *dir_ts;
4216 
4217         if (opc == INDEX_op_call) {
4218             nb_oargs = TCGOP_CALLO(op);
4219             nb_iargs = TCGOP_CALLI(op);
4220             call_flags = tcg_call_flags(op);
4221         } else {
4222             nb_iargs = def->nb_iargs;
4223             nb_oargs = def->nb_oargs;
4224 
4225             /* Set flags similar to how calls require.  */
4226             if (def->flags & TCG_OPF_COND_BRANCH) {
4227                 /* Like reading globals: sync_globals */
4228                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4229             } else if (def->flags & TCG_OPF_BB_END) {
4230                 /* Like writing globals: save_globals */
4231                 call_flags = 0;
4232             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4233                 /* Like reading globals: sync_globals */
4234                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4235             } else {
4236                 /* No effect on globals.  */
4237                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4238                               TCG_CALL_NO_WRITE_GLOBALS);
4239             }
4240         }
4241 
4242         /* Make sure that input arguments are available.  */
4243         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4244             arg_ts = arg_temp(op->args[i]);
4245             dir_ts = arg_ts->state_ptr;
4246             if (dir_ts && arg_ts->state == TS_DEAD) {
4247                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4248                                   ? INDEX_op_ld_i32
4249                                   : INDEX_op_ld_i64);
4250                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4251                                                   arg_ts->type, 3);
4252 
4253                 lop->args[0] = temp_arg(dir_ts);
4254                 lop->args[1] = temp_arg(arg_ts->mem_base);
4255                 lop->args[2] = arg_ts->mem_offset;
4256 
4257                 /* Loaded, but synced with memory.  */
4258                 arg_ts->state = TS_MEM;
4259             }
4260         }
4261 
4262         /* Perform input replacement, and mark inputs that became dead.
4263            No action is required except keeping temp_state up to date
4264            so that we reload when needed.  */
4265         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4266             arg_ts = arg_temp(op->args[i]);
4267             dir_ts = arg_ts->state_ptr;
4268             if (dir_ts) {
4269                 op->args[i] = temp_arg(dir_ts);
4270                 changes = true;
4271                 if (IS_DEAD_ARG(i)) {
4272                     arg_ts->state = TS_DEAD;
4273                 }
4274             }
4275         }
4276 
4277         /* Liveness analysis should ensure that the following are
4278            all correct, for call sites and basic block end points.  */
4279         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4280             /* Nothing to do */
4281         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4282             for (i = 0; i < nb_globals; ++i) {
4283                 /* Liveness should see that globals are synced back,
4284                    that is, either TS_DEAD or TS_MEM.  */
4285                 arg_ts = &s->temps[i];
4286                 tcg_debug_assert(arg_ts->state_ptr == 0
4287                                  || arg_ts->state != 0);
4288             }
4289         } else {
4290             for (i = 0; i < nb_globals; ++i) {
4291                 /* Liveness should see that globals are saved back,
4292                    that is, TS_DEAD, waiting to be reloaded.  */
4293                 arg_ts = &s->temps[i];
4294                 tcg_debug_assert(arg_ts->state_ptr == 0
4295                                  || arg_ts->state == TS_DEAD);
4296             }
4297         }
4298 
4299         /* Outputs become available.  */
4300         if (opc == INDEX_op_mov) {
4301             arg_ts = arg_temp(op->args[0]);
4302             dir_ts = arg_ts->state_ptr;
4303             if (dir_ts) {
4304                 op->args[0] = temp_arg(dir_ts);
4305                 changes = true;
4306 
4307                 /* The output is now live and modified.  */
4308                 arg_ts->state = 0;
4309 
4310                 if (NEED_SYNC_ARG(0)) {
4311                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4312                                       ? INDEX_op_st_i32
4313                                       : INDEX_op_st_i64);
4314                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4315                                                      arg_ts->type, 3);
4316                     TCGTemp *out_ts = dir_ts;
4317 
4318                     if (IS_DEAD_ARG(0)) {
4319                         out_ts = arg_temp(op->args[1]);
4320                         arg_ts->state = TS_DEAD;
4321                         tcg_op_remove(s, op);
4322                     } else {
4323                         arg_ts->state = TS_MEM;
4324                     }
4325 
4326                     sop->args[0] = temp_arg(out_ts);
4327                     sop->args[1] = temp_arg(arg_ts->mem_base);
4328                     sop->args[2] = arg_ts->mem_offset;
4329                 } else {
4330                     tcg_debug_assert(!IS_DEAD_ARG(0));
4331                 }
4332             }
4333         } else {
4334             for (i = 0; i < nb_oargs; i++) {
4335                 arg_ts = arg_temp(op->args[i]);
4336                 dir_ts = arg_ts->state_ptr;
4337                 if (!dir_ts) {
4338                     continue;
4339                 }
4340                 op->args[i] = temp_arg(dir_ts);
4341                 changes = true;
4342 
4343                 /* The output is now live and modified.  */
4344                 arg_ts->state = 0;
4345 
4346                 /* Sync outputs upon their last write.  */
4347                 if (NEED_SYNC_ARG(i)) {
4348                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4349                                       ? INDEX_op_st_i32
4350                                       : INDEX_op_st_i64);
4351                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4352                                                      arg_ts->type, 3);
4353 
4354                     sop->args[0] = temp_arg(dir_ts);
4355                     sop->args[1] = temp_arg(arg_ts->mem_base);
4356                     sop->args[2] = arg_ts->mem_offset;
4357 
4358                     arg_ts->state = TS_MEM;
4359                 }
4360                 /* Drop outputs that are dead.  */
4361                 if (IS_DEAD_ARG(i)) {
4362                     arg_ts->state = TS_DEAD;
4363                 }
4364             }
4365         }
4366     }
4367 
4368     return changes;
4369 }
4370 
4371 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4372 {
4373     intptr_t off;
4374     int size, align;
4375 
4376     /* When allocating an object, look at the full type. */
4377     size = tcg_type_size(ts->base_type);
4378     switch (ts->base_type) {
4379     case TCG_TYPE_I32:
4380         align = 4;
4381         break;
4382     case TCG_TYPE_I64:
4383     case TCG_TYPE_V64:
4384         align = 8;
4385         break;
4386     case TCG_TYPE_I128:
4387     case TCG_TYPE_V128:
4388     case TCG_TYPE_V256:
4389         /*
4390          * Note that we do not require aligned storage for V256,
4391          * and that we provide alignment for I128 to match V128,
4392          * even if that's above what the host ABI requires.
4393          */
4394         align = 16;
4395         break;
4396     default:
4397         g_assert_not_reached();
4398     }
4399 
4400     /*
4401      * Assume the stack is sufficiently aligned.
4402      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4403      * and do not require 16 byte vector alignment.  This seems slightly
4404      * easier than fully parameterizing the above switch statement.
4405      */
4406     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4407     off = ROUND_UP(s->current_frame_offset, align);
4408 
4409     /* If we've exhausted the stack frame, restart with a smaller TB. */
4410     if (off + size > s->frame_end) {
4411         tcg_raise_tb_overflow(s);
4412     }
4413     s->current_frame_offset = off + size;
4414 #if defined(__sparc__)
4415     off += TCG_TARGET_STACK_BIAS;
4416 #endif
4417 
4418     /* If the object was subdivided, assign memory to all the parts. */
4419     if (ts->base_type != ts->type) {
4420         int part_size = tcg_type_size(ts->type);
4421         int part_count = size / part_size;
4422 
4423         /*
4424          * Each part is allocated sequentially in tcg_temp_new_internal.
4425          * Jump back to the first part by subtracting the current index.
4426          */
4427         ts -= ts->temp_subindex;
4428         for (int i = 0; i < part_count; ++i) {
4429             ts[i].mem_offset = off + i * part_size;
4430             ts[i].mem_base = s->frame_temp;
4431             ts[i].mem_allocated = 1;
4432         }
4433     } else {
4434         ts->mem_offset = off;
4435         ts->mem_base = s->frame_temp;
4436         ts->mem_allocated = 1;
4437     }
4438 }
4439 
4440 /* Assign @reg to @ts, and update reg_to_temp[]. */
4441 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4442 {
4443     if (ts->val_type == TEMP_VAL_REG) {
4444         TCGReg old = ts->reg;
4445         tcg_debug_assert(s->reg_to_temp[old] == ts);
4446         if (old == reg) {
4447             return;
4448         }
4449         s->reg_to_temp[old] = NULL;
4450     }
4451     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4452     s->reg_to_temp[reg] = ts;
4453     ts->val_type = TEMP_VAL_REG;
4454     ts->reg = reg;
4455 }
4456 
4457 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4458 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4459 {
4460     tcg_debug_assert(type != TEMP_VAL_REG);
4461     if (ts->val_type == TEMP_VAL_REG) {
4462         TCGReg reg = ts->reg;
4463         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4464         s->reg_to_temp[reg] = NULL;
4465     }
4466     ts->val_type = type;
4467 }
4468 
4469 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4470 
4471 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4472    mark it free; otherwise mark it dead.  */
4473 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4474 {
4475     TCGTempVal new_type;
4476 
4477     switch (ts->kind) {
4478     case TEMP_FIXED:
4479         return;
4480     case TEMP_GLOBAL:
4481     case TEMP_TB:
4482         new_type = TEMP_VAL_MEM;
4483         break;
4484     case TEMP_EBB:
4485         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4486         break;
4487     case TEMP_CONST:
4488         new_type = TEMP_VAL_CONST;
4489         break;
4490     default:
4491         g_assert_not_reached();
4492     }
4493     set_temp_val_nonreg(s, ts, new_type);
4494 }
4495 
4496 /* Mark a temporary as dead.  */
4497 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4498 {
4499     temp_free_or_dead(s, ts, 1);
4500 }
4501 
4502 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4503    register needs to be allocated to store a constant.  If 'free_or_dead'
4504    is non-zero, subsequently release the temporary; if it is positive, the
4505    temp is dead; if it is negative, the temp is free.  */
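/*
 * E.g. tcg_reg_free() below spills via temp_sync(s, ts, regs, 0, -1):
 * store the value if memory is stale, then mark the temp free.
 */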
4506 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4507                       TCGRegSet preferred_regs, int free_or_dead)
4508 {
4509     if (!temp_readonly(ts) && !ts->mem_coherent) {
4510         if (!ts->mem_allocated) {
4511             temp_allocate_frame(s, ts);
4512         }
4513         switch (ts->val_type) {
4514         case TEMP_VAL_CONST:
4515             /* If we're going to free the temp immediately, then we won't
4516                require it later in a register, so attempt to store the
4517                constant to memory directly.  */
4518             if (free_or_dead
4519                 && tcg_out_sti(s, ts->type, ts->val,
4520                                ts->mem_base->reg, ts->mem_offset)) {
4521                 break;
4522             }
4523             temp_load(s, ts, tcg_target_available_regs[ts->type],
4524                       allocated_regs, preferred_regs);
4525             /* fallthrough */
4526 
4527         case TEMP_VAL_REG:
4528             tcg_out_st(s, ts->type, ts->reg,
4529                        ts->mem_base->reg, ts->mem_offset);
4530             break;
4531 
4532         case TEMP_VAL_MEM:
4533             break;
4534 
4535         case TEMP_VAL_DEAD:
4536         default:
4537             g_assert_not_reached();
4538         }
4539         ts->mem_coherent = 1;
4540     }
4541     if (free_or_dead) {
4542         temp_free_or_dead(s, ts, free_or_dead);
4543     }
4544 }
4545 
4546 /* free register 'reg' by spilling the corresponding temporary if necessary */
4547 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4548 {
4549     TCGTemp *ts = s->reg_to_temp[reg];
4550     if (ts != NULL) {
4551         temp_sync(s, ts, allocated_regs, 0, -1);
4552     }
4553 }
4554 
4555 /**
4556  * tcg_reg_alloc:
4557  * @required_regs: Set of registers in which we must allocate.
4558  * @allocated_regs: Set of registers which must be avoided.
4559  * @preferred_regs: Set of registers we should prefer.
4560  * @rev: True if we search the registers in "indirect" order.
4561  *
4562  * The allocated register must be in @required_regs & ~@allocated_regs,
4563  * but if we can put it in @preferred_regs we may save a move later.
4564  */
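/*
 * E.g. required = {r0,r1}, allocated = {r0}, preferred = {r1}: the
 * preference adds nothing beyond required & ~allocated, so the first
 * loop returns r1 if it is free, and otherwise r1's contents are
 * spilled and r1 returned.
 */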
4565 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4566                             TCGRegSet allocated_regs,
4567                             TCGRegSet preferred_regs, bool rev)
4568 {
4569     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4570     TCGRegSet reg_ct[2];
4571     const int *order;
4572 
4573     reg_ct[1] = required_regs & ~allocated_regs;
4574     tcg_debug_assert(reg_ct[1] != 0);
4575     reg_ct[0] = reg_ct[1] & preferred_regs;
4576 
4577     /* Skip the preferred_regs option if it cannot be satisfied,
4578        or if the preference made no difference.  */
4579     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4580 
4581     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4582 
4583     /* Try free registers, preferences first.  */
4584     for (j = f; j < 2; j++) {
4585         TCGRegSet set = reg_ct[j];
4586 
4587         if (tcg_regset_single(set)) {
4588             /* One register in the set.  */
4589             TCGReg reg = tcg_regset_first(set);
4590             if (s->reg_to_temp[reg] == NULL) {
4591                 return reg;
4592             }
4593         } else {
4594             for (i = 0; i < n; i++) {
4595                 TCGReg reg = order[i];
4596                 if (s->reg_to_temp[reg] == NULL &&
4597                     tcg_regset_test_reg(set, reg)) {
4598                     return reg;
4599                 }
4600             }
4601         }
4602     }
4603 
4604     /* We must spill something.  */
4605     for (j = f; j < 2; j++) {
4606         TCGRegSet set = reg_ct[j];
4607 
4608         if (tcg_regset_single(set)) {
4609             /* One register in the set.  */
4610             TCGReg reg = tcg_regset_first(set);
4611             tcg_reg_free(s, reg, allocated_regs);
4612             return reg;
4613         } else {
4614             for (i = 0; i < n; i++) {
4615                 TCGReg reg = order[i];
4616                 if (tcg_regset_test_reg(set, reg)) {
4617                     tcg_reg_free(s, reg, allocated_regs);
4618                     return reg;
4619                 }
4620             }
4621         }
4622     }
4623 
4624     g_assert_not_reached();
4625 }
4626 
4627 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4628                                  TCGRegSet allocated_regs,
4629                                  TCGRegSet preferred_regs, bool rev)
4630 {
4631     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4632     TCGRegSet reg_ct[2];
4633     const int *order;
4634 
4635     /* A pair starting at REG needs both REG and REG+1 free:
4636        exclude REG if either is in allocated_regs. */
4636     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4637     tcg_debug_assert(reg_ct[1] != 0);
4638     reg_ct[0] = reg_ct[1] & preferred_regs;
4639 
4640     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4641 
4642     /*
4643      * Skip the preferred_regs option if it cannot be satisfied,
4644      * or if the preference made no difference.
4645      */
4646     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4647 
4648     /*
4649      * Minimize the number of flushes by looking for 2 free registers first,
4650      * then a single flush, then two flushes.
4651      */
4652     for (fmin = 2; fmin >= 0; fmin--) {
4653         for (j = k; j < 2; j++) {
4654             TCGRegSet set = reg_ct[j];
4655 
4656             for (i = 0; i < n; i++) {
4657                 TCGReg reg = order[i];
4658 
4659                 if (tcg_regset_test_reg(set, reg)) {
4660                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4661                     if (f >= fmin) {
4662                         tcg_reg_free(s, reg, allocated_regs);
4663                         tcg_reg_free(s, reg + 1, allocated_regs);
4664                         return reg;
4665                     }
4666                 }
4667             }
4668         }
4669     }
4670     g_assert_not_reached();
4671 }
4672 
4673 /* Make sure the temporary is in a register.  If needed, allocate the register
4674    from DESIRED while avoiding ALLOCATED.  */
4675 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4676                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4677 {
4678     TCGReg reg;
4679 
4680     switch (ts->val_type) {
4681     case TEMP_VAL_REG:
4682         return;
4683     case TEMP_VAL_CONST:
4684         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4685                             preferred_regs, ts->indirect_base);
4686         if (ts->type <= TCG_TYPE_I64) {
4687             tcg_out_movi(s, ts->type, reg, ts->val);
4688         } else {
4689             uint64_t val = ts->val;
4690             MemOp vece = MO_64;
4691 
4692             /*
4693              * Find the minimal vector element that matches the constant.
4694              * The targets will, in general, have to do this search anyway,
4695              * so do it generically here.
4696              */
4697             if (val == dup_const(MO_8, val)) {
4698                 vece = MO_8;
4699             } else if (val == dup_const(MO_16, val)) {
4700                 vece = MO_16;
4701             } else if (val == dup_const(MO_32, val)) {
4702                 vece = MO_32;
4703             }
4704 
4705             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4706         }
4707         ts->mem_coherent = 0;
4708         break;
4709     case TEMP_VAL_MEM:
4710         if (!ts->mem_allocated) {
4711             temp_allocate_frame(s, ts);
4712         }
4713         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4714                             preferred_regs, ts->indirect_base);
4715         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4716         ts->mem_coherent = 1;
4717         break;
4718     case TEMP_VAL_DEAD:
4719     default:
4720         g_assert_not_reached();
4721     }
4722     set_temp_val_reg(s, ts, reg);
4723 }
4724 
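/*
 * Illustrative sketch (not part of the build) of the minimal-VECE search
 * above.  The hypothetical replicate() mirrors what dup_const() computes:
 * fill 64 bits with copies of the low 8 << vece bits of the constant.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t replicate(unsigned vece, uint64_t c)
{
    switch (vece) {
    case 0:  return 0x0101010101010101ull * (uint8_t)c;   /* MO_8  */
    case 1:  return 0x0001000100010001ull * (uint16_t)c;  /* MO_16 */
    case 2:  return 0x0000000100000001ull * (uint32_t)c;  /* MO_32 */
    default: return c;                                    /* MO_64 */
    }
}

int main(void)
{
    /* A constant of identical bytes matches already at MO_8... */
    uint64_t b = 0xefefefefefefefefull;
    assert(replicate(0, b) == b);

    /* ...while this one first matches at MO_16. */
    uint64_t h = 0x1234123412341234ull;
    assert(replicate(0, h) != h);
    assert(replicate(1, h) == h);
    return 0;
}
#endif
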
4725 /* Save a temporary to memory. 'allocated_regs' is used in case a
4726    temporary register needs to be allocated to store a constant.  */
4727 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4728 {
4729     /* The liveness analysis already ensures that globals are back
4730        in memory. Keep a tcg_debug_assert for safety. */
4731     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4732 }
4733 
4734 /* save globals to their canonical location and assume they can be
4735    modified by the following code. 'allocated_regs' is used in case a
4736    temporary register needs to be allocated to store a constant. */
4737 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4738 {
4739     int i, n;
4740 
4741     for (i = 0, n = s->nb_globals; i < n; i++) {
4742         temp_save(s, &s->temps[i], allocated_regs);
4743     }
4744 }
4745 
4746 /* sync globals to their canonical location and assume they can be
4747    read by the following code. 'allocated_regs' is used in case a
4748    temporary register needs to be allocated to store a constant. */
4749 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4750 {
4751     int i, n;
4752 
4753     for (i = 0, n = s->nb_globals; i < n; i++) {
4754         TCGTemp *ts = &s->temps[i];
4755         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4756                          || ts->kind == TEMP_FIXED
4757                          || ts->mem_coherent);
4758     }
4759 }
4760 
4761 /* at the end of a basic block, we assume all temporaries are dead and
4762    all globals are stored at their canonical location. */
4763 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4764 {
4765     int i;
4766 
4767     for (i = s->nb_globals; i < s->nb_temps; i++) {
4768         TCGTemp *ts = &s->temps[i];
4769 
4770         switch (ts->kind) {
4771         case TEMP_TB:
4772             temp_save(s, ts, allocated_regs);
4773             break;
4774         case TEMP_EBB:
4775             /* The liveness analysis already ensures that temps are dead.
4776                Keep a tcg_debug_assert for safety. */
4777             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4778             break;
4779         case TEMP_CONST:
4780             /* Similarly, we should have freed any allocated register. */
4781             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4782             break;
4783         default:
4784             g_assert_not_reached();
4785         }
4786     }
4787 
4788     save_globals(s, allocated_regs);
4789 }
4790 
4791 /*
4792  * At a conditional branch, we assume all temporaries are dead unless
4793  * explicitly live-across-conditional-branch; all globals and local
4794  * temps are synced to their location.
4795  */
4796 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4797 {
4798     sync_globals(s, allocated_regs);
4799 
4800     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4801         TCGTemp *ts = &s->temps[i];
4802         /*
4803          * The liveness analysis already ensures that temps are dead.
4804          * Keep tcg_debug_asserts for safety.
4805          */
4806         switch (ts->kind) {
4807         case TEMP_TB:
4808             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4809             break;
4810         case TEMP_EBB:
4811         case TEMP_CONST:
4812             break;
4813         default:
4814             g_assert_not_reached();
4815         }
4816     }
4817 }
4818 
4819 /*
4820  * Specialized code generation for INDEX_op_mov_* with a constant.
4821  */
4822 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4823                                   tcg_target_ulong val, TCGLifeData arg_life,
4824                                   TCGRegSet preferred_regs)
4825 {
4826     /* ENV should not be modified.  */
4827     tcg_debug_assert(!temp_readonly(ots));
4828 
4829     /* The movi is not explicitly generated here.  */
4830     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4831     ots->val = val;
4832     ots->mem_coherent = 0;
4833     if (NEED_SYNC_ARG(0)) {
4834         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4835     } else if (IS_DEAD_ARG(0)) {
4836         temp_dead(s, ots);
4837     }
4838 }
4839 
4840 /*
4841  * Specialized code generation for INDEX_op_mov_*.
4842  */
4843 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4844 {
4845     const TCGLifeData arg_life = op->life;
4846     TCGRegSet allocated_regs, preferred_regs;
4847     TCGTemp *ts, *ots;
4848     TCGType otype, itype;
4849     TCGReg oreg, ireg;
4850 
4851     allocated_regs = s->reserved_regs;
4852     preferred_regs = output_pref(op, 0);
4853     ots = arg_temp(op->args[0]);
4854     ts = arg_temp(op->args[1]);
4855 
4856     /* ENV should not be modified.  */
4857     tcg_debug_assert(!temp_readonly(ots));
4858 
4859     /* Note that otype != itype for no-op truncation.  */
4860     otype = ots->type;
4861     itype = ts->type;
4862 
4863     if (ts->val_type == TEMP_VAL_CONST) {
4864         /* propagate constant or generate sti */
4865         tcg_target_ulong val = ts->val;
4866         if (IS_DEAD_ARG(1)) {
4867             temp_dead(s, ts);
4868         }
4869         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4870         return;
4871     }
4872 
4873     /* If the source value is in memory we're going to be forced
4874        to have it in a register in order to perform the copy.  Copy
4875        the SOURCE value into its own register first; that way we
4876        don't have to reload SOURCE the next time it is used. */
4877     if (ts->val_type == TEMP_VAL_MEM) {
4878         temp_load(s, ts, tcg_target_available_regs[itype],
4879                   allocated_regs, preferred_regs);
4880     }
4881     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4882     ireg = ts->reg;
4883 
4884     if (IS_DEAD_ARG(0)) {
4885         /* mov to a non-saved dead register makes no sense (even with
4886            liveness analysis disabled). */
4887         tcg_debug_assert(NEED_SYNC_ARG(0));
4888         if (!ots->mem_allocated) {
4889             temp_allocate_frame(s, ots);
4890         }
4891         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4892         if (IS_DEAD_ARG(1)) {
4893             temp_dead(s, ts);
4894         }
4895         temp_dead(s, ots);
4896         return;
4897     }
4898 
4899     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4900         /*
4901          * The mov can be suppressed.  Kill input first, so that it
4902          * is unlinked from reg_to_temp, then set the output to the
4903          * reg that we saved from the input.
4904          */
4905         temp_dead(s, ts);
4906         oreg = ireg;
4907     } else {
4908         if (ots->val_type == TEMP_VAL_REG) {
4909             oreg = ots->reg;
4910         } else {
4911             /* Make sure to not spill the input register during allocation. */
4912             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4913                                  allocated_regs | ((TCGRegSet)1 << ireg),
4914                                  preferred_regs, ots->indirect_base);
4915         }
4916         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4917             /*
4918              * Cross register class move not supported.
4919              * Store the source register into the destination slot
4920              * and leave the destination temp as TEMP_VAL_MEM.
4921              */
4922             assert(!temp_readonly(ots));
4923             if (!ots->mem_allocated) {
4924                 temp_allocate_frame(s, ots);
4925             }
4926             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4927             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4928             ots->mem_coherent = 1;
4929             return;
4930         }
4931     }
4932     set_temp_val_reg(s, ots, oreg);
4933     ots->mem_coherent = 0;
4934 
4935     if (NEED_SYNC_ARG(0)) {
4936         temp_sync(s, ots, allocated_regs, 0, 0);
4937     }
4938 }
4939 
4940 /*
4941  * Specialized code generation for INDEX_op_dup_vec.
4942  */
4943 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4944 {
4945     const TCGLifeData arg_life = op->life;
4946     TCGRegSet dup_out_regs, dup_in_regs;
4947     const TCGArgConstraint *dup_args_ct;
4948     TCGTemp *its, *ots;
4949     TCGType itype, vtype;
4950     unsigned vece;
4951     int lowpart_ofs;
4952     bool ok;
4953 
4954     ots = arg_temp(op->args[0]);
4955     its = arg_temp(op->args[1]);
4956 
4957     /* ENV should not be modified.  */
4958     tcg_debug_assert(!temp_readonly(ots));
4959 
4960     itype = its->type;
4961     vece = TCGOP_VECE(op);
4962     vtype = TCGOP_TYPE(op);
4963 
4964     if (its->val_type == TEMP_VAL_CONST) {
4965         /* Propagate constant via movi -> dupi.  */
4966         tcg_target_ulong val = its->val;
4967         if (IS_DEAD_ARG(1)) {
4968             temp_dead(s, its);
4969         }
4970         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4971         return;
4972     }
4973 
4974     dup_args_ct = opcode_args_ct(op);
4975     dup_out_regs = dup_args_ct[0].regs;
4976     dup_in_regs = dup_args_ct[1].regs;
4977 
4978     /* Allocate the output register now.  */
4979     if (ots->val_type != TEMP_VAL_REG) {
4980         TCGRegSet allocated_regs = s->reserved_regs;
4981         TCGReg oreg;
4982 
4983         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4984             /* Make sure to not spill the input register. */
4985             tcg_regset_set_reg(allocated_regs, its->reg);
4986         }
4987         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4988                              output_pref(op, 0), ots->indirect_base);
4989         set_temp_val_reg(s, ots, oreg);
4990     }
4991 
4992     switch (its->val_type) {
4993     case TEMP_VAL_REG:
4994         /*
4995          * The dup constraints must be broad, covering all possible VECE.
4996          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4997          * to fail, indicating that extra moves are required for that case.
4998          */
4999         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5000             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5001                 goto done;
5002             }
5003             /* Try again from memory or a vector input register.  */
5004         }
5005         if (!its->mem_coherent) {
5006             /*
5007              * The input register is not synced, and so an extra store
5008              * would be required to use memory.  Attempt an integer-vector
5009              * register move first.  We do not have a TCGRegSet for this.
5010              */
5011             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5012                 break;
5013             }
5014             /* Sync the temp back to its slot and load from there.  */
5015             temp_sync(s, its, s->reserved_regs, 0, 0);
5016         }
5017         /* fall through */
5018 
5019     case TEMP_VAL_MEM:
5020         lowpart_ofs = 0;
5021         if (HOST_BIG_ENDIAN) {
5022             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5023         }
5024         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5025                              its->mem_offset + lowpart_ofs)) {
5026             goto done;
5027         }
5028         /* Load the input into the destination vector register. */
5029         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5030         break;
5031 
5032     default:
5033         g_assert_not_reached();
5034     }
5035 
5036     /* We now have a vector input register, so dup must succeed. */
5037     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5038     tcg_debug_assert(ok);
5039 
5040  done:
5041     ots->mem_coherent = 0;
5042     if (IS_DEAD_ARG(1)) {
5043         temp_dead(s, its);
5044     }
5045     if (NEED_SYNC_ARG(0)) {
5046         temp_sync(s, ots, s->reserved_regs, 0, 0);
5047     }
5048     if (IS_DEAD_ARG(0)) {
5049         temp_dead(s, ots);
5050     }
5051 }
5052 
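/*
 * Illustrative note (not part of the build): the lowpart_ofs computation
 * in tcg_reg_alloc_dup.  On a big-endian host the least significant
 * element of a value in memory sits at the highest address, so the load
 * address must be biased by the type size minus the element size.
 */
#if 0
#include <assert.h>

int main(void)
{
    int type_size = 8;         /* bytes in a TCG_TYPE_I64 input        */
    int vece = 2;              /* MO_32: element size is 1 << 2 == 4   */
    int lowpart_ofs = type_size - (1 << vece);

    assert(lowpart_ofs == 4);  /* big-endian offset of the low element */
    return 0;
}
#endif
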
5053 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5054 {
5055     const TCGLifeData arg_life = op->life;
5056     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5057     TCGRegSet i_allocated_regs;
5058     TCGRegSet o_allocated_regs;
5059     int i, k, nb_iargs, nb_oargs;
5060     TCGReg reg;
5061     TCGArg arg;
5062     const TCGArgConstraint *args_ct;
5063     const TCGArgConstraint *arg_ct;
5064     TCGTemp *ts;
5065     TCGArg new_args[TCG_MAX_OP_ARGS];
5066     int const_args[TCG_MAX_OP_ARGS];
5067     TCGCond op_cond;
5068 
5069     nb_oargs = def->nb_oargs;
5070     nb_iargs = def->nb_iargs;
5071 
5072     /* copy constants */
5073     memcpy(new_args + nb_oargs + nb_iargs,
5074            op->args + nb_oargs + nb_iargs,
5075            sizeof(TCGArg) * def->nb_cargs);
5076 
5077     i_allocated_regs = s->reserved_regs;
5078     o_allocated_regs = s->reserved_regs;
5079 
5080     switch (op->opc) {
5081     case INDEX_op_brcond_i32:
5082     case INDEX_op_brcond_i64:
5083         op_cond = op->args[2];
5084         break;
5085     case INDEX_op_setcond_i32:
5086     case INDEX_op_setcond_i64:
5087     case INDEX_op_negsetcond_i32:
5088     case INDEX_op_negsetcond_i64:
5089     case INDEX_op_cmp_vec:
5090         op_cond = op->args[3];
5091         break;
5092     case INDEX_op_brcond2_i32:
5093         op_cond = op->args[4];
5094         break;
5095     case INDEX_op_movcond_i32:
5096     case INDEX_op_movcond_i64:
5097     case INDEX_op_setcond2_i32:
5098     case INDEX_op_cmpsel_vec:
5099         op_cond = op->args[5];
5100         break;
5101     default:
5102         /* No condition within opcode. */
5103         op_cond = TCG_COND_ALWAYS;
5104         break;
5105     }
5106 
5107     args_ct = opcode_args_ct(op);
5108 
5109     /* satisfy input constraints */
5110     for (k = 0; k < nb_iargs; k++) {
5111         TCGRegSet i_preferred_regs, i_required_regs;
5112         bool allocate_new_reg, copyto_new_reg;
5113         TCGTemp *ts2;
5114         int i1, i2;
5115 
5116         i = args_ct[nb_oargs + k].sort_index;
5117         arg = op->args[i];
5118         arg_ct = &args_ct[i];
5119         ts = arg_temp(arg);
5120 
5121         if (ts->val_type == TEMP_VAL_CONST) {
5122 #ifdef TCG_REG_ZERO
5123             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5124                 /* Hardware zero register: indicate register via non-const. */
5125                 const_args[i] = 0;
5126                 new_args[i] = TCG_REG_ZERO;
5127                 continue;
5128             }
5129 #endif
5130 
5131             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5132                                        op_cond, TCGOP_VECE(op))) {
5133                 /* constant is OK for instruction */
5134                 const_args[i] = 1;
5135                 new_args[i] = ts->val;
5136                 continue;
5137             }
5138         }
5139 
5140         reg = ts->reg;
5141         i_preferred_regs = 0;
5142         i_required_regs = arg_ct->regs;
5143         allocate_new_reg = false;
5144         copyto_new_reg = false;
5145 
5146         switch (arg_ct->pair) {
5147         case 0: /* not paired */
5148             if (arg_ct->ialias) {
5149                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5150 
5151                 /*
5152                  * If the input is readonly, then it cannot also be an
5153                  * output and aliased to itself.  If the input is not
5154                  * dead after the instruction, we must allocate a new
5155                  * register and move it.
5156                  */
5157                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5158                     || args_ct[arg_ct->alias_index].newreg) {
5159                     allocate_new_reg = true;
5160                 } else if (ts->val_type == TEMP_VAL_REG) {
5161                     /*
5162                      * Check if the current register has already been
5163                      * allocated for another input.
5164                      */
5165                     allocate_new_reg =
5166                         tcg_regset_test_reg(i_allocated_regs, reg);
5167                 }
5168             }
5169             if (!allocate_new_reg) {
5170                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5171                           i_preferred_regs);
5172                 reg = ts->reg;
5173                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5174             }
5175             if (allocate_new_reg) {
5176                 /*
5177                  * Allocate a new register matching the constraint
5178                  * and move the temporary register into it.
5179                  */
5180                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5181                           i_allocated_regs, 0);
5182                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5183                                     i_preferred_regs, ts->indirect_base);
5184                 copyto_new_reg = true;
5185             }
5186             break;
5187 
5188         case 1:
5189             /* First of an input pair; if i1 == i2, the second is an output. */
5190             i1 = i;
5191             i2 = arg_ct->pair_index;
5192             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5193 
5194             /*
5195              * It is easier to default to allocating a new pair
5196              * and to identify a few cases where it's not required.
5197              */
5198             if (arg_ct->ialias) {
5199                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5200                 if (IS_DEAD_ARG(i1) &&
5201                     IS_DEAD_ARG(i2) &&
5202                     !temp_readonly(ts) &&
5203                     ts->val_type == TEMP_VAL_REG &&
5204                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5205                     tcg_regset_test_reg(i_required_regs, reg) &&
5206                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5207                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5208                     (ts2
5209                      ? ts2->val_type == TEMP_VAL_REG &&
5210                        ts2->reg == reg + 1 &&
5211                        !temp_readonly(ts2)
5212                      : s->reg_to_temp[reg + 1] == NULL)) {
5213                     break;
5214                 }
5215             } else {
5216                 /* Without aliasing, the pair must also be an input. */
5217                 tcg_debug_assert(ts2);
5218                 if (ts->val_type == TEMP_VAL_REG &&
5219                     ts2->val_type == TEMP_VAL_REG &&
5220                     ts2->reg == reg + 1 &&
5221                     tcg_regset_test_reg(i_required_regs, reg)) {
5222                     break;
5223                 }
5224             }
5225             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5226                                      0, ts->indirect_base);
5227             goto do_pair;
5228 
5229         case 2: /* pair second */
5230             reg = new_args[arg_ct->pair_index] + 1;
5231             goto do_pair;
5232 
5233         case 3: /* ialias with second output, no first input */
5234             tcg_debug_assert(arg_ct->ialias);
5235             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5236 
5237             if (IS_DEAD_ARG(i) &&
5238                 !temp_readonly(ts) &&
5239                 ts->val_type == TEMP_VAL_REG &&
5240                 reg > 0 &&
5241                 s->reg_to_temp[reg - 1] == NULL &&
5242                 tcg_regset_test_reg(i_required_regs, reg) &&
5243                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5244                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5245                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5246                 break;
5247             }
5248             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5249                                      i_allocated_regs, 0,
5250                                      ts->indirect_base);
5251             tcg_regset_set_reg(i_allocated_regs, reg);
5252             reg += 1;
5253             goto do_pair;
5254 
5255         do_pair:
5256             /*
5257              * If an aliased input is not dead after the instruction,
5258              * we must allocate a new register and move it.
5259              */
5260             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5261                 TCGRegSet t_allocated_regs = i_allocated_regs;
5262 
5263                 /*
5264                  * Because of the alias, and the continued life, make sure
5265                  * that the temp is somewhere *other* than the reg pair,
5266                  * and we get a copy in reg.
5267                  */
5268                 tcg_regset_set_reg(t_allocated_regs, reg);
5269                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5270                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5271                     /* If ts was already in reg, copy it somewhere else. */
5272                     TCGReg nr;
5273                     bool ok;
5274 
5275                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5276                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5277                                        t_allocated_regs, 0, ts->indirect_base);
5278                     ok = tcg_out_mov(s, ts->type, nr, reg);
5279                     tcg_debug_assert(ok);
5280 
5281                     set_temp_val_reg(s, ts, nr);
5282                 } else {
5283                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5284                               t_allocated_regs, 0);
5285                     copyto_new_reg = true;
5286                 }
5287             } else {
5288                 /* Preferably allocate to reg, otherwise copy. */
5289                 i_required_regs = (TCGRegSet)1 << reg;
5290                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5291                           i_preferred_regs);
5292                 copyto_new_reg = ts->reg != reg;
5293             }
5294             break;
5295 
5296         default:
5297             g_assert_not_reached();
5298         }
5299 
5300         if (copyto_new_reg) {
5301             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5302                 /*
5303                  * Cross register class move not supported.  Sync the
5304                  * temp back to its slot and load from there.
5305                  */
5306                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5307                 tcg_out_ld(s, ts->type, reg,
5308                            ts->mem_base->reg, ts->mem_offset);
5309             }
5310         }
5311         new_args[i] = reg;
5312         const_args[i] = 0;
5313         tcg_regset_set_reg(i_allocated_regs, reg);
5314     }
5315 
5316     /* mark dead temporaries and free the associated registers */
5317     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5318         if (IS_DEAD_ARG(i)) {
5319             temp_dead(s, arg_temp(op->args[i]));
5320         }
5321     }
5322 
5323     if (def->flags & TCG_OPF_COND_BRANCH) {
5324         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5325     } else if (def->flags & TCG_OPF_BB_END) {
5326         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5327     } else {
5328         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5329             /* XXX: permit generic clobber register list ? */
5330             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5331                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5332                     tcg_reg_free(s, i, i_allocated_regs);
5333                 }
5334             }
5335         }
5336         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5337             /* sync globals if the op has side effects and might trigger
5338                an exception. */
5339             sync_globals(s, i_allocated_regs);
5340         }
5341 
5342         /* satisfy the output constraints */
5343         for (k = 0; k < nb_oargs; k++) {
5344             i = args_ct[k].sort_index;
5345             arg = op->args[i];
5346             arg_ct = &args_ct[i];
5347             ts = arg_temp(arg);
5348 
5349             /* ENV should not be modified.  */
5350             tcg_debug_assert(!temp_readonly(ts));
5351 
5352             switch (arg_ct->pair) {
5353             case 0: /* not paired */
5354                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5355                     reg = new_args[arg_ct->alias_index];
5356                 } else if (arg_ct->newreg) {
5357                     reg = tcg_reg_alloc(s, arg_ct->regs,
5358                                         i_allocated_regs | o_allocated_regs,
5359                                         output_pref(op, k), ts->indirect_base);
5360                 } else {
5361                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5362                                         output_pref(op, k), ts->indirect_base);
5363                 }
5364                 break;
5365 
5366             case 1: /* first of pair */
5367                 if (arg_ct->oalias) {
5368                     reg = new_args[arg_ct->alias_index];
5369                 } else if (arg_ct->newreg) {
5370                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5371                                              i_allocated_regs | o_allocated_regs,
5372                                              output_pref(op, k),
5373                                              ts->indirect_base);
5374                 } else {
5375                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5376                                              output_pref(op, k),
5377                                              ts->indirect_base);
5378                 }
5379                 break;
5380 
5381             case 2: /* second of pair */
5382                 if (arg_ct->oalias) {
5383                     reg = new_args[arg_ct->alias_index];
5384                 } else {
5385                     reg = new_args[arg_ct->pair_index] + 1;
5386                 }
5387                 break;
5388 
5389             case 3: /* first of pair, aliasing with a second input */
5390                 tcg_debug_assert(!arg_ct->newreg);
5391                 reg = new_args[arg_ct->pair_index] - 1;
5392                 break;
5393 
5394             default:
5395                 g_assert_not_reached();
5396             }
5397             tcg_regset_set_reg(o_allocated_regs, reg);
5398             set_temp_val_reg(s, ts, reg);
5399             ts->mem_coherent = 0;
5400             new_args[i] = reg;
5401         }
5402     }
5403 
5404     /* emit instruction */
5405     TCGType type = TCGOP_TYPE(op);
5406     switch (op->opc) {
5407     case INDEX_op_ext_i32_i64:
5408         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5409         break;
5410     case INDEX_op_extu_i32_i64:
5411         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5412         break;
5413     case INDEX_op_extrl_i64_i32:
5414         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5415         break;
5416 
5417     case INDEX_op_add:
5418     case INDEX_op_and:
5419     case INDEX_op_andc:
5420     case INDEX_op_eqv:
5421     case INDEX_op_mul:
5422     case INDEX_op_mulsh:
5423     case INDEX_op_muluh:
5424     case INDEX_op_nand:
5425     case INDEX_op_nor:
5426     case INDEX_op_or:
5427     case INDEX_op_orc:
5428     case INDEX_op_xor:
5429         {
5430             const TCGOutOpBinary *out =
5431                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5432 
5433             /* Constants should never appear in the first source operand. */
5434             tcg_debug_assert(!const_args[1]);
5435             if (const_args[2]) {
5436                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5437             } else {
5438                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5439             }
5440         }
5441         break;
5442 
5443     case INDEX_op_sub:
5444         {
5445             const TCGOutOpSubtract *out = &outop_sub;
5446 
5447             /*
5448              * Constants should never appear in the second source operand.
5449              * These are folded into an add with the negated constant.
5450              */
5451             tcg_debug_assert(!const_args[2]);
5452             if (const_args[1]) {
5453                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     case INDEX_op_neg:
5461     case INDEX_op_not:
5462         {
5463             const TCGOutOpUnary *out =
5464                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5465 
5466             /* Constants should have been folded. */
5467             tcg_debug_assert(!const_args[1]);
5468             out->out_rr(s, type, new_args[0], new_args[1]);
5469         }
5470         break;
5471 
5472     default:
5473         if (def->flags & TCG_OPF_VECTOR) {
5474             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5475                            TCGOP_VECE(op), new_args, const_args);
5476         } else {
5477             tcg_out_op(s, op->opc, type, new_args, const_args);
5478         }
5479         break;
5480     }
5481 
5482     /* move the outputs in the correct register if needed */
5483     for (i = 0; i < nb_oargs; i++) {
5484         ts = arg_temp(op->args[i]);
5485 
5486         /* ENV should not be modified.  */
5487         tcg_debug_assert(!temp_readonly(ts));
5488 
5489         if (NEED_SYNC_ARG(i)) {
5490             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5491         } else if (IS_DEAD_ARG(i)) {
5492             temp_dead(s, ts);
5493         }
5494     }
5495 }
5496 
5497 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5498 {
5499     const TCGLifeData arg_life = op->life;
5500     TCGTemp *ots, *itsl, *itsh;
5501     TCGType vtype = TCGOP_TYPE(op);
5502 
5503     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5504     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5505     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5506 
5507     ots = arg_temp(op->args[0]);
5508     itsl = arg_temp(op->args[1]);
5509     itsh = arg_temp(op->args[2]);
5510 
5511     /* ENV should not be modified.  */
5512     tcg_debug_assert(!temp_readonly(ots));
5513 
5514     /* Allocate the output register now.  */
5515     if (ots->val_type != TEMP_VAL_REG) {
5516         TCGRegSet allocated_regs = s->reserved_regs;
5517         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5518         TCGReg oreg;
5519 
5520         /* Make sure to not spill the input registers. */
5521         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5522             tcg_regset_set_reg(allocated_regs, itsl->reg);
5523         }
5524         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5525             tcg_regset_set_reg(allocated_regs, itsh->reg);
5526         }
5527 
5528         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5529                              output_pref(op, 0), ots->indirect_base);
5530         set_temp_val_reg(s, ots, oreg);
5531     }
5532 
5533     /* Promote dup2 of immediates to dupi_vec. */
5534     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5535         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5536         MemOp vece = MO_64;
5537 
5538         if (val == dup_const(MO_8, val)) {
5539             vece = MO_8;
5540         } else if (val == dup_const(MO_16, val)) {
5541             vece = MO_16;
5542         } else if (val == dup_const(MO_32, val)) {
5543             vece = MO_32;
5544         }
5545 
5546         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5547         goto done;
5548     }
5549 
5550     /* If the two inputs form one 64-bit value, try dupm_vec. */
5551     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5552         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5553         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5554         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5555 
5556         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5557         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5558 
5559         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5560                              its->mem_base->reg, its->mem_offset)) {
5561             goto done;
5562         }
5563     }
5564 
5565     /* Fall back to generic expansion. */
5566     return false;
5567 
5568  done:
5569     ots->mem_coherent = 0;
5570     if (IS_DEAD_ARG(1)) {
5571         temp_dead(s, itsl);
5572     }
5573     if (IS_DEAD_ARG(2)) {
5574         temp_dead(s, itsh);
5575     }
5576     if (NEED_SYNC_ARG(0)) {
5577         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5578     } else if (IS_DEAD_ARG(0)) {
5579         temp_dead(s, ots);
5580     }
5581     return true;
5582 }
5583 
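/*
 * Illustrative sketch (not part of the build): the immediate promotion in
 * tcg_reg_alloc_dup2.  deposit64(lo, 32, 32, hi) places the 32-bit high
 * half into bits [63:32]; deposit64_sketch is a hand-rolled equivalent.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static uint64_t deposit64_sketch(uint64_t val, unsigned pos, unsigned len,
                                 uint64_t field)
{
    uint64_t mask = (~0ull >> (64 - len)) << pos;
    return (val & ~mask) | ((field << pos) & mask);
}

int main(void)
{
    uint64_t val = deposit64_sketch(0x89abcdefu, 32, 32, 0x01234567u);
    assert(val == 0x0123456789abcdefull);
    return 0;
}
#endif
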
5584 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5585                          TCGRegSet allocated_regs)
5586 {
5587     if (ts->val_type == TEMP_VAL_REG) {
5588         if (ts->reg != reg) {
5589             tcg_reg_free(s, reg, allocated_regs);
5590             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5591                 /*
5592                  * Cross register class move not supported.  Sync the
5593                  * temp back to its slot and load from there.
5594                  */
5595                 temp_sync(s, ts, allocated_regs, 0, 0);
5596                 tcg_out_ld(s, ts->type, reg,
5597                            ts->mem_base->reg, ts->mem_offset);
5598             }
5599         }
5600     } else {
5601         TCGRegSet arg_set = 0;
5602 
5603         tcg_reg_free(s, reg, allocated_regs);
5604         tcg_regset_set_reg(arg_set, reg);
5605         temp_load(s, ts, arg_set, allocated_regs, 0);
5606     }
5607 }
5608 
5609 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5610                          TCGRegSet allocated_regs)
5611 {
5612     /*
5613      * When the destination is on the stack, load up the temp and store.
5614      * If there are many call-saved registers, the temp might live to
5615      * see another use; otherwise it'll be discarded.
5616      */
5617     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5618     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5619                arg_slot_stk_ofs(arg_slot));
5620 }
5621 
5622 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5623                             TCGTemp *ts, TCGRegSet *allocated_regs)
5624 {
5625     if (arg_slot_reg_p(l->arg_slot)) {
5626         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5627         load_arg_reg(s, reg, ts, *allocated_regs);
5628         tcg_regset_set_reg(*allocated_regs, reg);
5629     } else {
5630         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5631     }
5632 }
5633 
5634 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5635                          intptr_t ref_off, TCGRegSet *allocated_regs)
5636 {
5637     TCGReg reg;
5638 
5639     if (arg_slot_reg_p(arg_slot)) {
5640         reg = tcg_target_call_iarg_regs[arg_slot];
5641         tcg_reg_free(s, reg, *allocated_regs);
5642         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5643         tcg_regset_set_reg(*allocated_regs, reg);
5644     } else {
5645         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5646                             *allocated_regs, 0, false);
5647         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5648         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5649                    arg_slot_stk_ofs(arg_slot));
5650     }
5651 }
5652 
5653 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5654 {
5655     const int nb_oargs = TCGOP_CALLO(op);
5656     const int nb_iargs = TCGOP_CALLI(op);
5657     const TCGLifeData arg_life = op->life;
5658     const TCGHelperInfo *info = tcg_call_info(op);
5659     TCGRegSet allocated_regs = s->reserved_regs;
5660     int i;
5661 
5662     /*
5663      * Move inputs into place in reverse order,
5664      * so that we place stacked arguments first.
5665      */
5666     for (i = nb_iargs - 1; i >= 0; --i) {
5667         const TCGCallArgumentLoc *loc = &info->in[i];
5668         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5669 
5670         switch (loc->kind) {
5671         case TCG_CALL_ARG_NORMAL:
5672         case TCG_CALL_ARG_EXTEND_U:
5673         case TCG_CALL_ARG_EXTEND_S:
5674             load_arg_normal(s, loc, ts, &allocated_regs);
5675             break;
5676         case TCG_CALL_ARG_BY_REF:
5677             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5678             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5679                          arg_slot_stk_ofs(loc->ref_slot),
5680                          &allocated_regs);
5681             break;
5682         case TCG_CALL_ARG_BY_REF_N:
5683             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5684             break;
5685         default:
5686             g_assert_not_reached();
5687         }
5688     }
5689 
5690     /* Mark dead temporaries and free the associated registers.  */
5691     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5692         if (IS_DEAD_ARG(i)) {
5693             temp_dead(s, arg_temp(op->args[i]));
5694         }
5695     }
5696 
5697     /* Clobber call registers.  */
5698     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5699         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5700             tcg_reg_free(s, i, allocated_regs);
5701         }
5702     }
5703 
5704     /*
5705      * Save globals if they might be written by the helper,
5706      * sync them if they might be read.
5707      */
5708     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5709         /* Nothing to do */
5710     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5711         sync_globals(s, allocated_regs);
5712     } else {
5713         save_globals(s, allocated_regs);
5714     }
5715 
5716     /*
5717      * If the ABI passes a pointer to the returned struct as the first
5718      * argument, load that now.  Pass a pointer to the output home slot.
5719      */
5720     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5721         TCGTemp *ts = arg_temp(op->args[0]);
5722 
5723         if (!ts->mem_allocated) {
5724             temp_allocate_frame(s, ts);
5725         }
5726         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5727     }
5728 
5729     tcg_out_call(s, tcg_call_func(op), info);
5730 
5731     /* Assign output registers and emit moves if needed.  */
5732     switch (info->out_kind) {
5733     case TCG_CALL_RET_NORMAL:
5734         for (i = 0; i < nb_oargs; i++) {
5735             TCGTemp *ts = arg_temp(op->args[i]);
5736             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5737 
5738             /* ENV should not be modified.  */
5739             tcg_debug_assert(!temp_readonly(ts));
5740 
5741             set_temp_val_reg(s, ts, reg);
5742             ts->mem_coherent = 0;
5743         }
5744         break;
5745 
5746     case TCG_CALL_RET_BY_VEC:
5747         {
5748             TCGTemp *ts = arg_temp(op->args[0]);
5749 
5750             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5751             tcg_debug_assert(ts->temp_subindex == 0);
5752             if (!ts->mem_allocated) {
5753                 temp_allocate_frame(s, ts);
5754             }
5755             tcg_out_st(s, TCG_TYPE_V128,
5756                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5757                        ts->mem_base->reg, ts->mem_offset);
5758         }
5759         /* fall through to mark all parts in memory */
5760 
5761     case TCG_CALL_RET_BY_REF:
5762         /* The callee has performed a write through the reference. */
5763         for (i = 0; i < nb_oargs; i++) {
5764             TCGTemp *ts = arg_temp(op->args[i]);
5765             ts->val_type = TEMP_VAL_MEM;
5766         }
5767         break;
5768 
5769     default:
5770         g_assert_not_reached();
5771     }
5772 
5773     /* Flush or discard output registers as needed. */
5774     for (i = 0; i < nb_oargs; i++) {
5775         TCGTemp *ts = arg_temp(op->args[i]);
5776         if (NEED_SYNC_ARG(i)) {
5777             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5778         } else if (IS_DEAD_ARG(i)) {
5779             temp_dead(s, ts);
5780         }
5781     }
5782 }
5783 
5784 /**
5785  * atom_and_align_for_opc:
5786  * @s: tcg context
5787  * @opc: memory operation code
5788  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5789  * @allow_two_ops: true if we are prepared to issue two operations
5790  *
5791  * Return the alignment and atomicity to use for the inline fast path
5792  * for the given memory operation.  The alignment may be larger than
5793  * that specified in @opc, and the correct alignment will be diagnosed
5794  * by the slow path helper.
5795  *
5796  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5797  * and issue two loads or stores for subalignment.
5798  */
5799 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5800                                            MemOp host_atom, bool allow_two_ops)
5801 {
5802     MemOp align = memop_alignment_bits(opc);
5803     MemOp size = opc & MO_SIZE;
5804     MemOp half = size ? size - 1 : 0;
5805     MemOp atom = opc & MO_ATOM_MASK;
5806     MemOp atmax;
5807 
5808     switch (atom) {
5809     case MO_ATOM_NONE:
5810         /* The operation requires no specific atomicity. */
5811         atmax = MO_8;
5812         break;
5813 
5814     case MO_ATOM_IFALIGN:
5815         atmax = size;
5816         break;
5817 
5818     case MO_ATOM_IFALIGN_PAIR:
5819         atmax = half;
5820         break;
5821 
5822     case MO_ATOM_WITHIN16:
5823         atmax = size;
5824         if (size == MO_128) {
5825             /* Misalignment implies !within16, and therefore no atomicity. */
5826         } else if (host_atom != MO_ATOM_WITHIN16) {
5827             /* The host does not implement within16, so require alignment. */
5828             align = MAX(align, size);
5829         }
5830         break;
5831 
5832     case MO_ATOM_WITHIN16_PAIR:
5833         atmax = size;
5834         /*
5835          * Misalignment implies !within16, and therefore half atomicity.
5836          * Any host prepared for two operations can implement this with
5837          * half alignment.
5838          */
5839         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5840             align = MAX(align, half);
5841         }
5842         break;
5843 
5844     case MO_ATOM_SUBALIGN:
5845         atmax = size;
5846         if (host_atom != MO_ATOM_SUBALIGN) {
5847             /* If unaligned but not odd, there are subobjects up to half. */
5848             if (allow_two_ops) {
5849                 align = MAX(align, half);
5850             } else {
5851                 align = MAX(align, size);
5852             }
5853         }
5854         break;
5855 
5856     default:
5857         g_assert_not_reached();
5858     }
5859 
5860     return (TCGAtomAlign){ .atom = atmax, .align = align };
5861 }
5862 
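/*
 * Worked example (illustrative fragment, not part of the build): a 64-bit
 * load tagged MO_ATOM_WITHIN16 on a host whose accesses are only atomic
 * when naturally aligned.  The host cannot honour "atomic if within 16
 * bytes", so the alignment requirement is raised to the access size.
 */
#if 0
    MemOp opc = MO_64 | MO_ATOM_WITHIN16;   /* no explicit MO_ALIGN bits */
    TCGAtomAlign aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
    /* aa.atom == MO_64; aa.align raised from 0 to MO_64. */
#endif
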
5863 /*
5864  * Similarly for qemu_ld/st slow path helpers.
5865  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5866  * using only the provided backend tcg_out_* functions.
5867  */
5868 
5869 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5870 {
5871     int ofs = arg_slot_stk_ofs(slot);
5872 
5873     /*
5874      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does not
5875      * require extension to uint64_t, adjust the address for uint32_t.
5876      */
5877     if (HOST_BIG_ENDIAN &&
5878         TCG_TARGET_REG_BITS == 64 &&
5879         type == TCG_TYPE_I32) {
5880         ofs += 4;
5881     }
5882     return ofs;
5883 }
5884 
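/*
 * Worked example (illustrative fragment, not part of the build): on a
 * 64-bit big-endian host, a 32-bit argument occupies the high-address
 * half of its 8-byte stack slot, so 4 is added to the slot's base
 * offset.  Assuming, hypothetically, a slot whose base offset is 16:
 */
#if 0
    int base = arg_slot_stk_ofs(slot);                      /* say, 16 */
    int ofs  = tcg_out_helper_stk_ofs(TCG_TYPE_I32, slot);  /* 16 + 4  */
#endif
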
5885 static void tcg_out_helper_load_slots(TCGContext *s,
5886                                       unsigned nmov, TCGMovExtend *mov,
5887                                       const TCGLdstHelperParam *parm)
5888 {
5889     unsigned i;
5890     TCGReg dst3;
5891 
5892     /*
5893      * Start from the end, storing to the stack first.
5894      * This frees those registers, so we need not consider overlap.
5895      */
5896     for (i = nmov; i-- > 0; ) {
5897         unsigned slot = mov[i].dst;
5898 
5899         if (arg_slot_reg_p(slot)) {
5900             goto found_reg;
5901         }
5902 
5903         TCGReg src = mov[i].src;
5904         TCGType dst_type = mov[i].dst_type;
5905         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5906 
5907         /* The argument is going onto the stack; extend into scratch. */
5908         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5909             tcg_debug_assert(parm->ntmp != 0);
5910             mov[i].dst = src = parm->tmp[0];
5911             tcg_out_movext1(s, &mov[i]);
5912         }
5913 
5914         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5915                    tcg_out_helper_stk_ofs(dst_type, slot));
5916     }
5917     return;
5918 
5919  found_reg:
5920     /*
5921      * The remaining arguments are in registers.
5922      * Convert slot numbers to argument registers.
5923      */
5924     nmov = i + 1;
5925     for (i = 0; i < nmov; ++i) {
5926         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5927     }
5928 
5929     switch (nmov) {
5930     case 4:
5931         /* The backend must have provided enough temps for the worst case. */
5932         tcg_debug_assert(parm->ntmp >= 2);
5933 
5934         dst3 = mov[3].dst;
5935         for (unsigned j = 0; j < 3; ++j) {
5936             if (dst3 == mov[j].src) {
5937                 /*
5938                  * Conflict. Copy the source to a temporary, perform the
5939                  * remaining moves, then the extension from our scratch
5940                  * on the way out.
5941                  */
5942                 TCGReg scratch = parm->tmp[1];
5943 
5944                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5945                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5946                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5947                 return;
5948             }
5949         }
5950 
5951         /* No conflicts: perform this move and continue. */
5952         tcg_out_movext1(s, &mov[3]);
5953         /* fall through */
5954 
5955     case 3:
5956         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5957                         parm->ntmp ? parm->tmp[0] : -1);
5958         break;
5959     case 2:
5960         tcg_out_movext2(s, mov, mov + 1,
5961                         parm->ntmp ? parm->tmp[0] : -1);
5962         break;
5963     case 1:
5964         tcg_out_movext1(s, mov);
5965         break;
5966     default:
5967         g_assert_not_reached();
5968     }
5969 }
5970 
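/*
 * Illustrative sketch (not part of the build): the conflict handled in
 * the 4-move case above, with hypothetical argument registers r0..r3,
 * sources a/b/c, and s0 standing in for the scratch parm->tmp[1].
 */
#if 0
    /*
     * Requested parallel moves:
     *   mov[0]: r0 = a;   mov[1]: r1 = r3;   mov[2]: r2 = b;
     *   mov[3]: r3 = c;   (emitted first in the no-conflict case)
     * dst3 (r3) is still a source of mov[1], so instead emit:
     */
    s0 = c;          /* park mov[3].src in the scratch          */
    r0 = a;          /* tcg_out_movext3() handles mov[0..2]     */
    r1 = r3;
    r2 = b;
    r3 = s0;         /* tcg_out_movext1_new_src() from the park */
#endif
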
5971 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5972                                     TCGType type, tcg_target_long imm,
5973                                     const TCGLdstHelperParam *parm)
5974 {
5975     if (arg_slot_reg_p(slot)) {
5976         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5977     } else {
5978         int ofs = tcg_out_helper_stk_ofs(type, slot);
5979         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5980             tcg_debug_assert(parm->ntmp != 0);
5981             tcg_out_movi(s, type, parm->tmp[0], imm);
5982             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5983         }
5984     }
5985 }
5986 
5987 static void tcg_out_helper_load_common_args(TCGContext *s,
5988                                             const TCGLabelQemuLdst *ldst,
5989                                             const TCGLdstHelperParam *parm,
5990                                             const TCGHelperInfo *info,
5991                                             unsigned next_arg)
5992 {
5993     TCGMovExtend ptr_mov = {
5994         .dst_type = TCG_TYPE_PTR,
5995         .src_type = TCG_TYPE_PTR,
5996         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5997     };
5998     const TCGCallArgumentLoc *loc = &info->in[0];
5999     TCGType type;
6000     unsigned slot;
6001     tcg_target_ulong imm;
6002 
6003     /*
6004      * Handle env, which is always first.
6005      */
6006     ptr_mov.dst = loc->arg_slot;
6007     ptr_mov.src = TCG_AREG0;
6008     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6009 
6010     /*
6011      * Handle oi.
6012      */
6013     imm = ldst->oi;
6014     loc = &info->in[next_arg];
6015     type = TCG_TYPE_I32;
6016     switch (loc->kind) {
6017     case TCG_CALL_ARG_NORMAL:
6018         break;
6019     case TCG_CALL_ARG_EXTEND_U:
6020     case TCG_CALL_ARG_EXTEND_S:
6021         /* No extension required for MemOpIdx. */
6022         tcg_debug_assert(imm <= INT32_MAX);
6023         type = TCG_TYPE_REG;
6024         break;
6025     default:
6026         g_assert_not_reached();
6027     }
6028     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6029     next_arg++;
6030 
6031     /*
6032      * Handle ra.
6033      */
6034     loc = &info->in[next_arg];
6035     slot = loc->arg_slot;
6036     if (parm->ra_gen) {
6037         int arg_reg = -1;
6038         TCGReg ra_reg;
6039 
6040         if (arg_slot_reg_p(slot)) {
6041             arg_reg = tcg_target_call_iarg_regs[slot];
6042         }
6043         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6044 
6045         ptr_mov.dst = slot;
6046         ptr_mov.src = ra_reg;
6047         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6048     } else {
6049         imm = (uintptr_t)ldst->raddr;
6050         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6051     }
6052 }
6053 
6054 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6055                                        const TCGCallArgumentLoc *loc,
6056                                        TCGType dst_type, TCGType src_type,
6057                                        TCGReg lo, TCGReg hi)
6058 {
6059     MemOp reg_mo;
6060 
6061     if (dst_type <= TCG_TYPE_REG) {
6062         MemOp src_ext;
6063 
6064         switch (loc->kind) {
6065         case TCG_CALL_ARG_NORMAL:
6066             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6067             break;
6068         case TCG_CALL_ARG_EXTEND_U:
6069             dst_type = TCG_TYPE_REG;
6070             src_ext = MO_UL;
6071             break;
6072         case TCG_CALL_ARG_EXTEND_S:
6073             dst_type = TCG_TYPE_REG;
6074             src_ext = MO_SL;
6075             break;
6076         default:
6077             g_assert_not_reached();
6078         }
6079 
6080         mov[0].dst = loc->arg_slot;
6081         mov[0].dst_type = dst_type;
6082         mov[0].src = lo;
6083         mov[0].src_type = src_type;
6084         mov[0].src_ext = src_ext;
6085         return 1;
6086     }
6087 
6088     if (TCG_TARGET_REG_BITS == 32) {
6089         assert(dst_type == TCG_TYPE_I64);
6090         reg_mo = MO_32;
6091     } else {
6092         assert(dst_type == TCG_TYPE_I128);
6093         reg_mo = MO_64;
6094     }
6095 
6096     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6097     mov[0].src = lo;
6098     mov[0].dst_type = TCG_TYPE_REG;
6099     mov[0].src_type = TCG_TYPE_REG;
6100     mov[0].src_ext = reg_mo;
6101 
6102     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6103     mov[1].src = hi;
6104     mov[1].dst_type = TCG_TYPE_REG;
6105     mov[1].src_type = TCG_TYPE_REG;
6106     mov[1].src_ext = reg_mo;
6107 
6108     return 2;
6109 }
6110 
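/*
 * Illustrative check (not part of the build): why the low half goes to
 * loc[HOST_BIG_ENDIAN] above.  The first 32-bit word of a 64-bit integer
 * in memory is the low half on little-endian hosts and the high half on
 * big-endian hosts, so the slot order must flip with the endianness.
 */
#if 0
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    uint64_t v = 0x0000000100000002ull;   /* hi = 1, lo = 2 */
    uint32_t w[2];
    memcpy(w, &v, sizeof(v));

    int big_endian = (w[0] == 1);
    assert(w[big_endian] == 2);           /* low half at index BE   */
    assert(w[!big_endian] == 1);          /* high half at index !BE */
    return 0;
}
#endif
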
6111 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6112                                    const TCGLdstHelperParam *parm)
6113 {
6114     const TCGHelperInfo *info;
6115     const TCGCallArgumentLoc *loc;
6116     TCGMovExtend mov[2];
6117     unsigned next_arg, nmov;
6118     MemOp mop = get_memop(ldst->oi);
6119 
6120     switch (mop & MO_SIZE) {
6121     case MO_8:
6122     case MO_16:
6123     case MO_32:
6124         info = &info_helper_ld32_mmu;
6125         break;
6126     case MO_64:
6127         info = &info_helper_ld64_mmu;
6128         break;
6129     case MO_128:
6130         info = &info_helper_ld128_mmu;
6131         break;
6132     default:
6133         g_assert_not_reached();
6134     }
6135 
6136     /* Defer env argument. */
6137     next_arg = 1;
6138 
6139     loc = &info->in[next_arg];
6140     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6141         /*
6142          * 32-bit host with 32-bit guest: zero-extend the guest address
6143          * to 64 bits for the helper by storing the low part, then
6144          * load a zero for the high part.
6145          */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
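
/*
 * A hedged sketch of how a backend's slow path uses the helpers above
 * (TCG_REG_TMP0 and qemu_ld_helpers[] stand in for backend-specific
 * definitions; real backends live in tcg/<arch>/tcg-target.c.inc):
 *
 *     static const TCGLdstHelperParam ldst_helper_param = {
 *         .ntmp = 1, .tmp = { TCG_REG_TMP0 }
 *     };
 *
 *     static bool tcg_out_qemu_ld_slow_path(TCGContext *s,
 *                                           TCGLabelQemuLdst *l)
 *     {
 *         MemOp opc = get_memop(l->oi);
 *
 *         // patch the branch from the fast path, then:
 *         tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 *         tcg_out_call(s, qemu_ld_helpers[opc & MO_SIZE], ...);
 *         tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 *         // jump back to the fast path
 *         return true;
 *     }
 */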

static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If not, then we expect the relevant extension instruction
         * to be no more expensive than a move, and we thus save
         * icache space etc. by using only one of the two helper
         * functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            break;
        case TCG_CALL_RET_BY_VEC:
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
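
/*
 * Worked example (illustrative only): a 16-byte load whose helper
 * returned via TCG_CALL_RET_BY_VEC on a little-endian host.  The
 * vector result is spilled to stack slot 0 and the halves reloaded:
 *
 *     st  v-result   -> [CALL_STACK + ofs_slot0]
 *     ld  datalo_reg <- [CALL_STACK + ofs_slot0 + 0]
 *     ld  datahi_reg <- [CALL_STACK + ofs_slot0 + 8]
 *
 * On a big-endian host the +0 and +8 offsets are exchanged.
 */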

static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest
         * address to 64 bits for the helper by storing the low part.
         * Later, after we have processed the register inputs, we will
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
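
/*
 * Worked example (illustrative only): an I128 store with
 * TCG_TARGET_CALL_ARG_I128 == TCG_CALL_ARG_BY_REF.  The two 64-bit
 * halves are first spilled to the reserved ref slots, and the actual
 * argument becomes a pointer to that memory:
 *
 *     st  lo/hi  -> [CALL_STACK + arg_slot_stk_ofs(loc[0].ref_slot)]
 *     st  hi/lo  -> [CALL_STACK + arg_slot_stk_ofs(loc[1].ref_slot)]
 *     arg        <- CALL_STACK + arg_slot_stk_ofs(loc->ref_slot)
 *
 * Which half lands where depends on HOST_BIG_ENDIAN.
 */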
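/*
 * Emit host code for the ops in s->ops.  Returns the size in bytes of
 * the generated code on success; -1 if the code_gen_buffer high-water
 * mark was crossed (the caller restarts with a fresh buffer); or a
 * more negative value if finalization or relocation failed, or the TB
 * grew past what gen_insn_end_off can represent (the caller retries
 * with fewer guest instructions).
 */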
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /*
             * Note: it would be much faster to have specialized
             * register allocator functions for some common argument
             * patterns.
             */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, both to supply the value to put into
       the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
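
/* For example, a hedged sketch of the backend side (the DebugFrame
   layout and field values are illustrative, not a real backend):

       // (1) in tcg-target.h
       #define ELF_HOST_MACHINE  EM_NONE

       // (2) a one-time, statically constructed .debug_frame
       static const struct {
           DebugFrameHeader h;
           uint8_t fde_def_cfa[4];
           uint8_t fde_reg_ofs[8];
       } debug_frame = {
           .h.cie.len = ..., .h.fde.len = ..., ...
       };

       // (3) hand it to the common code
       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }
*/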

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

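/* GDB places a breakpoint in this function; calling it after updating
   __jit_debug_descriptor tells the debugger that registered code has
   changed.  The empty asm keeps the call from being optimized away.  */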
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

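/*
 * Return the offset of STR within the NUL-separated string table
 * STRTAB.  With the .str table used below, for example,
 * find_string(img->str, ".text") returns 1.  The caller must only
 * ask for strings known to be present; there is no end-of-table check.
 */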
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; it was allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;
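    /* Patch the FDE of the backend-supplied .debug_frame to cover
       this particular code buffer. */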
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to debug the creation of the ELF image file.
       The image can then be inspected with readelf, objdump, or other
       utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* A failed write only matters for this debugging aid,
                   but checking the result silences warn_unused_result.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif