/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* return address: code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
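
/*
 * Illustrative note: on a host with TCG_TARGET_INSN_UNIT_SIZE == 4
 * (a fixed-width instruction set), tcg_out32() stores a single unit
 * directly, while tcg_out64() takes the memcpy path above and advances
 * code_ptr by two units.
 */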

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

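/*
 * Usage sketch (illustrative only): sign-extending a 32-bit value in
 * register R1 into a 64-bit destination R0 would be
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, R0, TCG_TYPE_I32, MO_SL, R1);
 *
 * which reaches the MO_SL case above and emits tcg_out_exts_i32_i64().
 */
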
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

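/*
 * Illustrative overlap case: if @i1 is R0 <- R1 and @i2 is R1 <- R0,
 * the code above first tries tcg_out_xchg(); when the backend cannot
 * xchg, R1 is preserved in @scratch before being overwritten.
 */
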
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
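
/*
 * Note: keeping the pool sorted (larger entries first, then by data)
 * means identical entries end up adjacent, which lets
 * tcg_out_pool_finalize() below deduplicate them with a single
 * comparison against the previous entry.
 */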

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

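/*
 * Illustrative expansion, assuming a backend uses the constraint set
 * "one output 'r', inputs 'r' and 'ri'":
 *
 *     C_O1_I2(r, r, ri) -> C_PFX3(c_o1_i2_, r, r, ri) -> c_o1_i2_r_r_ri
 *
 * The three expansions of these macros below produce, for that one
 * spelling, an enumerator, a constraint-table entry, and the value
 * returned from tcg_target_op_def(), keeping all three in sync.
 */
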
/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

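/*
 * Minimal backend sketch (illustrative; tgen_add/tgen_addi are
 * hypothetical emitters, not functions defined in this file):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * Each backend provides such objects in tcg-target.c.inc; they are
 * collected into all_outop[] below.
 */
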
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

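/*
 * For example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) compiles
 * only if outop_add has type TCGOutOpBinary; had a backend declared it
 * as, say, TCGOutOpUnary, the _Generic selection would have no matching
 * association and the build would fail.
 */
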
/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
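
/*
 * Callers normally use the tcg_malloc() inline wrapper, which carves
 * small allocations out of the current chunk and only calls here on
 * overflow, e.g. as in gen_new_label() above:
 *
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 *
 * Such memory lives until the next tcg_pool_reset(); nothing is
 * individually freed.
 */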

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
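    /*
     * Worked example: each argument typecode occupies 3 bits above the
     * 3-bit return type, so for a helper with four arguments the last
     * non-zero field lies within bits [9:11] of the shifted mask.
     * 32 - clz32() then yields a value in 10..12, and the division
     * rounds it up to nargs = 4.
     */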
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}
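
/*
 * E.g. (illustrative) on a 32-bit host whose ABI passes 64-bit values
 * in even/odd register pairs (TCG_CALL_ARG_EVEN), an argument that
 * would otherwise start at odd slot 1 is bumped to slot 2 first.
 */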

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structures passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
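    /*
     * Worked example (illustrative): with 6 register slots and
     * cum.arg_slot == 8, two stack slots hold parameters, so ref_base
     * starts at 2, is rounded up to Int128 alignment (still 2 on a
     * 64-bit host), and becomes stack slot 8 after adding back the
     * register slots; the by-reference copies land just past the
     * stacked parameters.
     */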
1537     if (cum.ref_slot != 0) {
1538         int ref_base = 0;
1539 
1540         if (cum.arg_slot > max_reg_slots) {
1541             int align = __alignof(Int128) / sizeof(tcg_target_long);
1542 
1543             ref_base = cum.arg_slot - max_reg_slots;
1544             if (align > 1) {
1545                 ref_base = ROUND_UP(ref_base, align);
1546             }
1547         }
1548         assert(ref_base + cum.ref_slot <= max_stk_slots);
1549         ref_base += max_reg_slots;
1550 
1551         if (ref_base != 0) {
1552             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1553                 TCGCallArgumentLoc *loc = &info->in[i];
1554                 switch (loc->kind) {
1555                 case TCG_CALL_ARG_BY_REF:
1556                 case TCG_CALL_ARG_BY_REF_N:
1557                     loc->ref_slot += ref_base;
1558                     break;
1559                 default:
1560                     break;
1561                 }
1562             }
1563         }
1564     }
1565 }
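     /*
      * Worked example (a sketch, independent of any particular host):
      * with max_reg_slots = 8 and cum.arg_slot = 10, two slots of
      * stacked parameters are in use, so ref_base starts at 2, is
      * rounded up to Int128 alignment, and is then offset by the 8
      * register slots -- placing the by-reference storage immediately
      * after the stacked parameters.  Only TCG_CALL_ARG_BY_REF and
      * TCG_CALL_ARG_BY_REF_N locations carry a ref_slot, so only those
      * are rebased.
      */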
1566 
1567 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1568 static void process_constraint_sets(void);
1569 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1570                                             TCGReg reg, const char *name);
1571 
1572 static void tcg_context_init(unsigned max_threads)
1573 {
1574     TCGContext *s = &tcg_init_ctx;
1575     int n, i;
1576     TCGTemp *ts;
1577 
1578     memset(s, 0, sizeof(*s));
1579     s->nb_globals = 0;
1580 
1581     init_call_layout(&info_helper_ld32_mmu);
1582     init_call_layout(&info_helper_ld64_mmu);
1583     init_call_layout(&info_helper_ld128_mmu);
1584     init_call_layout(&info_helper_st32_mmu);
1585     init_call_layout(&info_helper_st64_mmu);
1586     init_call_layout(&info_helper_st128_mmu);
1587 
1588     tcg_target_init(s);
1589     process_constraint_sets();
1590 
1591     /* Reverse the order of the saved registers, assuming they're all at
1592        the start of tcg_target_reg_alloc_order.  */
1593     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1594         int r = tcg_target_reg_alloc_order[n];
1595         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1596             break;
1597         }
1598     }
1599     for (i = 0; i < n; ++i) {
1600         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1601     }
1602     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1603         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1604     }
1605 
1606     tcg_ctx = s;
1607     /*
1608      * In user-mode we simply share the init context among threads, since we
1609      * use a single region. See the documentation of tcg_region_init()
1610      * for the reasoning behind this.
1611      * In system-mode we will have at most max_threads TCG threads.
1612      */
1613 #ifdef CONFIG_USER_ONLY
1614     tcg_ctxs = &tcg_ctx;
1615     tcg_cur_ctxs = 1;
1616     tcg_max_ctxs = 1;
1617 #else
1618     tcg_max_ctxs = max_threads;
1619     tcg_ctxs = g_new0(TCGContext *, max_threads);
1620 #endif
1621 
1622     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1623     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1624     tcg_env = temp_tcgv_ptr(ts);
1625 }
1626 
1627 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1628 {
1629     tcg_context_init(max_threads);
1630     tcg_region_init(tb_size, splitwx, max_threads);
1631 }
1632 
1633 /*
1634  * Allocate TBs right before their corresponding translated code, making
1635  * sure that TBs and code are on different cache lines.
1636  */
1637 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1638 {
1639     uintptr_t align = qemu_icache_linesize;
1640     TranslationBlock *tb;
1641     void *next;
1642 
1643  retry:
1644     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1645     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1646 
1647     if (unlikely(next > s->code_gen_highwater)) {
1648         if (tcg_region_alloc(s)) {
1649             return NULL;
1650         }
1651         goto retry;
1652     }
1653     qatomic_set(&s->code_gen_ptr, next);
1654     return tb;
1655 }
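     /*
      * Illustration (assuming, say, a 64-byte icache line): if
      * code_gen_ptr is already line-aligned, the TB structure starts
      * there and the translated code begins at the next line boundary
      * past sizeof(TranslationBlock), so TB metadata and the code it
      * describes never share a cache line.
      */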
1656 
1657 void tcg_prologue_init(void)
1658 {
1659     TCGContext *s = tcg_ctx;
1660     size_t prologue_size;
1661 
1662     s->code_ptr = s->code_gen_ptr;
1663     s->code_buf = s->code_gen_ptr;
1664     s->data_gen_ptr = NULL;
1665 
1666 #ifndef CONFIG_TCG_INTERPRETER
1667     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1668 #endif
1669 
1670     s->pool_labels = NULL;
1671 
1672     qemu_thread_jit_write();
1673     /* Generate the prologue.  */
1674     tcg_target_qemu_prologue(s);
1675 
1676     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1677     {
1678         int result = tcg_out_pool_finalize(s);
1679         tcg_debug_assert(result == 0);
1680     }
1681 
1682     prologue_size = tcg_current_code_size(s);
1683     perf_report_prologue(s->code_gen_ptr, prologue_size);
1684 
1685 #ifndef CONFIG_TCG_INTERPRETER
1686     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1687                         (uintptr_t)s->code_buf, prologue_size);
1688 #endif
1689 
1690     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1691         FILE *logfile = qemu_log_trylock();
1692         if (logfile) {
1693             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1694             if (s->data_gen_ptr) {
1695                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1696                 size_t data_size = prologue_size - code_size;
1697                 size_t i;
1698 
1699                 disas(logfile, s->code_gen_ptr, code_size);
1700 
1701                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1702                     if (sizeof(tcg_target_ulong) == 8) {
1703                         fprintf(logfile,
1704                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1705                                 (uintptr_t)s->data_gen_ptr + i,
1706                                 *(uint64_t *)(s->data_gen_ptr + i));
1707                     } else {
1708                         fprintf(logfile,
1709                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1710                                 (uintptr_t)s->data_gen_ptr + i,
1711                                 *(uint32_t *)(s->data_gen_ptr + i));
1712                     }
1713                 }
1714             } else {
1715                 disas(logfile, s->code_gen_ptr, prologue_size);
1716             }
1717             fprintf(logfile, "\n");
1718             qemu_log_unlock(logfile);
1719         }
1720     }
1721 
1722 #ifndef CONFIG_TCG_INTERPRETER
1723     /*
1724      * Assert that goto_ptr is implemented completely, setting an epilogue.
1725      * For tci, we use NULL as the signal to return from the interpreter,
1726      * so skip this check.
1727      */
1728     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1729 #endif
1730 
1731     tcg_region_prologue_set(s);
1732 }
1733 
1734 void tcg_func_start(TCGContext *s)
1735 {
1736     tcg_pool_reset(s);
1737     s->nb_temps = s->nb_globals;
1738 
1739     /* No temps have been previously allocated; reset freed-EBB tracking.  */
1740     tcg_temp_ebb_reset_freed(s);
1741 
1742     /* No constant temps have been previously allocated. */
1743     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1744         if (s->const_table[i]) {
1745             g_hash_table_remove_all(s->const_table[i]);
1746         }
1747     }
1748 
1749     s->nb_ops = 0;
1750     s->nb_labels = 0;
1751     s->current_frame_offset = s->frame_start;
1752 
1753 #ifdef CONFIG_DEBUG_TCG
1754     s->goto_tb_issue_mask = 0;
1755 #endif
1756 
1757     QTAILQ_INIT(&s->ops);
1758     QTAILQ_INIT(&s->free_ops);
1759     s->emit_before_op = NULL;
1760     QSIMPLEQ_INIT(&s->labels);
1761 
1762     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1763     tcg_debug_assert(s->insn_start_words > 0);
1764 }
1765 
1766 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1767 {
1768     int n = s->nb_temps++;
1769 
1770     if (n >= TCG_MAX_TEMPS) {
1771         tcg_raise_tb_overflow(s);
1772     }
1773     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1774 }
1775 
1776 static TCGTemp *tcg_global_alloc(TCGContext *s)
1777 {
1778     TCGTemp *ts;
1779 
1780     tcg_debug_assert(s->nb_globals == s->nb_temps);
1781     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1782     s->nb_globals++;
1783     ts = tcg_temp_alloc(s);
1784     ts->kind = TEMP_GLOBAL;
1785 
1786     return ts;
1787 }
1788 
1789 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1790                                             TCGReg reg, const char *name)
1791 {
1792     TCGTemp *ts;
1793 
1794     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1795 
1796     ts = tcg_global_alloc(s);
1797     ts->base_type = type;
1798     ts->type = type;
1799     ts->kind = TEMP_FIXED;
1800     ts->reg = reg;
1801     ts->name = name;
1802     tcg_regset_set_reg(s->reserved_regs, reg);
1803 
1804     return ts;
1805 }
1806 
1807 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1808 {
1809     s->frame_start = start;
1810     s->frame_end = start + size;
1811     s->frame_temp
1812         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1813 }
1814 
1815 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1816                                             const char *name, TCGType type)
1817 {
1818     TCGContext *s = tcg_ctx;
1819     TCGTemp *base_ts = tcgv_ptr_temp(base);
1820     TCGTemp *ts = tcg_global_alloc(s);
1821     int indirect_reg = 0;
1822 
1823     switch (base_ts->kind) {
1824     case TEMP_FIXED:
1825         break;
1826     case TEMP_GLOBAL:
1827         /* We do not support double-indirect registers.  */
1828         tcg_debug_assert(!base_ts->indirect_reg);
1829         base_ts->indirect_base = 1;
1830         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1831                             ? 2 : 1);
1832         indirect_reg = 1;
1833         break;
1834     default:
1835         g_assert_not_reached();
1836     }
1837 
1838     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1839         TCGTemp *ts2 = tcg_global_alloc(s);
1840         char buf[64];
1841 
1842         ts->base_type = TCG_TYPE_I64;
1843         ts->type = TCG_TYPE_I32;
1844         ts->indirect_reg = indirect_reg;
1845         ts->mem_allocated = 1;
1846         ts->mem_base = base_ts;
1847         ts->mem_offset = offset;
1848         pstrcpy(buf, sizeof(buf), name);
1849         pstrcat(buf, sizeof(buf), "_0");
1850         ts->name = strdup(buf);
1851 
1852         tcg_debug_assert(ts2 == ts + 1);
1853         ts2->base_type = TCG_TYPE_I64;
1854         ts2->type = TCG_TYPE_I32;
1855         ts2->indirect_reg = indirect_reg;
1856         ts2->mem_allocated = 1;
1857         ts2->mem_base = base_ts;
1858         ts2->mem_offset = offset + 4;
1859         ts2->temp_subindex = 1;
1860         pstrcpy(buf, sizeof(buf), name);
1861         pstrcat(buf, sizeof(buf), "_1");
1862         ts2->name = strdup(buf);
1863     } else {
1864         ts->base_type = type;
1865         ts->type = type;
1866         ts->indirect_reg = indirect_reg;
1867         ts->mem_allocated = 1;
1868         ts->mem_base = base_ts;
1869         ts->mem_offset = offset;
1870         ts->name = name;
1871     }
1872     return ts;
1873 }
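     /*
      * Note: on a 32-bit host a 64-bit global becomes two adjacent I32
      * temps with base_type I64, named with "_0" and "_1" suffixes; the
      * second half lives at mem_offset + 4.  Which half holds the low
      * bits follows the host's layout of the underlying env field.
      */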
1874 
1875 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1876 {
1877     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1878     return temp_tcgv_i32(ts);
1879 }
1880 
1881 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1882 {
1883     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1884     return temp_tcgv_i64(ts);
1885 }
1886 
1887 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1888 {
1889     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1890     return temp_tcgv_ptr(ts);
1891 }
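     /*
      * Typical usage from a target frontend (a sketch; CPUFooState and
      * its "pc" field are illustrative, not defined anywhere here):
      *
      *     TCGv_i32 cpu_pc = tcg_global_mem_new_i32(
      *         tcg_env, offsetof(CPUFooState, pc), "pc");
      *
      * The resulting global is valid across the whole translation and
      * is spilled to / reloaded from env as register pressure demands.
      */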
1892 
1893 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1894 {
1895     TCGContext *s = tcg_ctx;
1896     TCGTemp *ts;
1897     int n;
1898 
1899     if (kind == TEMP_EBB) {
1900         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1901 
1902         if (idx < TCG_MAX_TEMPS) {
1903             /* There is already an available temp with the right type.  */
1904             clear_bit(idx, s->free_temps[type].l);
1905 
1906             ts = &s->temps[idx];
1907             ts->temp_allocated = 1;
1908             tcg_debug_assert(ts->base_type == type);
1909             tcg_debug_assert(ts->kind == kind);
1910             return ts;
1911         }
1912     } else {
1913         tcg_debug_assert(kind == TEMP_TB);
1914     }
1915 
1916     switch (type) {
1917     case TCG_TYPE_I32:
1918     case TCG_TYPE_V64:
1919     case TCG_TYPE_V128:
1920     case TCG_TYPE_V256:
1921         n = 1;
1922         break;
1923     case TCG_TYPE_I64:
1924         n = 64 / TCG_TARGET_REG_BITS;
1925         break;
1926     case TCG_TYPE_I128:
1927         n = 128 / TCG_TARGET_REG_BITS;
1928         break;
1929     default:
1930         g_assert_not_reached();
1931     }
1932 
1933     ts = tcg_temp_alloc(s);
1934     ts->base_type = type;
1935     ts->temp_allocated = 1;
1936     ts->kind = kind;
1937 
1938     if (n == 1) {
1939         ts->type = type;
1940     } else {
1941         ts->type = TCG_TYPE_REG;
1942 
1943         for (int i = 1; i < n; ++i) {
1944             TCGTemp *ts2 = tcg_temp_alloc(s);
1945 
1946             tcg_debug_assert(ts2 == ts + i);
1947             ts2->base_type = type;
1948             ts2->type = TCG_TYPE_REG;
1949             ts2->temp_allocated = 1;
1950             ts2->temp_subindex = i;
1951             ts2->kind = kind;
1952         }
1953     }
1954     return ts;
1955 }
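     /*
      * For example: TCG_TYPE_I128 on a 64-bit host gives n = 2, so two
      * adjacent TCGTemps are allocated with temp_subindex 0 and 1, each
      * typed TCG_TYPE_REG; on a 32-bit host the same request allocates
      * four.  Single-word and vector types always occupy one TCGTemp.
      */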
1956 
1957 TCGv_i32 tcg_temp_new_i32(void)
1958 {
1959     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1960 }
1961 
1962 TCGv_i32 tcg_temp_ebb_new_i32(void)
1963 {
1964     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1965 }
1966 
1967 TCGv_i64 tcg_temp_new_i64(void)
1968 {
1969     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1970 }
1971 
1972 TCGv_i64 tcg_temp_ebb_new_i64(void)
1973 {
1974     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1975 }
1976 
1977 TCGv_ptr tcg_temp_new_ptr(void)
1978 {
1979     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1980 }
1981 
1982 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1983 {
1984     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1985 }
1986 
1987 TCGv_i128 tcg_temp_new_i128(void)
1988 {
1989     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1990 }
1991 
1992 TCGv_i128 tcg_temp_ebb_new_i128(void)
1993 {
1994     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1995 }
1996 
1997 TCGv_vec tcg_temp_new_vec(TCGType type)
1998 {
1999     TCGTemp *t;
2000 
2001 #ifdef CONFIG_DEBUG_TCG
2002     switch (type) {
2003     case TCG_TYPE_V64:
2004         assert(TCG_TARGET_HAS_v64);
2005         break;
2006     case TCG_TYPE_V128:
2007         assert(TCG_TARGET_HAS_v128);
2008         break;
2009     case TCG_TYPE_V256:
2010         assert(TCG_TARGET_HAS_v256);
2011         break;
2012     default:
2013         g_assert_not_reached();
2014     }
2015 #endif
2016 
2017     t = tcg_temp_new_internal(type, TEMP_EBB);
2018     return temp_tcgv_vec(t);
2019 }
2020 
2021 /* Create a new temp of the same type as an existing temp.  */
2022 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2023 {
2024     TCGTemp *t = tcgv_vec_temp(match);
2025 
2026     tcg_debug_assert(t->temp_allocated != 0);
2027 
2028     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2029     return temp_tcgv_vec(t);
2030 }
2031 
2032 void tcg_temp_free_internal(TCGTemp *ts)
2033 {
2034     TCGContext *s = tcg_ctx;
2035 
2036     switch (ts->kind) {
2037     case TEMP_CONST:
2038     case TEMP_TB:
2039         /* Silently ignore free. */
2040         break;
2041     case TEMP_EBB:
2042         tcg_debug_assert(ts->temp_allocated != 0);
2043         ts->temp_allocated = 0;
2044         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2045         break;
2046     default:
2047         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2048         g_assert_not_reached();
2049     }
2050 }
2051 
2052 void tcg_temp_free_i32(TCGv_i32 arg)
2053 {
2054     tcg_temp_free_internal(tcgv_i32_temp(arg));
2055 }
2056 
2057 void tcg_temp_free_i64(TCGv_i64 arg)
2058 {
2059     tcg_temp_free_internal(tcgv_i64_temp(arg));
2060 }
2061 
2062 void tcg_temp_free_i128(TCGv_i128 arg)
2063 {
2064     tcg_temp_free_internal(tcgv_i128_temp(arg));
2065 }
2066 
2067 void tcg_temp_free_ptr(TCGv_ptr arg)
2068 {
2069     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2070 }
2071 
2072 void tcg_temp_free_vec(TCGv_vec arg)
2073 {
2074     tcg_temp_free_internal(tcgv_vec_temp(arg));
2075 }
2076 
2077 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2078 {
2079     TCGContext *s = tcg_ctx;
2080     GHashTable *h = s->const_table[type];
2081     TCGTemp *ts;
2082 
2083     if (h == NULL) {
2084         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2085         s->const_table[type] = h;
2086     }
2087 
2088     ts = g_hash_table_lookup(h, &val);
2089     if (ts == NULL) {
2090         int64_t *val_ptr;
2091 
2092         ts = tcg_temp_alloc(s);
2093 
2094         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2095             TCGTemp *ts2 = tcg_temp_alloc(s);
2096 
2097             tcg_debug_assert(ts2 == ts + 1);
2098 
2099             ts->base_type = TCG_TYPE_I64;
2100             ts->type = TCG_TYPE_I32;
2101             ts->kind = TEMP_CONST;
2102             ts->temp_allocated = 1;
2103 
2104             ts2->base_type = TCG_TYPE_I64;
2105             ts2->type = TCG_TYPE_I32;
2106             ts2->kind = TEMP_CONST;
2107             ts2->temp_allocated = 1;
2108             ts2->temp_subindex = 1;
2109 
2110             /*
2111              * Retain the full value of the 64-bit constant in the low
2112              * part, so that the hash table works.  Actual uses will
2113              * truncate the value to the low part.
2114              */
2115             ts[HOST_BIG_ENDIAN].val = val;
2116             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2117             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2118         } else {
2119             ts->base_type = type;
2120             ts->type = type;
2121             ts->kind = TEMP_CONST;
2122             ts->temp_allocated = 1;
2123             ts->val = val;
2124             val_ptr = &ts->val;
2125         }
2126         g_hash_table_insert(h, val_ptr, ts);
2127     }
2128 
2129     return ts;
2130 }
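     /*
      * Example of the 32-bit-host split above, assuming a little-endian
      * host: tcg_constant_i64(0x1122334455667788) stores the full value
      * in ts[0].val (uses read only its low 32 bits) and 0x11223344 in
      * ts[1].val; the hash key points at ts[0].val, so a later request
      * for the same 64-bit constant finds the same pair.
      */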
2131 
2132 TCGv_i32 tcg_constant_i32(int32_t val)
2133 {
2134     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2135 }
2136 
2137 TCGv_i64 tcg_constant_i64(int64_t val)
2138 {
2139     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2140 }
2141 
2142 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2143 {
2144     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2145 }
2146 
2147 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2148 {
2149     val = dup_const(vece, val);
2150     return temp_tcgv_vec(tcg_constant_internal(type, val));
2151 }
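     /*
      * dup_const replicates the element across 64 bits before the value
      * is interned, e.g. tcg_constant_vec(TCG_TYPE_V128, MO_8, 0x7f)
      * interns 0x7f7f7f7f7f7f7f7f, which the backend then broadcasts to
      * the full vector width.
      */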
2152 
2153 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2154 {
2155     TCGTemp *t = tcgv_vec_temp(match);
2156 
2157     tcg_debug_assert(t->temp_allocated != 0);
2158     return tcg_constant_vec(t->base_type, vece, val);
2159 }
2160 
2161 #ifdef CONFIG_DEBUG_TCG
2162 size_t temp_idx(TCGTemp *ts)
2163 {
2164     ptrdiff_t n = ts - tcg_ctx->temps;
2165     assert(n >= 0 && n < tcg_ctx->nb_temps);
2166     return n;
2167 }
2168 
2169 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2170 {
2171     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2172 
2173     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2174     assert(o % sizeof(TCGTemp) == 0);
2175 
2176     return (void *)tcg_ctx + (uintptr_t)v;
2177 }
2178 #endif /* CONFIG_DEBUG_TCG */
2179 
2180 /*
2181  * Return true if OP may appear in the opcode stream with TYPE.
2182  * Test the runtime variable that controls each opcode.
2183  */
2184 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2185 {
2186     bool has_type;
2187 
2188     switch (type) {
2189     case TCG_TYPE_I32:
2190         has_type = true;
2191         break;
2192     case TCG_TYPE_I64:
2193         has_type = TCG_TARGET_REG_BITS == 64;
2194         break;
2195     case TCG_TYPE_V64:
2196         has_type = TCG_TARGET_HAS_v64;
2197         break;
2198     case TCG_TYPE_V128:
2199         has_type = TCG_TARGET_HAS_v128;
2200         break;
2201     case TCG_TYPE_V256:
2202         has_type = TCG_TARGET_HAS_v256;
2203         break;
2204     default:
2205         has_type = false;
2206         break;
2207     }
2208 
2209     switch (op) {
2210     case INDEX_op_discard:
2211     case INDEX_op_set_label:
2212     case INDEX_op_call:
2213     case INDEX_op_br:
2214     case INDEX_op_mb:
2215     case INDEX_op_insn_start:
2216     case INDEX_op_exit_tb:
2217     case INDEX_op_goto_tb:
2218     case INDEX_op_goto_ptr:
2219     case INDEX_op_qemu_ld_i32:
2220     case INDEX_op_qemu_st_i32:
2221     case INDEX_op_qemu_ld_i64:
2222     case INDEX_op_qemu_st_i64:
2223         return true;
2224 
2225     case INDEX_op_qemu_st8_i32:
2226         return TCG_TARGET_HAS_qemu_st8_i32;
2227 
2228     case INDEX_op_qemu_ld_i128:
2229     case INDEX_op_qemu_st_i128:
2230         return TCG_TARGET_HAS_qemu_ldst_i128;
2231 
2232     case INDEX_op_add:
2233     case INDEX_op_and:
2234     case INDEX_op_mov:
2235     case INDEX_op_or:
2236     case INDEX_op_xor:
2237         return has_type;
2238 
2239     case INDEX_op_setcond_i32:
2240     case INDEX_op_brcond_i32:
2241     case INDEX_op_movcond_i32:
2242     case INDEX_op_ld8u_i32:
2243     case INDEX_op_ld8s_i32:
2244     case INDEX_op_ld16u_i32:
2245     case INDEX_op_ld16s_i32:
2246     case INDEX_op_ld_i32:
2247     case INDEX_op_st8_i32:
2248     case INDEX_op_st16_i32:
2249     case INDEX_op_st_i32:
2250     case INDEX_op_mul_i32:
2251     case INDEX_op_shl_i32:
2252     case INDEX_op_shr_i32:
2253     case INDEX_op_sar_i32:
2254     case INDEX_op_extract_i32:
2255     case INDEX_op_sextract_i32:
2256     case INDEX_op_deposit_i32:
2257         return true;
2258 
2259     case INDEX_op_negsetcond_i32:
2260         return TCG_TARGET_HAS_negsetcond_i32;
2261     case INDEX_op_div_i32:
2262     case INDEX_op_divu_i32:
2263         return TCG_TARGET_HAS_div_i32;
2264     case INDEX_op_rem_i32:
2265     case INDEX_op_remu_i32:
2266         return TCG_TARGET_HAS_rem_i32;
2267     case INDEX_op_div2_i32:
2268     case INDEX_op_divu2_i32:
2269         return TCG_TARGET_HAS_div2_i32;
2270     case INDEX_op_rotl_i32:
2271     case INDEX_op_rotr_i32:
2272         return TCG_TARGET_HAS_rot_i32;
2273     case INDEX_op_extract2_i32:
2274         return TCG_TARGET_HAS_extract2_i32;
2275     case INDEX_op_add2_i32:
2276         return TCG_TARGET_HAS_add2_i32;
2277     case INDEX_op_sub2_i32:
2278         return TCG_TARGET_HAS_sub2_i32;
2279     case INDEX_op_mulu2_i32:
2280         return TCG_TARGET_HAS_mulu2_i32;
2281     case INDEX_op_muls2_i32:
2282         return TCG_TARGET_HAS_muls2_i32;
2283     case INDEX_op_muluh_i32:
2284         return TCG_TARGET_HAS_muluh_i32;
2285     case INDEX_op_mulsh_i32:
2286         return TCG_TARGET_HAS_mulsh_i32;
2287     case INDEX_op_bswap16_i32:
2288         return TCG_TARGET_HAS_bswap16_i32;
2289     case INDEX_op_bswap32_i32:
2290         return TCG_TARGET_HAS_bswap32_i32;
2291     case INDEX_op_clz_i32:
2292         return TCG_TARGET_HAS_clz_i32;
2293     case INDEX_op_ctz_i32:
2294         return TCG_TARGET_HAS_ctz_i32;
2295     case INDEX_op_ctpop_i32:
2296         return TCG_TARGET_HAS_ctpop_i32;
2297 
2298     case INDEX_op_brcond2_i32:
2299     case INDEX_op_setcond2_i32:
2300         return TCG_TARGET_REG_BITS == 32;
2301 
2302     case INDEX_op_setcond_i64:
2303     case INDEX_op_brcond_i64:
2304     case INDEX_op_movcond_i64:
2305     case INDEX_op_ld8u_i64:
2306     case INDEX_op_ld8s_i64:
2307     case INDEX_op_ld16u_i64:
2308     case INDEX_op_ld16s_i64:
2309     case INDEX_op_ld32u_i64:
2310     case INDEX_op_ld32s_i64:
2311     case INDEX_op_ld_i64:
2312     case INDEX_op_st8_i64:
2313     case INDEX_op_st16_i64:
2314     case INDEX_op_st32_i64:
2315     case INDEX_op_st_i64:
2316     case INDEX_op_mul_i64:
2317     case INDEX_op_shl_i64:
2318     case INDEX_op_shr_i64:
2319     case INDEX_op_sar_i64:
2320     case INDEX_op_ext_i32_i64:
2321     case INDEX_op_extu_i32_i64:
2322     case INDEX_op_extract_i64:
2323     case INDEX_op_sextract_i64:
2324     case INDEX_op_deposit_i64:
2325         return TCG_TARGET_REG_BITS == 64;
2326 
2327     case INDEX_op_negsetcond_i64:
2328         return TCG_TARGET_HAS_negsetcond_i64;
2329     case INDEX_op_div_i64:
2330     case INDEX_op_divu_i64:
2331         return TCG_TARGET_HAS_div_i64;
2332     case INDEX_op_rem_i64:
2333     case INDEX_op_remu_i64:
2334         return TCG_TARGET_HAS_rem_i64;
2335     case INDEX_op_div2_i64:
2336     case INDEX_op_divu2_i64:
2337         return TCG_TARGET_HAS_div2_i64;
2338     case INDEX_op_rotl_i64:
2339     case INDEX_op_rotr_i64:
2340         return TCG_TARGET_HAS_rot_i64;
2341     case INDEX_op_extract2_i64:
2342         return TCG_TARGET_HAS_extract2_i64;
2343     case INDEX_op_extrl_i64_i32:
2344     case INDEX_op_extrh_i64_i32:
2345         return TCG_TARGET_HAS_extr_i64_i32;
2346     case INDEX_op_bswap16_i64:
2347         return TCG_TARGET_HAS_bswap16_i64;
2348     case INDEX_op_bswap32_i64:
2349         return TCG_TARGET_HAS_bswap32_i64;
2350     case INDEX_op_bswap64_i64:
2351         return TCG_TARGET_HAS_bswap64_i64;
2352     case INDEX_op_clz_i64:
2353         return TCG_TARGET_HAS_clz_i64;
2354     case INDEX_op_ctz_i64:
2355         return TCG_TARGET_HAS_ctz_i64;
2356     case INDEX_op_ctpop_i64:
2357         return TCG_TARGET_HAS_ctpop_i64;
2358     case INDEX_op_add2_i64:
2359         return TCG_TARGET_HAS_add2_i64;
2360     case INDEX_op_sub2_i64:
2361         return TCG_TARGET_HAS_sub2_i64;
2362     case INDEX_op_mulu2_i64:
2363         return TCG_TARGET_HAS_mulu2_i64;
2364     case INDEX_op_muls2_i64:
2365         return TCG_TARGET_HAS_muls2_i64;
2366     case INDEX_op_muluh_i64:
2367         return TCG_TARGET_HAS_muluh_i64;
2368     case INDEX_op_mulsh_i64:
2369         return TCG_TARGET_HAS_mulsh_i64;
2370 
2371     case INDEX_op_mov_vec:
2372     case INDEX_op_dup_vec:
2373     case INDEX_op_dupm_vec:
2374     case INDEX_op_ld_vec:
2375     case INDEX_op_st_vec:
2376     case INDEX_op_add_vec:
2377     case INDEX_op_sub_vec:
2378     case INDEX_op_and_vec:
2379     case INDEX_op_or_vec:
2380     case INDEX_op_xor_vec:
2381     case INDEX_op_cmp_vec:
2382         return has_type;
2383     case INDEX_op_dup2_vec:
2384         return has_type && TCG_TARGET_REG_BITS == 32;
2385     case INDEX_op_not_vec:
2386         return has_type && TCG_TARGET_HAS_not_vec;
2387     case INDEX_op_neg_vec:
2388         return has_type && TCG_TARGET_HAS_neg_vec;
2389     case INDEX_op_abs_vec:
2390         return has_type && TCG_TARGET_HAS_abs_vec;
2391     case INDEX_op_andc_vec:
2392         return has_type && TCG_TARGET_HAS_andc_vec;
2393     case INDEX_op_orc_vec:
2394         return has_type && TCG_TARGET_HAS_orc_vec;
2395     case INDEX_op_nand_vec:
2396         return has_type && TCG_TARGET_HAS_nand_vec;
2397     case INDEX_op_nor_vec:
2398         return has_type && TCG_TARGET_HAS_nor_vec;
2399     case INDEX_op_eqv_vec:
2400         return has_type && TCG_TARGET_HAS_eqv_vec;
2401     case INDEX_op_mul_vec:
2402         return has_type && TCG_TARGET_HAS_mul_vec;
2403     case INDEX_op_shli_vec:
2404     case INDEX_op_shri_vec:
2405     case INDEX_op_sari_vec:
2406         return has_type && TCG_TARGET_HAS_shi_vec;
2407     case INDEX_op_shls_vec:
2408     case INDEX_op_shrs_vec:
2409     case INDEX_op_sars_vec:
2410         return has_type && TCG_TARGET_HAS_shs_vec;
2411     case INDEX_op_shlv_vec:
2412     case INDEX_op_shrv_vec:
2413     case INDEX_op_sarv_vec:
2414         return has_type && TCG_TARGET_HAS_shv_vec;
2415     case INDEX_op_rotli_vec:
2416         return has_type && TCG_TARGET_HAS_roti_vec;
2417     case INDEX_op_rotls_vec:
2418         return has_type && TCG_TARGET_HAS_rots_vec;
2419     case INDEX_op_rotlv_vec:
2420     case INDEX_op_rotrv_vec:
2421         return has_type && TCG_TARGET_HAS_rotv_vec;
2422     case INDEX_op_ssadd_vec:
2423     case INDEX_op_usadd_vec:
2424     case INDEX_op_sssub_vec:
2425     case INDEX_op_ussub_vec:
2426         return has_type && TCG_TARGET_HAS_sat_vec;
2427     case INDEX_op_smin_vec:
2428     case INDEX_op_umin_vec:
2429     case INDEX_op_smax_vec:
2430     case INDEX_op_umax_vec:
2431         return has_type && TCG_TARGET_HAS_minmax_vec;
2432     case INDEX_op_bitsel_vec:
2433         return has_type && TCG_TARGET_HAS_bitsel_vec;
2434     case INDEX_op_cmpsel_vec:
2435         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2436 
2437     default:
2438         if (op < INDEX_op_last_generic) {
2439             const TCGOutOp *outop;
2440             TCGConstraintSetIndex con_set;
2441 
2442             if (!has_type) {
2443                 return false;
2444             }
2445 
2446             outop = all_outop[op];
2447             tcg_debug_assert(outop != NULL);
2448 
2449             con_set = outop->static_constraint;
2450             if (con_set == C_Dynamic) {
2451                 con_set = outop->dynamic_constraint(type, flags);
2452             }
2453             if (con_set >= 0) {
2454                 return true;
2455             }
2456             tcg_debug_assert(con_set == C_NotImplemented);
2457             return false;
2458         }
2459         tcg_debug_assert(op < NB_OPS);
2460         return true;
2461 
2462     case INDEX_op_last_generic:
2463         g_assert_not_reached();
2464     }
2465 }
2466 
2467 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2468 {
2469     unsigned width;
2470 
2471     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2472     width = (type == TCG_TYPE_I32 ? 32 : 64);
2473 
2474     tcg_debug_assert(ofs < width);
2475     tcg_debug_assert(len > 0);
2476     tcg_debug_assert(len <= width - ofs);
2477 
2478     return TCG_TARGET_deposit_valid(type, ofs, len);
2479 }
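     /*
      * E.g. tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8) asks whether the
      * backend can deposit an 8-bit field at bit offset 8 of a 32-bit
      * word; the asserts above only check that the field fits.
      */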
2480 
2481 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2482 
2483 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2484                           TCGTemp *ret, TCGTemp **args)
2485 {
2486     TCGv_i64 extend_free[MAX_CALL_IARGS];
2487     int n_extend = 0;
2488     TCGOp *op;
2489     int i, n, pi = 0, total_args;
2490 
2491     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2492         init_call_layout(info);
2493         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2494     }
2495 
2496     total_args = info->nr_out + info->nr_in + 2;
2497     op = tcg_op_alloc(INDEX_op_call, total_args);
2498 
2499 #ifdef CONFIG_PLUGIN
2500     /* Flag helpers that may affect guest state */
2501     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2502         tcg_ctx->plugin_insn->calls_helpers = true;
2503     }
2504 #endif
2505 
2506     TCGOP_CALLO(op) = n = info->nr_out;
2507     switch (n) {
2508     case 0:
2509         tcg_debug_assert(ret == NULL);
2510         break;
2511     case 1:
2512         tcg_debug_assert(ret != NULL);
2513         op->args[pi++] = temp_arg(ret);
2514         break;
2515     case 2:
2516     case 4:
2517         tcg_debug_assert(ret != NULL);
2518         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2519         tcg_debug_assert(ret->temp_subindex == 0);
2520         for (i = 0; i < n; ++i) {
2521             op->args[pi++] = temp_arg(ret + i);
2522         }
2523         break;
2524     default:
2525         g_assert_not_reached();
2526     }
2527 
2528     TCGOP_CALLI(op) = n = info->nr_in;
2529     for (i = 0; i < n; i++) {
2530         const TCGCallArgumentLoc *loc = &info->in[i];
2531         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2532 
2533         switch (loc->kind) {
2534         case TCG_CALL_ARG_NORMAL:
2535         case TCG_CALL_ARG_BY_REF:
2536         case TCG_CALL_ARG_BY_REF_N:
2537             op->args[pi++] = temp_arg(ts);
2538             break;
2539 
2540         case TCG_CALL_ARG_EXTEND_U:
2541         case TCG_CALL_ARG_EXTEND_S:
2542             {
2543                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2544                 TCGv_i32 orig = temp_tcgv_i32(ts);
2545 
2546                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2547                     tcg_gen_ext_i32_i64(temp, orig);
2548                 } else {
2549                     tcg_gen_extu_i32_i64(temp, orig);
2550                 }
2551                 op->args[pi++] = tcgv_i64_arg(temp);
2552                 extend_free[n_extend++] = temp;
2553             }
2554             break;
2555 
2556         default:
2557             g_assert_not_reached();
2558         }
2559     }
2560     op->args[pi++] = (uintptr_t)func;
2561     op->args[pi++] = (uintptr_t)info;
2562     tcg_debug_assert(pi == total_args);
2563 
2564     if (tcg_ctx->emit_before_op) {
2565         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2566     } else {
2567         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2568     }
2569 
2570     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2571     for (i = 0; i < n_extend; ++i) {
2572         tcg_temp_free_i64(extend_free[i]);
2573     }
2574 }
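     /*
      * Sketch of the TCG_CALL_ARG_EXTEND path above: on hosts whose ABI
      * wants 32-bit arguments widened to a full register, each I32 input
      * is copied into a fresh EBB I64 temp via tcg_gen_ext_i32_i64 or
      * tcg_gen_extu_i32_i64 (per the signedness recorded in the layout),
      * that temp is passed in its place, and it is freed again once the
      * call op has been queued.
      */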
2575 
2576 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2577 {
2578     tcg_gen_callN(func, info, ret, NULL);
2579 }
2580 
2581 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2582 {
2583     tcg_gen_callN(func, info, ret, &t1);
2584 }
2585 
2586 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2587                    TCGTemp *t1, TCGTemp *t2)
2588 {
2589     TCGTemp *args[2] = { t1, t2 };
2590     tcg_gen_callN(func, info, ret, args);
2591 }
2592 
2593 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2594                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2595 {
2596     TCGTemp *args[3] = { t1, t2, t3 };
2597     tcg_gen_callN(func, info, ret, args);
2598 }
2599 
2600 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2601                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2602 {
2603     TCGTemp *args[4] = { t1, t2, t3, t4 };
2604     tcg_gen_callN(func, info, ret, args);
2605 }
2606 
2607 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2608                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2609 {
2610     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
2614 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2615                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2616                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2617 {
2618     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2619     tcg_gen_callN(func, info, ret, args);
2620 }
2621 
2622 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2623                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2624                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2625 {
2626     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2627     tcg_gen_callN(func, info, ret, args);
2628 }
2629 
2630 static void tcg_reg_alloc_start(TCGContext *s)
2631 {
2632     int i, n;
2633 
2634     for (i = 0, n = s->nb_temps; i < n; i++) {
2635         TCGTemp *ts = &s->temps[i];
2636         TCGTempVal val = TEMP_VAL_MEM;
2637 
2638         switch (ts->kind) {
2639         case TEMP_CONST:
2640             val = TEMP_VAL_CONST;
2641             break;
2642         case TEMP_FIXED:
2643             val = TEMP_VAL_REG;
2644             break;
2645         case TEMP_GLOBAL:
2646             break;
2647         case TEMP_EBB:
2648             val = TEMP_VAL_DEAD;
2649             /* fall through */
2650         case TEMP_TB:
2651             ts->mem_allocated = 0;
2652             break;
2653         default:
2654             g_assert_not_reached();
2655         }
2656         ts->val_type = val;
2657     }
2658 
2659     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2660 }
2661 
2662 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2663                                  TCGTemp *ts)
2664 {
2665     int idx = temp_idx(ts);
2666 
2667     switch (ts->kind) {
2668     case TEMP_FIXED:
2669     case TEMP_GLOBAL:
2670         pstrcpy(buf, buf_size, ts->name);
2671         break;
2672     case TEMP_TB:
2673         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2674         break;
2675     case TEMP_EBB:
2676         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2677         break;
2678     case TEMP_CONST:
2679         switch (ts->type) {
2680         case TCG_TYPE_I32:
2681             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2682             break;
2683 #if TCG_TARGET_REG_BITS > 32
2684         case TCG_TYPE_I64:
2685             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2686             break;
2687 #endif
2688         case TCG_TYPE_V64:
2689         case TCG_TYPE_V128:
2690         case TCG_TYPE_V256:
2691             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2692                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2693             break;
2694         default:
2695             g_assert_not_reached();
2696         }
2697         break;
2698     }
2699     return buf;
2700 }
2701 
2702 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2703                              int buf_size, TCGArg arg)
2704 {
2705     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2706 }
2707 
2708 static const char * const cond_name[] =
2709 {
2710     [TCG_COND_NEVER] = "never",
2711     [TCG_COND_ALWAYS] = "always",
2712     [TCG_COND_EQ] = "eq",
2713     [TCG_COND_NE] = "ne",
2714     [TCG_COND_LT] = "lt",
2715     [TCG_COND_GE] = "ge",
2716     [TCG_COND_LE] = "le",
2717     [TCG_COND_GT] = "gt",
2718     [TCG_COND_LTU] = "ltu",
2719     [TCG_COND_GEU] = "geu",
2720     [TCG_COND_LEU] = "leu",
2721     [TCG_COND_GTU] = "gtu",
2722     [TCG_COND_TSTEQ] = "tsteq",
2723     [TCG_COND_TSTNE] = "tstne",
2724 };
2725 
2726 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2727 {
2728     [MO_UB]   = "ub",
2729     [MO_SB]   = "sb",
2730     [MO_LEUW] = "leuw",
2731     [MO_LESW] = "lesw",
2732     [MO_LEUL] = "leul",
2733     [MO_LESL] = "lesl",
2734     [MO_LEUQ] = "leq",
2735     [MO_BEUW] = "beuw",
2736     [MO_BESW] = "besw",
2737     [MO_BEUL] = "beul",
2738     [MO_BESL] = "besl",
2739     [MO_BEUQ] = "beq",
2740     [MO_128 + MO_BE] = "beo",
2741     [MO_128 + MO_LE] = "leo",
2742 };
2743 
2744 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2745     [MO_UNALN >> MO_ASHIFT]    = "un+",
2746     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2747     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2748     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2749     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2750     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2751     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2752     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2753 };
2754 
2755 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2756     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2757     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2758     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2759     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2760     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2761     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2762 };
2763 
2764 static const char bswap_flag_name[][6] = {
2765     [TCG_BSWAP_IZ] = "iz",
2766     [TCG_BSWAP_OZ] = "oz",
2767     [TCG_BSWAP_OS] = "os",
2768     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2769     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2770 };
2771 
2772 #ifdef CONFIG_PLUGIN
2773 static const char * const plugin_from_name[] = {
2774     "from-tb",
2775     "from-insn",
2776     "after-insn",
2777     "after-tb",
2778 };
2779 #endif
2780 
2781 static inline bool tcg_regset_single(TCGRegSet d)
2782 {
2783     return (d & (d - 1)) == 0;
2784 }
2785 
2786 static inline TCGReg tcg_regset_first(TCGRegSet d)
2787 {
2788     if (TCG_TARGET_NB_REGS <= 32) {
2789         return ctz32(d);
2790     } else {
2791         return ctz64(d);
2792     }
2793 }
2794 
2795 /* Return only the number of characters output -- no error return. */
2796 #define ne_fprintf(...) \
2797     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2798 
2799 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2800 {
2801     char buf[128];
2802     TCGOp *op;
2803 
2804     QTAILQ_FOREACH(op, &s->ops, link) {
2805         int i, k, nb_oargs, nb_iargs, nb_cargs;
2806         const TCGOpDef *def;
2807         TCGOpcode c;
2808         int col = 0;
2809 
2810         c = op->opc;
2811         def = &tcg_op_defs[c];
2812 
2813         if (c == INDEX_op_insn_start) {
2814             nb_oargs = 0;
2815             col += ne_fprintf(f, "\n ----");
2816 
2817             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2818                 col += ne_fprintf(f, " %016" PRIx64,
2819                                   tcg_get_insn_start_param(op, i));
2820             }
2821         } else if (c == INDEX_op_call) {
2822             const TCGHelperInfo *info = tcg_call_info(op);
2823             void *func = tcg_call_func(op);
2824 
2825             /* variable number of arguments */
2826             nb_oargs = TCGOP_CALLO(op);
2827             nb_iargs = TCGOP_CALLI(op);
2828             nb_cargs = def->nb_cargs;
2829 
2830             col += ne_fprintf(f, " %s ", def->name);
2831 
2832             /*
2833              * Print the function name from TCGHelperInfo, if available.
2834              * Note that plugins have a template function for the info,
2835              * but the actual function pointer comes from the plugin.
2836              */
2837             if (func == info->func) {
2838                 col += ne_fprintf(f, "%s", info->name);
2839             } else {
2840                 col += ne_fprintf(f, "plugin(%p)", func);
2841             }
2842 
2843             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2844             for (i = 0; i < nb_oargs; i++) {
2845                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2846                                                             op->args[i]));
2847             }
2848             for (i = 0; i < nb_iargs; i++) {
2849                 TCGArg arg = op->args[nb_oargs + i];
2850                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2851                 col += ne_fprintf(f, ",%s", t);
2852             }
2853         } else {
2854             if (def->flags & TCG_OPF_INT) {
2855                 col += ne_fprintf(f, " %s_i%d ",
2856                                   def->name,
2857                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2858             } else if (def->flags & TCG_OPF_VECTOR) {
2859                 col += ne_fprintf(f, "%s v%d,e%d,",
2860                                   def->name,
2861                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2862                                   8 << TCGOP_VECE(op));
2863             } else {
2864                 col += ne_fprintf(f, " %s ", def->name);
2865             }
2866 
2867             nb_oargs = def->nb_oargs;
2868             nb_iargs = def->nb_iargs;
2869             nb_cargs = def->nb_cargs;
2870 
2871             k = 0;
2872             for (i = 0; i < nb_oargs; i++) {
2873                 const char *sep =  k ? "," : "";
2874                 col += ne_fprintf(f, "%s%s", sep,
2875                                   tcg_get_arg_str(s, buf, sizeof(buf),
2876                                                   op->args[k++]));
2877             }
2878             for (i = 0; i < nb_iargs; i++) {
2879                 const char *sep =  k ? "," : "";
2880                 col += ne_fprintf(f, "%s%s", sep,
2881                                   tcg_get_arg_str(s, buf, sizeof(buf),
2882                                                   op->args[k++]));
2883             }
2884             switch (c) {
2885             case INDEX_op_brcond_i32:
2886             case INDEX_op_setcond_i32:
2887             case INDEX_op_negsetcond_i32:
2888             case INDEX_op_movcond_i32:
2889             case INDEX_op_brcond2_i32:
2890             case INDEX_op_setcond2_i32:
2891             case INDEX_op_brcond_i64:
2892             case INDEX_op_setcond_i64:
2893             case INDEX_op_negsetcond_i64:
2894             case INDEX_op_movcond_i64:
2895             case INDEX_op_cmp_vec:
2896             case INDEX_op_cmpsel_vec:
2897                 if (op->args[k] < ARRAY_SIZE(cond_name)
2898                     && cond_name[op->args[k]]) {
2899                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2900                 } else {
2901                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2902                 }
2903                 i = 1;
2904                 break;
2905             case INDEX_op_qemu_ld_i32:
2906             case INDEX_op_qemu_st_i32:
2907             case INDEX_op_qemu_st8_i32:
2908             case INDEX_op_qemu_ld_i64:
2909             case INDEX_op_qemu_st_i64:
2910             case INDEX_op_qemu_ld_i128:
2911             case INDEX_op_qemu_st_i128:
2912                 {
2913                     const char *s_al, *s_op, *s_at;
2914                     MemOpIdx oi = op->args[k++];
2915                     MemOp mop = get_memop(oi);
2916                     unsigned ix = get_mmuidx(oi);
2917 
2918                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2919                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2920                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2921                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2922 
2923                     /* If all fields are accounted for, print symbolically. */
2924                     if (!mop && s_al && s_op && s_at) {
2925                         col += ne_fprintf(f, ",%s%s%s,%u",
2926                                           s_at, s_al, s_op, ix);
2927                     } else {
2928                         mop = get_memop(oi);
2929                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2930                     }
2931                     i = 1;
2932                 }
2933                 break;
2934             case INDEX_op_bswap16_i32:
2935             case INDEX_op_bswap16_i64:
2936             case INDEX_op_bswap32_i32:
2937             case INDEX_op_bswap32_i64:
2938             case INDEX_op_bswap64_i64:
2939                 {
2940                     TCGArg flags = op->args[k];
2941                     const char *name = NULL;
2942 
2943                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2944                         name = bswap_flag_name[flags];
2945                     }
2946                     if (name) {
2947                         col += ne_fprintf(f, ",%s", name);
2948                     } else {
2949                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2950                     }
2951                     i = k = 1;
2952                 }
2953                 break;
2954 #ifdef CONFIG_PLUGIN
2955             case INDEX_op_plugin_cb:
2956                 {
2957                     TCGArg from = op->args[k++];
2958                     const char *name = NULL;
2959 
2960                     if (from < ARRAY_SIZE(plugin_from_name)) {
2961                         name = plugin_from_name[from];
2962                     }
2963                     if (name) {
2964                         col += ne_fprintf(f, "%s", name);
2965                     } else {
2966                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2967                     }
2968                     i = 1;
2969                 }
2970                 break;
2971 #endif
2972             default:
2973                 i = 0;
2974                 break;
2975             }
2976             switch (c) {
2977             case INDEX_op_set_label:
2978             case INDEX_op_br:
2979             case INDEX_op_brcond_i32:
2980             case INDEX_op_brcond_i64:
2981             case INDEX_op_brcond2_i32:
2982                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2983                                   arg_label(op->args[k])->id);
2984                 i++, k++;
2985                 break;
2986             case INDEX_op_mb:
2987                 {
2988                     TCGBar membar = op->args[k];
2989                     const char *b_op, *m_op;
2990 
2991                     switch (membar & TCG_BAR_SC) {
2992                     case 0:
2993                         b_op = "none";
2994                         break;
2995                     case TCG_BAR_LDAQ:
2996                         b_op = "acq";
2997                         break;
2998                     case TCG_BAR_STRL:
2999                         b_op = "rel";
3000                         break;
3001                     case TCG_BAR_SC:
3002                         b_op = "seq";
3003                         break;
3004                     default:
3005                         g_assert_not_reached();
3006                     }
3007 
3008                     switch (membar & TCG_MO_ALL) {
3009                     case 0:
3010                         m_op = "none";
3011                         break;
3012                     case TCG_MO_LD_LD:
3013                         m_op = "rr";
3014                         break;
3015                     case TCG_MO_LD_ST:
3016                         m_op = "rw";
3017                         break;
3018                     case TCG_MO_ST_LD:
3019                         m_op = "wr";
3020                         break;
3021                     case TCG_MO_ST_ST:
3022                         m_op = "ww";
3023                         break;
3024                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3025                         m_op = "rr+rw";
3026                         break;
3027                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3028                         m_op = "rr+wr";
3029                         break;
3030                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3031                         m_op = "rr+ww";
3032                         break;
3033                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3034                         m_op = "rw+wr";
3035                         break;
3036                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3037                         m_op = "rw+ww";
3038                         break;
3039                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3040                         m_op = "wr+ww";
3041                         break;
3042                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3043                         m_op = "rr+rw+wr";
3044                         break;
3045                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3046                         m_op = "rr+rw+ww";
3047                         break;
3048                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3049                         m_op = "rr+wr+ww";
3050                         break;
3051                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3052                         m_op = "rw+wr+ww";
3053                         break;
3054                     case TCG_MO_ALL:
3055                         m_op = "all";
3056                         break;
3057                     default:
3058                         g_assert_not_reached();
3059                     }
3060 
3061                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3062                     i++, k++;
3063                 }
3064                 break;
3065             default:
3066                 break;
3067             }
3068             for (; i < nb_cargs; i++, k++) {
3069                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3070                                   op->args[k]);
3071             }
3072         }
3073 
3074         if (have_prefs || op->life) {
3075             for (; col < 40; ++col) {
3076                 putc(' ', f);
3077             }
3078         }
3079 
3080         if (op->life) {
3081             unsigned life = op->life;
3082 
3083             if (life & (SYNC_ARG * 3)) {
3084                 ne_fprintf(f, "  sync:");
3085                 for (i = 0; i < 2; ++i) {
3086                     if (life & (SYNC_ARG << i)) {
3087                         ne_fprintf(f, " %d", i);
3088                     }
3089                 }
3090             }
3091             life /= DEAD_ARG;
3092             if (life) {
3093                 ne_fprintf(f, "  dead:");
3094                 for (i = 0; life; ++i, life >>= 1) {
3095                     if (life & 1) {
3096                         ne_fprintf(f, " %d", i);
3097                     }
3098                 }
3099             }
3100         }
3101 
3102         if (have_prefs) {
3103             for (i = 0; i < nb_oargs; ++i) {
3104                 TCGRegSet set = output_pref(op, i);
3105 
3106                 if (i == 0) {
3107                     ne_fprintf(f, "  pref=");
3108                 } else {
3109                     ne_fprintf(f, ",");
3110                 }
3111                 if (set == 0) {
3112                     ne_fprintf(f, "none");
3113                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3114                     ne_fprintf(f, "all");
3115 #ifdef CONFIG_DEBUG_TCG
3116                 } else if (tcg_regset_single(set)) {
3117                     TCGReg reg = tcg_regset_first(set);
3118                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3119 #endif
3120                 } else if (TCG_TARGET_NB_REGS <= 32) {
3121                     ne_fprintf(f, "0x%x", (uint32_t)set);
3122                 } else {
3123                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3124                 }
3125             }
3126         }
3127 
3128         putc('\n', f);
3129     }
3130 }
3131 
3132 /* We give higher priority to constraints with fewer registers. */
3133 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3134 {
3135     int n;
3136 
3137     arg_ct += k;
3138     n = ctpop64(arg_ct->regs);
3139 
3140     /*
3141      * Sort constraints of a single register first, which includes output
3142      * aliases (which must exactly match the input already allocated).
3143      */
3144     if (n == 1 || arg_ct->oalias) {
3145         return INT_MAX;
3146     }
3147 
3148     /*
3149      * Sort register pairs next: each pair's first member, then its second.
3150      * Arbitrarily sort multiple pairs by the index of the first reg;
3151      * there shouldn't be many pairs.
3152      */
3153     switch (arg_ct->pair) {
3154     case 1:
3155     case 3:
3156         return (k + 1) * 2;
3157     case 2:
3158         return (arg_ct->pair_index + 1) * 2 - 1;
3159     }
3160 
3161     /* Finally, sort by register count: fewer registers sorts earlier. */
3162     assert(n > 1);
3163     return -n;
3164 }
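     /*
      * Resulting order, highest key first: exact-match aliases and
      * single-register constraints (INT_MAX); then pair members, whose
      * small positive keys keep each pair's two halves adjacent; then
      * free register sets, where -n makes smaller (more constrained)
      * sets sort before larger ones.
      */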
3165 
3166 /* sort from highest priority to lowest */
3167 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3168 {
3169     int i, j;
3170 
3171     for (i = 0; i < n; i++) {
3172         a[start + i].sort_index = start + i;
3173     }
3174     if (n <= 1) {
3175         return;
3176     }
3177     for (i = 0; i < n - 1; i++) {
3178         for (j = i + 1; j < n; j++) {
3179             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3180             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3181             if (p1 < p2) {
3182                 int tmp = a[start + i].sort_index;
3183                 a[start + i].sort_index = a[start + j].sort_index;
3184                 a[start + j].sort_index = tmp;
3185             }
3186         }
3187     }
3188 }
3189 
3190 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3191 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3192 
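     /*
      * Informal summary of the generic constraint letters handled below
      * (backend-specific letters are resolved by the included
      * target-specific cases at the end of the loop):
      *   "0".."9"  input aliased to the numbered output's register
      *   "&"       output written before all inputs are read (newreg)
      *   "p"       allocate the register after the previous argument,
      *             forming a pair
      *   "m"       allocate the register before the previous argument
      *   "i"       constant operand allowed
      * e.g. outputs/inputs of { "r", "r", "0" } describe a two-address
      * op whose second input must share the output's register.
      */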
3193 static void process_constraint_sets(void)
3194 {
3195     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3196         const TCGConstraintSet *tdefs = &constraint_sets[c];
3197         TCGArgConstraint *args_ct = all_cts[c];
3198         int nb_oargs = tdefs->nb_oargs;
3199         int nb_iargs = tdefs->nb_iargs;
3200         int nb_args = nb_oargs + nb_iargs;
3201         bool saw_alias_pair = false;
3202 
3203         for (int i = 0; i < nb_args; i++) {
3204             const char *ct_str = tdefs->args_ct_str[i];
3205             bool input_p = i >= nb_oargs;
3206             int o;
3207 
3208             switch (*ct_str) {
3209             case '0' ... '9':
3210                 o = *ct_str - '0';
3211                 tcg_debug_assert(input_p);
3212                 tcg_debug_assert(o < nb_oargs);
3213                 tcg_debug_assert(args_ct[o].regs != 0);
3214                 tcg_debug_assert(!args_ct[o].oalias);
3215                 args_ct[i] = args_ct[o];
3216                 /* The output sets oalias.  */
3217                 args_ct[o].oalias = 1;
3218                 args_ct[o].alias_index = i;
3219                 /* The input sets ialias. */
3220                 args_ct[i].ialias = 1;
3221                 args_ct[i].alias_index = o;
3222                 if (args_ct[i].pair) {
3223                     saw_alias_pair = true;
3224                 }
3225                 tcg_debug_assert(ct_str[1] == '\0');
3226                 continue;
3227 
3228             case '&':
3229                 tcg_debug_assert(!input_p);
3230                 args_ct[i].newreg = true;
3231                 ct_str++;
3232                 break;
3233 
3234             case 'p': /* plus */
3235                 /* Allocate to the register after the previous. */
3236                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3237                 o = i - 1;
3238                 tcg_debug_assert(!args_ct[o].pair);
3239                 tcg_debug_assert(!args_ct[o].ct);
3240                 args_ct[i] = (TCGArgConstraint){
3241                     .pair = 2,
3242                     .pair_index = o,
3243                     .regs = args_ct[o].regs << 1,
3244                     .newreg = args_ct[o].newreg,
3245                 };
3246                 args_ct[o].pair = 1;
3247                 args_ct[o].pair_index = i;
3248                 tcg_debug_assert(ct_str[1] == '\0');
3249                 continue;
3250 
3251             case 'm': /* minus */
3252                 /* Allocate to the register before the previous. */
3253                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3254                 o = i - 1;
3255                 tcg_debug_assert(!args_ct[o].pair);
3256                 tcg_debug_assert(!args_ct[o].ct);
3257                 args_ct[i] = (TCGArgConstraint){
3258                     .pair = 1,
3259                     .pair_index = o,
3260                     .regs = args_ct[o].regs >> 1,
3261                     .newreg = args_ct[o].newreg,
3262                 };
3263                 args_ct[o].pair = 2;
3264                 args_ct[o].pair_index = i;
3265                 tcg_debug_assert(ct_str[1] == '\0');
3266                 continue;
3267             }
3268 
3269             do {
3270                 switch (*ct_str) {
3271                 case 'i':
3272                     args_ct[i].ct |= TCG_CT_CONST;
3273                     break;
3274 #ifdef TCG_REG_ZERO
3275                 case 'z':
3276                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3277                     break;
3278 #endif
3279 
3280                 /* Include all of the target-specific constraints. */
3281 
3282 #undef CONST
3283 #define CONST(CASE, MASK) \
3284     case CASE: args_ct[i].ct |= MASK; break;
3285 #define REGS(CASE, MASK) \
3286     case CASE: args_ct[i].regs |= MASK; break;
3287 
3288 #include "tcg-target-con-str.h"
3289 
3290 #undef REGS
3291 #undef CONST
3292                 default:
3293                 case '0' ... '9':
3294                 case '&':
3295                 case 'p':
3296                 case 'm':
3297                     /* Typo in TCGConstraintSet constraint. */
3298                     g_assert_not_reached();
3299                 }
3300             } while (*++ct_str != '\0');
3301         }
3302 
3303         /*
3304          * Fix up output pairs that are aliased with inputs.
3305          * When we created the alias, we copied pair from the output.
3306          * There are three cases:
3307          *    (1a) Pairs of inputs alias pairs of outputs.
3308          *    (1b) One input aliases the first of a pair of outputs.
3309          *    (2)  One input aliases the second of a pair of outputs.
3310          *
3311          * Case 1a is handled by making sure that the pair_index'es are
3312          * properly updated so that they appear the same as a pair of inputs.
3313          *
3314          * Case 1b is handled by setting the pair_index of the input to
3315          * itself, simply so it doesn't point to an unrelated argument.
3316          * Since we don't encounter the "second" during the input allocation
3317          * phase, nothing happens with the second half of the input pair.
3318          *
3319          * Case 2 is handled by setting the second input to pair=3, the
3320          * first output to pair=3, and the pair_index'es to match.
3321          */
3322         if (saw_alias_pair) {
3323             for (int i = nb_oargs; i < nb_args; i++) {
3324                 int o, o2, i2;
3325 
3326                 /*
3327                  * Since [0-9pm] must be alone in the constraint string,
3328                  * the only way they can both be set is if the pair comes
3329                  * from the output alias.
3330                  */
3331                 if (!args_ct[i].ialias) {
3332                     continue;
3333                 }
3334                 switch (args_ct[i].pair) {
3335                 case 0:
3336                     break;
3337                 case 1:
3338                     o = args_ct[i].alias_index;
3339                     o2 = args_ct[o].pair_index;
3340                     tcg_debug_assert(args_ct[o].pair == 1);
3341                     tcg_debug_assert(args_ct[o2].pair == 2);
3342                     if (args_ct[o2].oalias) {
3343                         /* Case 1a */
3344                         i2 = args_ct[o2].alias_index;
3345                         tcg_debug_assert(args_ct[i2].pair == 2);
3346                         args_ct[i2].pair_index = i;
3347                         args_ct[i].pair_index = i2;
3348                     } else {
3349                         /* Case 1b */
3350                         args_ct[i].pair_index = i;
3351                     }
3352                     break;
3353                 case 2:
3354                     o = args_ct[i].alias_index;
3355                     o2 = args_ct[o].pair_index;
3356                     tcg_debug_assert(args_ct[o].pair == 2);
3357                     tcg_debug_assert(args_ct[o2].pair == 1);
3358                     if (args_ct[o2].oalias) {
3359                         /* Case 1a */
3360                         i2 = args_ct[o2].alias_index;
3361                         tcg_debug_assert(args_ct[i2].pair == 1);
3362                         args_ct[i2].pair_index = i;
3363                         args_ct[i].pair_index = i2;
3364                     } else {
3365                         /* Case 2 */
3366                         args_ct[i].pair = 3;
3367                         args_ct[o2].pair = 3;
3368                         args_ct[i].pair_index = o2;
3369                         args_ct[o2].pair_index = i;
3370                     }
3371                     break;
3372                 default:
3373                     g_assert_not_reached();
3374                 }
3375             }
3376         }
3377 
3378         /* sort the constraints (XXX: this is just a heuristic) */
3379         sort_constraints(args_ct, 0, nb_oargs);
3380         sort_constraints(args_ct, nb_oargs, nb_iargs);
3381     }
3382 }
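/*
 * Illustration only (guarded out of the build): for a hypothetical
 * constraint set with one output "r" and one input "0", the parse above
 * leaves the invariants below, where args_ct stands for the matching
 * row of all_cts[].
 */
#if 0
static void example_alias_invariants(const TCGArgConstraint *args_ct)
{
    /* The input copied the output's register set... */
    tcg_debug_assert(args_ct[1].regs == args_ct[0].regs);
    /* ...and the two arguments point at each other. */
    tcg_debug_assert(args_ct[0].oalias && args_ct[0].alias_index == 1);
    tcg_debug_assert(args_ct[1].ialias && args_ct[1].alias_index == 0);
}
#endif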
3383 
3384 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3385 {
3386     TCGOpcode opc = op->opc;
3387     TCGType type = TCGOP_TYPE(op);
3388     unsigned flags = TCGOP_FLAGS(op);
3389     const TCGOpDef *def = &tcg_op_defs[opc];
3390     const TCGOutOp *outop = all_outop[opc];
3391     TCGConstraintSetIndex con_set;
3392 
3393     if (def->flags & TCG_OPF_NOT_PRESENT) {
3394         return empty_cts;
3395     }
3396 
3397     if (outop) {
3398         con_set = outop->static_constraint;
3399         if (con_set == C_Dynamic) {
3400             con_set = outop->dynamic_constraint(type, flags);
3401         }
3402     } else {
3403         con_set = tcg_target_op_def(opc, type, flags);
3404     }
3405     tcg_debug_assert(con_set >= 0);
3406     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3407 
3408     /* The constraint arguments must match TCGOpcode arguments. */
3409     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3410     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3411 
3412     return all_cts[con_set];
3413 }
3414 
3415 static void remove_label_use(TCGOp *op, int idx)
3416 {
3417     TCGLabel *label = arg_label(op->args[idx]);
3418     TCGLabelUse *use;
3419 
3420     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3421         if (use->op == op) {
3422             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3423             return;
3424         }
3425     }
3426     g_assert_not_reached();
3427 }
3428 
3429 void tcg_op_remove(TCGContext *s, TCGOp *op)
3430 {
3431     switch (op->opc) {
3432     case INDEX_op_br:
3433         remove_label_use(op, 0);
3434         break;
3435     case INDEX_op_brcond_i32:
3436     case INDEX_op_brcond_i64:
3437         remove_label_use(op, 3);
3438         break;
3439     case INDEX_op_brcond2_i32:
3440         remove_label_use(op, 5);
3441         break;
3442     default:
3443         break;
3444     }
3445 
3446     QTAILQ_REMOVE(&s->ops, op, link);
3447     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3448     s->nb_ops--;
3449 }
3450 
3451 void tcg_remove_ops_after(TCGOp *op)
3452 {
3453     TCGContext *s = tcg_ctx;
3454 
3455     while (true) {
3456         TCGOp *last = tcg_last_op();
3457         if (last == op) {
3458             return;
3459         }
3460         tcg_op_remove(s, last);
3461     }
3462 }
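/*
 * Usage sketch (guarded out of the build): callers record a checkpoint
 * and roll back any opcodes emitted after it; "checkpoint" here is a
 * hypothetical name for the op returned by tcg_last_op() beforehand.
 */
#if 0
static void example_rollback(void)
{
    TCGOp *checkpoint = tcg_last_op();

    /* ... speculatively emit opcodes ... */

    /* Pop everything newer than the checkpoint, newest first. */
    tcg_remove_ops_after(checkpoint);
}
#endif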
3463 
3464 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3465 {
3466     TCGContext *s = tcg_ctx;
3467     TCGOp *op = NULL;
3468 
3469     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3470         QTAILQ_FOREACH(op, &s->free_ops, link) {
3471             if (nargs <= op->nargs) {
3472                 QTAILQ_REMOVE(&s->free_ops, op, link);
3473                 nargs = op->nargs;
3474                 goto found;
3475             }
3476         }
3477     }
3478 
3479     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3480     nargs = MAX(4, nargs);
3481     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3482 
3483  found:
3484     memset(op, 0, offsetof(TCGOp, link));
3485     op->opc = opc;
3486     op->nargs = nargs;
3487 
3488     /* Check for bitfield overflow. */
3489     tcg_debug_assert(op->nargs == nargs);
3490 
3491     s->nb_ops++;
3492     return op;
3493 }
3494 
3495 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3496 {
3497     TCGOp *op = tcg_op_alloc(opc, nargs);
3498 
3499     if (tcg_ctx->emit_before_op) {
3500         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3501     } else {
3502         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3503     }
3504     return op;
3505 }
3506 
3507 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3508                             TCGOpcode opc, TCGType type, unsigned nargs)
3509 {
3510     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3511 
3512     TCGOP_TYPE(new_op) = type;
3513     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3514     return new_op;
3515 }
3516 
3517 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3518                            TCGOpcode opc, TCGType type, unsigned nargs)
3519 {
3520     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3521 
3522     TCGOP_TYPE(new_op) = type;
3523     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3524     return new_op;
3525 }
3526 
3527 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3528 {
3529     TCGLabelUse *u;
3530 
3531     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3532         TCGOp *op = u->op;
3533         switch (op->opc) {
3534         case INDEX_op_br:
3535             op->args[0] = label_arg(to);
3536             break;
3537         case INDEX_op_brcond_i32:
3538         case INDEX_op_brcond_i64:
3539             op->args[3] = label_arg(to);
3540             break;
3541         case INDEX_op_brcond2_i32:
3542             op->args[5] = label_arg(to);
3543             break;
3544         default:
3545             g_assert_not_reached();
3546         }
3547     }
3548 
3549     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3550 }
3551 
3552 /* Reachability analysis: remove unreachable code.  */
3553 static void __attribute__((noinline))
3554 reachable_code_pass(TCGContext *s)
3555 {
3556     TCGOp *op, *op_next, *op_prev;
3557     bool dead = false;
3558 
3559     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3560         bool remove = dead;
3561         TCGLabel *label;
3562 
3563         switch (op->opc) {
3564         case INDEX_op_set_label:
3565             label = arg_label(op->args[0]);
3566 
3567             /*
3568              * Note that the first op in the TB is always a load,
3569              * so there is always something before a label.
3570              */
3571             op_prev = QTAILQ_PREV(op, link);
3572 
3573             /*
3574              * If we find two sequential labels, move all branches to
3575              * reference the second label and remove the first label.
3576              * Do this before branch to next optimization, so that the
3577              * middle label is out of the way.
3578              */
3579             if (op_prev->opc == INDEX_op_set_label) {
3580                 move_label_uses(label, arg_label(op_prev->args[0]));
3581                 tcg_op_remove(s, op_prev);
3582                 op_prev = QTAILQ_PREV(op, link);
3583             }
3584 
3585             /*
3586              * Optimization can fold conditional branches to unconditional.
3587              * If we find a label which is preceded by an unconditional
3588              * branch to next, remove the branch.  We couldn't do this when
3589              * processing the branch because any dead code between the branch
3590              * and label had not yet been removed.
3591              */
3592             if (op_prev->opc == INDEX_op_br &&
3593                 label == arg_label(op_prev->args[0])) {
3594                 tcg_op_remove(s, op_prev);
3595                 /* Fall through means insns become live again.  */
3596                 dead = false;
3597             }
3598 
3599             if (QSIMPLEQ_EMPTY(&label->branches)) {
3600                 /*
3601                  * While there is an occasional backward branch, virtually
3602                  * all branches generated by the translators are forward.
3603                  * Which means that generally we will have already removed
3604                  * all references to the label by the time we reach it, and
3605                  * there is little to be gained by iterating.
3606                  */
3607                 remove = true;
3608             } else {
3609                 /* Once we see a label, insns become live again.  */
3610                 dead = false;
3611                 remove = false;
3612             }
3613             break;
3614 
3615         case INDEX_op_br:
3616         case INDEX_op_exit_tb:
3617         case INDEX_op_goto_ptr:
3618             /* Unconditional branches; everything following is dead.  */
3619             dead = true;
3620             break;
3621 
3622         case INDEX_op_call:
3623             /* Notice noreturn helper calls, raising exceptions.  */
3624             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3625                 dead = true;
3626             }
3627             break;
3628 
3629         case INDEX_op_insn_start:
3630             /* Never remove -- we need to keep these for unwind.  */
3631             remove = false;
3632             break;
3633 
3634         default:
3635             break;
3636         }
3637 
3638         if (remove) {
3639             tcg_op_remove(s, op);
3640         }
3641     }
3642 }
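/*
 * Illustration (hypothetical IR, invented temps and labels): given
 *
 *     br $L1
 *     mov_i32 t0, t1      <- unreachable, removed
 *     set_label $L1       <- the br above is a branch-to-next, removed
 *
 * the pass first drops the dead mov, then notices the branch-to-next
 * and removes the br; $L1 itself survives only if some other branch
 * still references it.
 */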
3643 
3644 #define TS_DEAD  1
3645 #define TS_MEM   2
3646 
3647 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3648 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
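/*
 * Illustration only (guarded out of the build): op->life packs one SYNC
 * bit and one DEAD bit per argument.  For a hypothetical op whose output
 * (arg 0) must be synced back to memory and whose input (arg 1) dies:
 */
#if 0
static void example_arg_life(void)
{
    TCGLifeData arg_life = (SYNC_ARG << 0) | (DEAD_ARG << 1);

    g_assert(NEED_SYNC_ARG(0)); /* store arg 0 after the op */
    g_assert(IS_DEAD_ARG(1));   /* arg 1's register is free afterwards */
}
#endif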
3649 
3650 /* For liveness_pass_1, the register preferences for a given temp.  */
3651 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3652 {
3653     return ts->state_ptr;
3654 }
3655 
3656 /* For liveness_pass_1, reset the preferences for a given temp to the
3657  * maximal regset for its type.
3658  */
3659 static inline void la_reset_pref(TCGTemp *ts)
3660 {
3661     *la_temp_pref(ts)
3662         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3663 }
3664 
3665 /* liveness analysis: end of function: all temps are dead, and globals
3666    should be in memory. */
3667 static void la_func_end(TCGContext *s, int ng, int nt)
3668 {
3669     int i;
3670 
3671     for (i = 0; i < ng; ++i) {
3672         s->temps[i].state = TS_DEAD | TS_MEM;
3673         la_reset_pref(&s->temps[i]);
3674     }
3675     for (i = ng; i < nt; ++i) {
3676         s->temps[i].state = TS_DEAD;
3677         la_reset_pref(&s->temps[i]);
3678     }
3679 }
3680 
3681 /* liveness analysis: end of basic block: all temps are dead, globals
3682    and local temps should be in memory. */
3683 static void la_bb_end(TCGContext *s, int ng, int nt)
3684 {
3685     int i;
3686 
3687     for (i = 0; i < nt; ++i) {
3688         TCGTemp *ts = &s->temps[i];
3689         int state;
3690 
3691         switch (ts->kind) {
3692         case TEMP_FIXED:
3693         case TEMP_GLOBAL:
3694         case TEMP_TB:
3695             state = TS_DEAD | TS_MEM;
3696             break;
3697         case TEMP_EBB:
3698         case TEMP_CONST:
3699             state = TS_DEAD;
3700             break;
3701         default:
3702             g_assert_not_reached();
3703         }
3704         ts->state = state;
3705         la_reset_pref(ts);
3706     }
3707 }
3708 
3709 /* liveness analysis: sync globals back to memory.  */
3710 static void la_global_sync(TCGContext *s, int ng)
3711 {
3712     int i;
3713 
3714     for (i = 0; i < ng; ++i) {
3715         int state = s->temps[i].state;
3716         s->temps[i].state = state | TS_MEM;
3717         if (state == TS_DEAD) {
3718             /* If the global was previously dead, reset prefs.  */
3719             la_reset_pref(&s->temps[i]);
3720         }
3721     }
3722 }
3723 
3724 /*
3725  * liveness analysis: conditional branch: all temps are dead unless
3726  * explicitly live-across-conditional-branch, globals and local temps
3727  * should be synced.
3728  */
3729 static void la_bb_sync(TCGContext *s, int ng, int nt)
3730 {
3731     la_global_sync(s, ng);
3732 
3733     for (int i = ng; i < nt; ++i) {
3734         TCGTemp *ts = &s->temps[i];
3735         int state;
3736 
3737         switch (ts->kind) {
3738         case TEMP_TB:
3739             state = ts->state;
3740             ts->state = state | TS_MEM;
3741             if (state != TS_DEAD) {
3742                 continue;
3743             }
3744             break;
3745         case TEMP_EBB:
3746         case TEMP_CONST:
3747             continue;
3748         default:
3749             g_assert_not_reached();
3750         }
3751         la_reset_pref(&s->temps[i]);
3752     }
3753 }
3754 
3755 /* liveness analysis: sync globals back to memory and kill.  */
3756 static void la_global_kill(TCGContext *s, int ng)
3757 {
3758     int i;
3759 
3760     for (i = 0; i < ng; i++) {
3761         s->temps[i].state = TS_DEAD | TS_MEM;
3762         la_reset_pref(&s->temps[i]);
3763     }
3764 }
3765 
3766 /* liveness analysis: note live globals crossing calls.  */
3767 static void la_cross_call(TCGContext *s, int nt)
3768 {
3769     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3770     int i;
3771 
3772     for (i = 0; i < nt; i++) {
3773         TCGTemp *ts = &s->temps[i];
3774         if (!(ts->state & TS_DEAD)) {
3775             TCGRegSet *pset = la_temp_pref(ts);
3776             TCGRegSet set = *pset;
3777 
3778             set &= mask;
3779             /* If the combination is not possible, restart.  */
3780             if (set == 0) {
3781                 set = tcg_target_available_regs[ts->type] & mask;
3782             }
3783             *pset = set;
3784         }
3785     }
3786 }
3787 
3788 /*
3789  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3790  * to TEMP_EBB, if possible.
3791  */
3792 static void __attribute__((noinline))
3793 liveness_pass_0(TCGContext *s)
3794 {
3795     void * const multiple_ebb = (void *)(uintptr_t)-1;
3796     int nb_temps = s->nb_temps;
3797     TCGOp *op, *ebb;
3798 
3799     for (int i = s->nb_globals; i < nb_temps; ++i) {
3800         s->temps[i].state_ptr = NULL;
3801     }
3802 
3803     /*
3804      * Represent each EBB by the op at which it begins.  In the case of
3805      * the first EBB, this is the first op, otherwise it is a label.
3806      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3807      * within a single EBB, else MULTIPLE_EBB.
3808      */
3809     ebb = QTAILQ_FIRST(&s->ops);
3810     QTAILQ_FOREACH(op, &s->ops, link) {
3811         const TCGOpDef *def;
3812         int nb_oargs, nb_iargs;
3813 
3814         switch (op->opc) {
3815         case INDEX_op_set_label:
3816             ebb = op;
3817             continue;
3818         case INDEX_op_discard:
3819             continue;
3820         case INDEX_op_call:
3821             nb_oargs = TCGOP_CALLO(op);
3822             nb_iargs = TCGOP_CALLI(op);
3823             break;
3824         default:
3825             def = &tcg_op_defs[op->opc];
3826             nb_oargs = def->nb_oargs;
3827             nb_iargs = def->nb_iargs;
3828             break;
3829         }
3830 
3831         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3832             TCGTemp *ts = arg_temp(op->args[i]);
3833 
3834             if (ts->kind != TEMP_TB) {
3835                 continue;
3836             }
3837             if (ts->state_ptr == NULL) {
3838                 ts->state_ptr = ebb;
3839             } else if (ts->state_ptr != ebb) {
3840                 ts->state_ptr = multiple_ebb;
3841             }
3842         }
3843     }
3844 
3845     /*
3846      * For TEMP_TB that turned out not to be used beyond one EBB,
3847      * reduce the liveness to TEMP_EBB.
3848      */
3849     for (int i = s->nb_globals; i < nb_temps; ++i) {
3850         TCGTemp *ts = &s->temps[i];
3851         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3852             ts->kind = TEMP_EBB;
3853         }
3854     }
3855 }
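/*
 * Illustration (hypothetical IR, invented temps): if a TEMP_TB temp t2
 * is only ever touched within one EBB,
 *
 *     set_label $L0
 *     mov t2, t1
 *     add t3, t2, t2
 *
 * every use of t2 records the same EBB key (the set_label op), so t2 is
 * demoted to TEMP_EBB and need not be preserved across branches.
 */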
3856 
3857 /* Liveness analysis: update the opc_arg_life array to tell if a
3858    given input argument is dead. Instructions updating dead
3859    temporaries are removed. */
3860 static void __attribute__((noinline))
3861 liveness_pass_1(TCGContext *s)
3862 {
3863     int nb_globals = s->nb_globals;
3864     int nb_temps = s->nb_temps;
3865     TCGOp *op, *op_prev;
3866     TCGRegSet *prefs;
3867     int i;
3868 
3869     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3870     for (i = 0; i < nb_temps; ++i) {
3871         s->temps[i].state_ptr = prefs + i;
3872     }
3873 
3874     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3875     la_func_end(s, nb_globals, nb_temps);
3876 
3877     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3878         int nb_iargs, nb_oargs;
3879         TCGOpcode opc_new, opc_new2;
3880         bool have_opc_new2;
3881         TCGLifeData arg_life = 0;
3882         TCGTemp *ts;
3883         TCGOpcode opc = op->opc;
3884         const TCGOpDef *def = &tcg_op_defs[opc];
3885         const TCGArgConstraint *args_ct;
3886 
3887         switch (opc) {
3888         case INDEX_op_call:
3889             {
3890                 const TCGHelperInfo *info = tcg_call_info(op);
3891                 int call_flags = tcg_call_flags(op);
3892 
3893                 nb_oargs = TCGOP_CALLO(op);
3894                 nb_iargs = TCGOP_CALLI(op);
3895 
3896                 /* pure functions can be removed if their result is unused */
3897                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3898                     for (i = 0; i < nb_oargs; i++) {
3899                         ts = arg_temp(op->args[i]);
3900                         if (ts->state != TS_DEAD) {
3901                             goto do_not_remove_call;
3902                         }
3903                     }
3904                     goto do_remove;
3905                 }
3906             do_not_remove_call:
3907 
3908                 /* Output args are dead.  */
3909                 for (i = 0; i < nb_oargs; i++) {
3910                     ts = arg_temp(op->args[i]);
3911                     if (ts->state & TS_DEAD) {
3912                         arg_life |= DEAD_ARG << i;
3913                     }
3914                     if (ts->state & TS_MEM) {
3915                         arg_life |= SYNC_ARG << i;
3916                     }
3917                     ts->state = TS_DEAD;
3918                     la_reset_pref(ts);
3919                 }
3920 
3921                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3922                 memset(op->output_pref, 0, sizeof(op->output_pref));
3923 
3924                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3925                                     TCG_CALL_NO_READ_GLOBALS))) {
3926                     la_global_kill(s, nb_globals);
3927                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3928                     la_global_sync(s, nb_globals);
3929                 }
3930 
3931                 /* Record arguments that die in this helper.  */
3932                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3933                     ts = arg_temp(op->args[i]);
3934                     if (ts->state & TS_DEAD) {
3935                         arg_life |= DEAD_ARG << i;
3936                     }
3937                 }
3938 
3939                 /* For all live registers, remove call-clobbered prefs.  */
3940                 la_cross_call(s, nb_temps);
3941 
3942                 /*
3943                  * Input arguments are live for preceding opcodes.
3944                  *
3945                  * For those arguments that die, and will be allocated in
3946                  * registers, clear the register set for that arg, to be
3947                  * filled in below.  For args that will be on the stack,
3948                  * reset to any available reg.  Process arguments in reverse
3949                  * order so that if a temp is used more than once, the stack
3950                  * reset to max happens before the register reset to 0.
3951                  */
3952                 for (i = nb_iargs - 1; i >= 0; i--) {
3953                     const TCGCallArgumentLoc *loc = &info->in[i];
3954                     ts = arg_temp(op->args[nb_oargs + i]);
3955 
3956                     if (ts->state & TS_DEAD) {
3957                         switch (loc->kind) {
3958                         case TCG_CALL_ARG_NORMAL:
3959                         case TCG_CALL_ARG_EXTEND_U:
3960                         case TCG_CALL_ARG_EXTEND_S:
3961                             if (arg_slot_reg_p(loc->arg_slot)) {
3962                                 *la_temp_pref(ts) = 0;
3963                                 break;
3964                             }
3965                             /* fall through */
3966                         default:
3967                             *la_temp_pref(ts) =
3968                                 tcg_target_available_regs[ts->type];
3969                             break;
3970                         }
3971                         ts->state &= ~TS_DEAD;
3972                     }
3973                 }
3974 
3975                 /*
3976                  * For each input argument, add its input register to prefs.
3977                  * If a temp is used once, this produces a single set bit;
3978                  * if a temp is used multiple times, this produces a set.
3979                  */
3980                 for (i = 0; i < nb_iargs; i++) {
3981                     const TCGCallArgumentLoc *loc = &info->in[i];
3982                     ts = arg_temp(op->args[nb_oargs + i]);
3983 
3984                     switch (loc->kind) {
3985                     case TCG_CALL_ARG_NORMAL:
3986                     case TCG_CALL_ARG_EXTEND_U:
3987                     case TCG_CALL_ARG_EXTEND_S:
3988                         if (arg_slot_reg_p(loc->arg_slot)) {
3989                             tcg_regset_set_reg(*la_temp_pref(ts),
3990                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3991                         }
3992                         break;
3993                     default:
3994                         break;
3995                     }
3996                 }
3997             }
3998             break;
3999         case INDEX_op_insn_start:
4000             break;
4001         case INDEX_op_discard:
4002             /* mark the temporary as dead */
4003             ts = arg_temp(op->args[0]);
4004             ts->state = TS_DEAD;
4005             la_reset_pref(ts);
4006             break;
4007 
4008         case INDEX_op_add2_i32:
4009         case INDEX_op_add2_i64:
4010             opc_new = INDEX_op_add;
4011             goto do_addsub2;
4012         case INDEX_op_sub2_i32:
4013         case INDEX_op_sub2_i64:
4014             opc_new = INDEX_op_sub;
4015         do_addsub2:
4016             nb_iargs = 4;
4017             nb_oargs = 2;
4018             /* Test if the high part of the operation is dead, but not
4019                the low part.  The result can be optimized to a simple
4020                add or sub.  This happens often for an x86_64 guest when
4021                the cpu mode is set to 32 bit.  */
4022             if (arg_temp(op->args[1])->state == TS_DEAD) {
4023                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4024                     goto do_remove;
4025                 }
4026                 /* Replace the opcode and adjust the args in place,
4027                    leaving 3 unused args at the end.  */
4028                 op->opc = opc = opc_new;
4029                 op->args[1] = op->args[2];
4030                 op->args[2] = op->args[4];
4031                 /* Fall through and mark the single-word operation live.  */
4032                 nb_iargs = 2;
4033                 nb_oargs = 1;
4034             }
4035             goto do_not_remove;
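            /*
             * Illustration (hypothetical IR): when only the low half of
             *     add2_i32 lo, hi, al, ah, bl, bh
             * is live, the rewrite above turns the op into
             *     add lo, al, bl
             * and the carry computation disappears.
             */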
4036 
4037         case INDEX_op_mulu2_i32:
4038             opc_new = INDEX_op_mul_i32;
4039             opc_new2 = INDEX_op_muluh_i32;
4040             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4041             goto do_mul2;
4042         case INDEX_op_muls2_i32:
4043             opc_new = INDEX_op_mul_i32;
4044             opc_new2 = INDEX_op_mulsh_i32;
4045             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4046             goto do_mul2;
4047         case INDEX_op_mulu2_i64:
4048             opc_new = INDEX_op_mul_i64;
4049             opc_new2 = INDEX_op_muluh_i64;
4050             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4051             goto do_mul2;
4052         case INDEX_op_muls2_i64:
4053             opc_new = INDEX_op_mul_i64;
4054             opc_new2 = INDEX_op_mulsh_i64;
4055             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4056             goto do_mul2;
4057         do_mul2:
4058             nb_iargs = 2;
4059             nb_oargs = 2;
4060             if (arg_temp(op->args[1])->state == TS_DEAD) {
4061                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4062                     /* Both parts of the operation are dead.  */
4063                     goto do_remove;
4064                 }
4065                 /* The high part of the operation is dead; generate the low. */
4066                 op->opc = opc = opc_new;
4067                 op->args[1] = op->args[2];
4068                 op->args[2] = op->args[3];
4069             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4070                 /* The low part of the operation is dead; generate the high. */
4071                 op->opc = opc = opc_new2;
4072                 op->args[0] = op->args[1];
4073                 op->args[1] = op->args[2];
4074                 op->args[2] = op->args[3];
4075             } else {
4076                 goto do_not_remove;
4077             }
4078             /* Mark the single-word operation live.  */
4079             nb_oargs = 1;
4080             goto do_not_remove;
4081 
4082         default:
4083             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4084             nb_iargs = def->nb_iargs;
4085             nb_oargs = def->nb_oargs;
4086 
4087             /* Test if the operation can be removed because all
4088                its outputs are dead. We assume that nb_oargs == 0
4089                implies side effects */
4090             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4091                 for (i = 0; i < nb_oargs; i++) {
4092                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4093                         goto do_not_remove;
4094                     }
4095                 }
4096                 goto do_remove;
4097             }
4098             goto do_not_remove;
4099 
4100         do_remove:
4101             tcg_op_remove(s, op);
4102             break;
4103 
4104         do_not_remove:
4105             for (i = 0; i < nb_oargs; i++) {
4106                 ts = arg_temp(op->args[i]);
4107 
4108                 /* Remember the preference of the uses that followed.  */
4109                 if (i < ARRAY_SIZE(op->output_pref)) {
4110                     op->output_pref[i] = *la_temp_pref(ts);
4111                 }
4112 
4113                 /* Output args are dead.  */
4114                 if (ts->state & TS_DEAD) {
4115                     arg_life |= DEAD_ARG << i;
4116                 }
4117                 if (ts->state & TS_MEM) {
4118                     arg_life |= SYNC_ARG << i;
4119                 }
4120                 ts->state = TS_DEAD;
4121                 la_reset_pref(ts);
4122             }
4123 
4124             /* If end of basic block, update.  */
4125             if (def->flags & TCG_OPF_BB_EXIT) {
4126                 la_func_end(s, nb_globals, nb_temps);
4127             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4128                 la_bb_sync(s, nb_globals, nb_temps);
4129             } else if (def->flags & TCG_OPF_BB_END) {
4130                 la_bb_end(s, nb_globals, nb_temps);
4131             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4132                 la_global_sync(s, nb_globals);
4133                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4134                     la_cross_call(s, nb_temps);
4135                 }
4136             }
4137 
4138             /* Record arguments that die in this opcode.  */
4139             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4140                 ts = arg_temp(op->args[i]);
4141                 if (ts->state & TS_DEAD) {
4142                     arg_life |= DEAD_ARG << i;
4143                 }
4144             }
4145 
4146             /* Input arguments are live for preceding opcodes.  */
4147             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4148                 ts = arg_temp(op->args[i]);
4149                 if (ts->state & TS_DEAD) {
4150                     /* For operands that were dead, initially allow
4151                        all regs for the type.  */
4152                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4153                     ts->state &= ~TS_DEAD;
4154                 }
4155             }
4156 
4157             /* Incorporate constraints for this operand.  */
4158             switch (opc) {
4159             case INDEX_op_mov:
4160                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4161                    have proper constraints.  That said, special case
4162                    moves to propagate preferences backward.  */
4163                 if (IS_DEAD_ARG(1)) {
4164                     *la_temp_pref(arg_temp(op->args[0]))
4165                         = *la_temp_pref(arg_temp(op->args[1]));
4166                 }
4167                 break;
4168 
4169             default:
4170                 args_ct = opcode_args_ct(op);
4171                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4172                     const TCGArgConstraint *ct = &args_ct[i];
4173                     TCGRegSet set, *pset;
4174 
4175                     ts = arg_temp(op->args[i]);
4176                     pset = la_temp_pref(ts);
4177                     set = *pset;
4178 
4179                     set &= ct->regs;
4180                     if (ct->ialias) {
4181                         set &= output_pref(op, ct->alias_index);
4182                     }
4183                     /* If the combination is not possible, restart.  */
4184                     if (set == 0) {
4185                         set = ct->regs;
4186                     }
4187                     *pset = set;
4188                 }
4189                 break;
4190             }
4191             break;
4192         }
4193         op->life = arg_life;
4194     }
4195 }
4196 
4197 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4198 static bool __attribute__((noinline))
4199 liveness_pass_2(TCGContext *s)
4200 {
4201     int nb_globals = s->nb_globals;
4202     int nb_temps, i;
4203     bool changes = false;
4204     TCGOp *op, *op_next;
4205 
4206     /* Create a temporary for each indirect global.  */
4207     for (i = 0; i < nb_globals; ++i) {
4208         TCGTemp *its = &s->temps[i];
4209         if (its->indirect_reg) {
4210             TCGTemp *dts = tcg_temp_alloc(s);
4211             dts->type = its->type;
4212             dts->base_type = its->base_type;
4213             dts->temp_subindex = its->temp_subindex;
4214             dts->kind = TEMP_EBB;
4215             its->state_ptr = dts;
4216         } else {
4217             its->state_ptr = NULL;
4218         }
4219         /* All globals begin dead.  */
4220         its->state = TS_DEAD;
4221     }
4222     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4223         TCGTemp *its = &s->temps[i];
4224         its->state_ptr = NULL;
4225         its->state = TS_DEAD;
4226     }
4227 
4228     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4229         TCGOpcode opc = op->opc;
4230         const TCGOpDef *def = &tcg_op_defs[opc];
4231         TCGLifeData arg_life = op->life;
4232         int nb_iargs, nb_oargs, call_flags;
4233         TCGTemp *arg_ts, *dir_ts;
4234 
4235         if (opc == INDEX_op_call) {
4236             nb_oargs = TCGOP_CALLO(op);
4237             nb_iargs = TCGOP_CALLI(op);
4238             call_flags = tcg_call_flags(op);
4239         } else {
4240             nb_iargs = def->nb_iargs;
4241             nb_oargs = def->nb_oargs;
4242 
4243             /* Set flags similar to how calls require.  */
4244             if (def->flags & TCG_OPF_COND_BRANCH) {
4245                 /* Like reading globals: sync_globals */
4246                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4247             } else if (def->flags & TCG_OPF_BB_END) {
4248                 /* Like writing globals: save_globals */
4249                 call_flags = 0;
4250             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4251                 /* Like reading globals: sync_globals */
4252                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4253             } else {
4254                 /* No effect on globals.  */
4255                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4256                               TCG_CALL_NO_WRITE_GLOBALS);
4257             }
4258         }
4259 
4260         /* Make sure that input arguments are available.  */
4261         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4262             arg_ts = arg_temp(op->args[i]);
4263             dir_ts = arg_ts->state_ptr;
4264             if (dir_ts && arg_ts->state == TS_DEAD) {
4265                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4266                                   ? INDEX_op_ld_i32
4267                                   : INDEX_op_ld_i64);
4268                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4269                                                   arg_ts->type, 3);
4270 
4271                 lop->args[0] = temp_arg(dir_ts);
4272                 lop->args[1] = temp_arg(arg_ts->mem_base);
4273                 lop->args[2] = arg_ts->mem_offset;
4274 
4275                 /* Loaded, but synced with memory.  */
4276                 arg_ts->state = TS_MEM;
4277             }
4278         }
4279 
4280         /* Perform input replacement, and mark inputs that became dead.
4281            No action is required except keeping temp_state up to date
4282            so that we reload when needed.  */
4283         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4284             arg_ts = arg_temp(op->args[i]);
4285             dir_ts = arg_ts->state_ptr;
4286             if (dir_ts) {
4287                 op->args[i] = temp_arg(dir_ts);
4288                 changes = true;
4289                 if (IS_DEAD_ARG(i)) {
4290                     arg_ts->state = TS_DEAD;
4291                 }
4292             }
4293         }
4294 
4295         /* Liveness analysis should ensure that the following are
4296            all correct, for call sites and basic block end points.  */
4297         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4298             /* Nothing to do */
4299         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4300             for (i = 0; i < nb_globals; ++i) {
4301                 /* Liveness should see that globals are synced back,
4302                    that is, either TS_DEAD or TS_MEM.  */
4303                 arg_ts = &s->temps[i];
4304                 tcg_debug_assert(arg_ts->state_ptr == 0
4305                                  || arg_ts->state != 0);
4306             }
4307         } else {
4308             for (i = 0; i < nb_globals; ++i) {
4309                 /* Liveness should see that globals are saved back,
4310                    that is, TS_DEAD, waiting to be reloaded.  */
4311                 arg_ts = &s->temps[i];
4312                 tcg_debug_assert(arg_ts->state_ptr == 0
4313                                  || arg_ts->state == TS_DEAD);
4314             }
4315         }
4316 
4317         /* Outputs become available.  */
4318         if (opc == INDEX_op_mov) {
4319             arg_ts = arg_temp(op->args[0]);
4320             dir_ts = arg_ts->state_ptr;
4321             if (dir_ts) {
4322                 op->args[0] = temp_arg(dir_ts);
4323                 changes = true;
4324 
4325                 /* The output is now live and modified.  */
4326                 arg_ts->state = 0;
4327 
4328                 if (NEED_SYNC_ARG(0)) {
4329                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4330                                       ? INDEX_op_st_i32
4331                                       : INDEX_op_st_i64);
4332                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4333                                                      arg_ts->type, 3);
4334                     TCGTemp *out_ts = dir_ts;
4335 
4336                     if (IS_DEAD_ARG(0)) {
4337                         out_ts = arg_temp(op->args[1]);
4338                         arg_ts->state = TS_DEAD;
4339                         tcg_op_remove(s, op);
4340                     } else {
4341                         arg_ts->state = TS_MEM;
4342                     }
4343 
4344                     sop->args[0] = temp_arg(out_ts);
4345                     sop->args[1] = temp_arg(arg_ts->mem_base);
4346                     sop->args[2] = arg_ts->mem_offset;
4347                 } else {
4348                     tcg_debug_assert(!IS_DEAD_ARG(0));
4349                 }
4350             }
4351         } else {
4352             for (i = 0; i < nb_oargs; i++) {
4353                 arg_ts = arg_temp(op->args[i]);
4354                 dir_ts = arg_ts->state_ptr;
4355                 if (!dir_ts) {
4356                     continue;
4357                 }
4358                 op->args[i] = temp_arg(dir_ts);
4359                 changes = true;
4360 
4361                 /* The output is now live and modified.  */
4362                 arg_ts->state = 0;
4363 
4364                 /* Sync outputs upon their last write.  */
4365                 if (NEED_SYNC_ARG(i)) {
4366                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4367                                       ? INDEX_op_st_i32
4368                                       : INDEX_op_st_i64);
4369                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4370                                                      arg_ts->type, 3);
4371 
4372                     sop->args[0] = temp_arg(dir_ts);
4373                     sop->args[1] = temp_arg(arg_ts->mem_base);
4374                     sop->args[2] = arg_ts->mem_offset;
4375 
4376                     arg_ts->state = TS_MEM;
4377                 }
4378                 /* Drop outputs that are dead.  */
4379                 if (IS_DEAD_ARG(i)) {
4380                     arg_ts->state = TS_DEAD;
4381                 }
4382             }
4383         }
4384     }
4385 
4386     return changes;
4387 }
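/*
 * Illustration (hypothetical IR): for an indirect global g stored at
 * env+0x10, a use such as
 *
 *     add t0, g, t1
 *
 * becomes, after this pass,
 *
 *     ld_i32 g2, env, 0x10
 *     add t0, g2, t1
 *
 * where g2 is the direct TEMP_EBB temp created above, and a matching
 * st_i32 is inserted after the last write whenever a sync is required.
 */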
4388 
4389 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4390 {
4391     intptr_t off;
4392     int size, align;
4393 
4394     /* When allocating an object, look at the full type. */
4395     size = tcg_type_size(ts->base_type);
4396     switch (ts->base_type) {
4397     case TCG_TYPE_I32:
4398         align = 4;
4399         break;
4400     case TCG_TYPE_I64:
4401     case TCG_TYPE_V64:
4402         align = 8;
4403         break;
4404     case TCG_TYPE_I128:
4405     case TCG_TYPE_V128:
4406     case TCG_TYPE_V256:
4407         /*
4408          * Note that we do not require aligned storage for V256,
4409          * and that we provide alignment for I128 to match V128,
4410          * even if that's above what the host ABI requires.
4411          */
4412         align = 16;
4413         break;
4414     default:
4415         g_assert_not_reached();
4416     }
4417 
4418     /*
4419      * Assume the stack is sufficiently aligned.
4420      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4421      * and do not require 16 byte vector alignment.  This seems slightly
4422      * easier than fully parameterizing the above switch statement.
4423      */
4424     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4425     off = ROUND_UP(s->current_frame_offset, align);
4426 
4427     /* If we've exhausted the stack frame, restart with a smaller TB. */
4428     if (off + size > s->frame_end) {
4429         tcg_raise_tb_overflow(s);
4430     }
4431     s->current_frame_offset = off + size;
4432 #if defined(__sparc__)
4433     off += TCG_TARGET_STACK_BIAS;
4434 #endif
4435 
4436     /* If the object was subdivided, assign memory to all the parts. */
4437     if (ts->base_type != ts->type) {
4438         int part_size = tcg_type_size(ts->type);
4439         int part_count = size / part_size;
4440 
4441         /*
4442          * Each part is allocated sequentially in tcg_temp_new_internal.
4443          * Jump back to the first part by subtracting the current index.
4444          */
4445         ts -= ts->temp_subindex;
4446         for (int i = 0; i < part_count; ++i) {
4447             ts[i].mem_offset = off + i * part_size;
4448             ts[i].mem_base = s->frame_temp;
4449             ts[i].mem_allocated = 1;
4450         }
4451     } else {
4452         ts->mem_offset = off;
4453         ts->mem_base = s->frame_temp;
4454         ts->mem_allocated = 1;
4455     }
4456 }
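/*
 * Worked example (values invented, assuming TCG_TARGET_STACK_ALIGN >= 16):
 * with current_frame_offset == 4, an I128 temp has size 16 and align 16,
 * so off = ROUND_UP(4, 16) = 16 and the frame cursor advances to 32.  On
 * a host that splits I128 into two I64 parts, part 0 is assigned
 * mem_offset 16 and part 1 mem_offset 24, both based on frame_temp.
 */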
4457 
4458 /* Assign @reg to @ts, and update reg_to_temp[]. */
4459 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4460 {
4461     if (ts->val_type == TEMP_VAL_REG) {
4462         TCGReg old = ts->reg;
4463         tcg_debug_assert(s->reg_to_temp[old] == ts);
4464         if (old == reg) {
4465             return;
4466         }
4467         s->reg_to_temp[old] = NULL;
4468     }
4469     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4470     s->reg_to_temp[reg] = ts;
4471     ts->val_type = TEMP_VAL_REG;
4472     ts->reg = reg;
4473 }
4474 
4475 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4476 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4477 {
4478     tcg_debug_assert(type != TEMP_VAL_REG);
4479     if (ts->val_type == TEMP_VAL_REG) {
4480         TCGReg reg = ts->reg;
4481         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4482         s->reg_to_temp[reg] = NULL;
4483     }
4484     ts->val_type = type;
4485 }
4486 
4487 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4488 
4489 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4490    mark it free; otherwise mark it dead.  */
4491 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4492 {
4493     TCGTempVal new_type;
4494 
4495     switch (ts->kind) {
4496     case TEMP_FIXED:
4497         return;
4498     case TEMP_GLOBAL:
4499     case TEMP_TB:
4500         new_type = TEMP_VAL_MEM;
4501         break;
4502     case TEMP_EBB:
4503         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4504         break;
4505     case TEMP_CONST:
4506         new_type = TEMP_VAL_CONST;
4507         break;
4508     default:
4509         g_assert_not_reached();
4510     }
4511     set_temp_val_nonreg(s, ts, new_type);
4512 }
4513 
4514 /* Mark a temporary as dead.  */
4515 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4516 {
4517     temp_free_or_dead(s, ts, 1);
4518 }
4519 
4520 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4521    register needs to be allocated to store a constant.  If 'free_or_dead'
4522    is non-zero, subsequently release the temporary; if it is positive, the
4523    temp is dead; if it is negative, the temp is free.  */
4524 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4525                       TCGRegSet preferred_regs, int free_or_dead)
4526 {
4527     if (!temp_readonly(ts) && !ts->mem_coherent) {
4528         if (!ts->mem_allocated) {
4529             temp_allocate_frame(s, ts);
4530         }
4531         switch (ts->val_type) {
4532         case TEMP_VAL_CONST:
4533             /* If we're going to free the temp immediately, then we won't
4534                require it later in a register, so attempt to store the
4535                constant to memory directly.  */
4536             if (free_or_dead
4537                 && tcg_out_sti(s, ts->type, ts->val,
4538                                ts->mem_base->reg, ts->mem_offset)) {
4539                 break;
4540             }
4541             temp_load(s, ts, tcg_target_available_regs[ts->type],
4542                       allocated_regs, preferred_regs);
4543             /* fallthrough */
4544 
4545         case TEMP_VAL_REG:
4546             tcg_out_st(s, ts->type, ts->reg,
4547                        ts->mem_base->reg, ts->mem_offset);
4548             break;
4549 
4550         case TEMP_VAL_MEM:
4551             break;
4552 
4553         case TEMP_VAL_DEAD:
4554         default:
4555             g_assert_not_reached();
4556         }
4557         ts->mem_coherent = 1;
4558     }
4559     if (free_or_dead) {
4560         temp_free_or_dead(s, ts, free_or_dead);
4561     }
4562 }
4563 
4564 /* free register 'reg' by spilling the corresponding temporary if necessary */
4565 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4566 {
4567     TCGTemp *ts = s->reg_to_temp[reg];
4568     if (ts != NULL) {
4569         temp_sync(s, ts, allocated_regs, 0, -1);
4570     }
4571 }
4572 
4573 /**
4574  * tcg_reg_alloc:
4575  * @required_regs: Set of registers in which we must allocate.
4576  * @allocated_regs: Set of registers which must be avoided.
4577  * @preferred_regs: Set of registers we should prefer.
4578  * @rev: True if we search the registers in "indirect" order.
4579  *
4580  * The allocated register must be in @required_regs & ~@allocated_regs,
4581  * but if we can put it in @preferred_regs we may save a move later.
4582  */
4583 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4584                             TCGRegSet allocated_regs,
4585                             TCGRegSet preferred_regs, bool rev)
4586 {
4587     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4588     TCGRegSet reg_ct[2];
4589     const int *order;
4590 
4591     reg_ct[1] = required_regs & ~allocated_regs;
4592     tcg_debug_assert(reg_ct[1] != 0);
4593     reg_ct[0] = reg_ct[1] & preferred_regs;
4594 
4595     /* Skip the preferred_regs option if it cannot be satisfied,
4596        or if the preference made no difference.  */
4597     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4598 
4599     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4600 
4601     /* Try free registers, preferences first.  */
4602     for (j = f; j < 2; j++) {
4603         TCGRegSet set = reg_ct[j];
4604 
4605         if (tcg_regset_single(set)) {
4606             /* One register in the set.  */
4607             TCGReg reg = tcg_regset_first(set);
4608             if (s->reg_to_temp[reg] == NULL) {
4609                 return reg;
4610             }
4611         } else {
4612             for (i = 0; i < n; i++) {
4613                 TCGReg reg = order[i];
4614                 if (s->reg_to_temp[reg] == NULL &&
4615                     tcg_regset_test_reg(set, reg)) {
4616                     return reg;
4617                 }
4618             }
4619         }
4620     }
4621 
4622     /* We must spill something.  */
4623     for (j = f; j < 2; j++) {
4624         TCGRegSet set = reg_ct[j];
4625 
4626         if (tcg_regset_single(set)) {
4627             /* One register in the set.  */
4628             TCGReg reg = tcg_regset_first(set);
4629             tcg_reg_free(s, reg, allocated_regs);
4630             return reg;
4631         } else {
4632             for (i = 0; i < n; i++) {
4633                 TCGReg reg = order[i];
4634                 if (tcg_regset_test_reg(set, reg)) {
4635                     tcg_reg_free(s, reg, allocated_regs);
4636                     return reg;
4637                 }
4638             }
4639         }
4640     }
4641 
4642     g_assert_not_reached();
4643 }
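/*
 * Illustration only (guarded out of the build): with four required
 * registers, one of them already allocated and two preferred, the first
 * pass scans only the satisfiable preferences.  The register numbers
 * are invented for the example.
 */
#if 0
static void example_reg_alloc(TCGContext *s)
{
    TCGRegSet required  = 0xf; /* r0-r3 */
    TCGRegSet allocated = 0x1; /* r0 busy within this op */
    TCGRegSet preferred = 0x3; /* r0, r1 */

    /* reg_ct[1] = 0xe (r1-r3), reg_ct[0] = 0x2 (r1): r1 wins if free. */
    TCGReg reg = tcg_reg_alloc(s, required, allocated, preferred, false);
    (void)reg;
}
#endif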
4644 
4645 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4646                                  TCGRegSet allocated_regs,
4647                                  TCGRegSet preferred_regs, bool rev)
4648 {
4649     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4650     TCGRegSet reg_ct[2];
4651     const int *order;
4652 
4653     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4654     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4655     tcg_debug_assert(reg_ct[1] != 0);
4656     reg_ct[0] = reg_ct[1] & preferred_regs;
4657 
4658     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4659 
4660     /*
4661      * Skip the preferred_regs option if it cannot be satisfied,
4662      * or if the preference made no difference.
4663      */
4664     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4665 
4666     /*
4667      * Minimize the number of flushes by looking for 2 free registers first,
4668      * then a single flush, then two flushes.
4669      */
4670     for (fmin = 2; fmin >= 0; fmin--) {
4671         for (j = k; j < 2; j++) {
4672             TCGRegSet set = reg_ct[j];
4673 
4674             for (i = 0; i < n; i++) {
4675                 TCGReg reg = order[i];
4676 
4677                 if (tcg_regset_test_reg(set, reg)) {
4678                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4679                     if (f >= fmin) {
4680                         tcg_reg_free(s, reg, allocated_regs);
4681                         tcg_reg_free(s, reg + 1, allocated_regs);
4682                         return reg;
4683                     }
4684                 }
4685             }
4686         }
4687     }
4688     g_assert_not_reached();
4689 }
4690 
4691 /* Make sure the temporary is in a register.  If needed, allocate the register
4692    from DESIRED while avoiding ALLOCATED.  */
4693 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4694                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4695 {
4696     TCGReg reg;
4697 
4698     switch (ts->val_type) {
4699     case TEMP_VAL_REG:
4700         return;
4701     case TEMP_VAL_CONST:
4702         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4703                             preferred_regs, ts->indirect_base);
4704         if (ts->type <= TCG_TYPE_I64) {
4705             tcg_out_movi(s, ts->type, reg, ts->val);
4706         } else {
4707             uint64_t val = ts->val;
4708             MemOp vece = MO_64;
4709 
4710             /*
4711              * Find the minimal vector element that matches the constant.
4712              * The targets will, in general, have to do this search anyway,
4713              * so do it generically here.
4714              */
4715             if (val == dup_const(MO_8, val)) {
4716                 vece = MO_8;
4717             } else if (val == dup_const(MO_16, val)) {
4718                 vece = MO_16;
4719             } else if (val == dup_const(MO_32, val)) {
4720                 vece = MO_32;
4721             }
4722 
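            /*
             * For example, val == 0xffffffffffffffff reduces to a MO_8
             * dup of 0xff, and val == 0x0001000100010001 to a MO_16 dup
             * of 0x0001; a value with no repeating pattern stays MO_64.
             */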
4723             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4724         }
4725         ts->mem_coherent = 0;
4726         break;
4727     case TEMP_VAL_MEM:
4728         if (!ts->mem_allocated) {
4729             temp_allocate_frame(s, ts);
4730         }
4731         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4732                             preferred_regs, ts->indirect_base);
4733         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4734         ts->mem_coherent = 1;
4735         break;
4736     case TEMP_VAL_DEAD:
4737     default:
4738         g_assert_not_reached();
4739     }
4740     set_temp_val_reg(s, ts, reg);
4741 }
4742 
4743 /* Save a temporary to memory. 'allocated_regs' is used in case a
4744    temporary register needs to be allocated to store a constant.  */
4745 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4746 {
4747     /* The liveness analysis already ensures that globals are back
4748        in memory. Keep a tcg_debug_assert for safety. */
4749     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4750 }
4751 
4752 /* Save globals to their canonical location and assume they can be
4753    modified by the following code. 'allocated_regs' is used in case a
4754    temporary register needs to be allocated to store a constant. */
4755 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4756 {
4757     int i, n;
4758 
4759     for (i = 0, n = s->nb_globals; i < n; i++) {
4760         temp_save(s, &s->temps[i], allocated_regs);
4761     }
4762 }
4763 
4764 /* Sync globals to their canonical location and assume they can be
4765    read by the following code. 'allocated_regs' is used in case a
4766    temporary register needs to be allocated to store a constant. */
4767 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4768 {
4769     int i, n;
4770 
4771     for (i = 0, n = s->nb_globals; i < n; i++) {
4772         TCGTemp *ts = &s->temps[i];
4773         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4774                          || ts->kind == TEMP_FIXED
4775                          || ts->mem_coherent);
4776     }
4777 }
4778 
4779 /* At the end of a basic block, we assume all temporaries are dead and
4780    all globals are stored at their canonical location. */
4781 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4782 {
4783     int i;
4784 
4785     for (i = s->nb_globals; i < s->nb_temps; i++) {
4786         TCGTemp *ts = &s->temps[i];
4787 
4788         switch (ts->kind) {
4789         case TEMP_TB:
4790             temp_save(s, ts, allocated_regs);
4791             break;
4792         case TEMP_EBB:
4793             /* The liveness analysis already ensures that temps are dead.
4794                Keep a tcg_debug_assert for safety. */
4795             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4796             break;
4797         case TEMP_CONST:
4798             /* Similarly, we should have freed any allocated register. */
4799             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4800             break;
4801         default:
4802             g_assert_not_reached();
4803         }
4804     }
4805 
4806     save_globals(s, allocated_regs);
4807 }
4808 
4809 /*
4810  * At a conditional branch, we assume all temporaries are dead unless
4811  * explicitly live-across-conditional-branch; all globals and local
4812  * temps are synced to their location.
4813  */
4814 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4815 {
4816     sync_globals(s, allocated_regs);
4817 
4818     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4819         TCGTemp *ts = &s->temps[i];
4820         /*
4821          * The liveness analysis already ensures that temps are dead.
4822          * Keep tcg_debug_asserts for safety.
4823          */
4824         switch (ts->kind) {
4825         case TEMP_TB:
4826             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4827             break;
4828         case TEMP_EBB:
4829         case TEMP_CONST:
4830             break;
4831         default:
4832             g_assert_not_reached();
4833         }
4834     }
4835 }
4836 
4837 /*
4838  * Specialized code generation for INDEX_op_mov_* with a constant.
4839  */
4840 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4841                                   tcg_target_ulong val, TCGLifeData arg_life,
4842                                   TCGRegSet preferred_regs)
4843 {
4844     /* ENV should not be modified.  */
4845     tcg_debug_assert(!temp_readonly(ots));
4846 
4847     /* The movi is not explicitly generated here.  */
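    /*
     * The temp merely records TEMP_VAL_CONST and the value; a host movi
     * is emitted later by temp_load() if some op needs the value in a
     * register, or by temp_sync() just below, which may instead store
     * the immediate directly to the memory slot.
     */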
4848     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4849     ots->val = val;
4850     ots->mem_coherent = 0;
4851     if (NEED_SYNC_ARG(0)) {
4852         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4853     } else if (IS_DEAD_ARG(0)) {
4854         temp_dead(s, ots);
4855     }
4856 }
4857 
4858 /*
4859  * Specialized code generation for INDEX_op_mov_*.
4860  */
4861 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4862 {
4863     const TCGLifeData arg_life = op->life;
4864     TCGRegSet allocated_regs, preferred_regs;
4865     TCGTemp *ts, *ots;
4866     TCGType otype, itype;
4867     TCGReg oreg, ireg;
4868 
4869     allocated_regs = s->reserved_regs;
4870     preferred_regs = output_pref(op, 0);
4871     ots = arg_temp(op->args[0]);
4872     ts = arg_temp(op->args[1]);
4873 
4874     /* ENV should not be modified.  */
4875     tcg_debug_assert(!temp_readonly(ots));
4876 
4877     /* Note that otype != itype for no-op truncation.  */
4878     otype = ots->type;
4879     itype = ts->type;
4880 
4881     if (ts->val_type == TEMP_VAL_CONST) {
4882         /* propagate constant or generate sti */
4883         tcg_target_ulong val = ts->val;
4884         if (IS_DEAD_ARG(1)) {
4885             temp_dead(s, ts);
4886         }
4887         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4888         return;
4889     }
4890 
4891     /* If the source value is in memory we're going to be forced
4892        to have it in a register in order to perform the copy.  Copy
4893        the SOURCE value into its own register first, that way we
4894        don't have to reload SOURCE the next time it is used. */
4895     if (ts->val_type == TEMP_VAL_MEM) {
4896         temp_load(s, ts, tcg_target_available_regs[itype],
4897                   allocated_regs, preferred_regs);
4898     }
4899     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4900     ireg = ts->reg;
4901 
4902     if (IS_DEAD_ARG(0)) {
4903         /* mov to a non-saved dead register makes no sense (even with
4904            liveness analysis disabled). */
4905         tcg_debug_assert(NEED_SYNC_ARG(0));
4906         if (!ots->mem_allocated) {
4907             temp_allocate_frame(s, ots);
4908         }
4909         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4910         if (IS_DEAD_ARG(1)) {
4911             temp_dead(s, ts);
4912         }
4913         temp_dead(s, ots);
4914         return;
4915     }
4916 
4917     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4918         /*
4919          * The mov can be suppressed.  Kill input first, so that it
4920          * is unlinked from reg_to_temp, then set the output to the
4921          * reg that we saved from the input.
4922          */
4923         temp_dead(s, ts);
4924         oreg = ireg;
4925     } else {
4926         if (ots->val_type == TEMP_VAL_REG) {
4927             oreg = ots->reg;
4928         } else {
4929             /* Make sure to not spill the input register during allocation. */
4930             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4931                                  allocated_regs | ((TCGRegSet)1 << ireg),
4932                                  preferred_regs, ots->indirect_base);
4933         }
4934         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4935             /*
4936              * Cross register class move not supported.
4937              * Store the source register into the destination slot
4938              * and leave the destination temp as TEMP_VAL_MEM.
4939              */
4940             assert(!temp_readonly(ots));
4941             if (!ts->mem_allocated) {
4942                 temp_allocate_frame(s, ots);
4943             }
4944             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4945             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4946             ots->mem_coherent = 1;
4947             return;
4948         }
4949     }
4950     set_temp_val_reg(s, ots, oreg);
4951     ots->mem_coherent = 0;
4952 
4953     if (NEED_SYNC_ARG(0)) {
4954         temp_sync(s, ots, allocated_regs, 0, 0);
4955     }
4956 }
4957 
4958 /*
4959  * Specialized code generation for INDEX_op_dup_vec.
4960  */
4961 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4962 {
4963     const TCGLifeData arg_life = op->life;
4964     TCGRegSet dup_out_regs, dup_in_regs;
4965     const TCGArgConstraint *dup_args_ct;
4966     TCGTemp *its, *ots;
4967     TCGType itype, vtype;
4968     unsigned vece;
4969     int lowpart_ofs;
4970     bool ok;
4971 
4972     ots = arg_temp(op->args[0]);
4973     its = arg_temp(op->args[1]);
4974 
4975     /* ENV should not be modified.  */
4976     tcg_debug_assert(!temp_readonly(ots));
4977 
4978     itype = its->type;
4979     vece = TCGOP_VECE(op);
4980     vtype = TCGOP_TYPE(op);
4981 
4982     if (its->val_type == TEMP_VAL_CONST) {
4983         /* Propagate constant via movi -> dupi.  */
4984         tcg_target_ulong val = its->val;
4985         if (IS_DEAD_ARG(1)) {
4986             temp_dead(s, its);
4987         }
4988         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4989         return;
4990     }
4991 
4992     dup_args_ct = opcode_args_ct(op);
4993     dup_out_regs = dup_args_ct[0].regs;
4994     dup_in_regs = dup_args_ct[1].regs;
4995 
4996     /* Allocate the output register now.  */
4997     if (ots->val_type != TEMP_VAL_REG) {
4998         TCGRegSet allocated_regs = s->reserved_regs;
4999         TCGReg oreg;
5000 
5001         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5002             /* Make sure to not spill the input register. */
5003             tcg_regset_set_reg(allocated_regs, its->reg);
5004         }
5005         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5006                              output_pref(op, 0), ots->indirect_base);
5007         set_temp_val_reg(s, ots, oreg);
5008     }
5009 
5010     switch (its->val_type) {
5011     case TEMP_VAL_REG:
5012         /*
5013          * The dup constraints must be broad, covering all possible VECE.
5014          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5015          * to fail, indicating that extra moves are required for that case.
5016          */
5017         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5018             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5019                 goto done;
5020             }
5021             /* Try again from memory or a vector input register.  */
5022         }
5023         if (!its->mem_coherent) {
5024             /*
5025              * The input register is not synced, and so an extra store
5026              * would be required to use memory.  Attempt an integer-vector
5027              * register move first.  We do not have a TCGRegSet for this.
5028              */
5029             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5030                 break;
5031             }
5032             /* Sync the temp back to its slot and load from there.  */
5033             temp_sync(s, its, s->reserved_regs, 0, 0);
5034         }
5035         /* fall through */
5036 
5037     case TEMP_VAL_MEM:
5038         lowpart_ofs = 0;
5039         if (HOST_BIG_ENDIAN) {
5040             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5041         }
5042         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5043                              its->mem_offset + lowpart_ofs)) {
5044             goto done;
5045         }
5046         /* Load the input into the destination vector register. */
5047         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5048         break;
5049 
5050     default:
5051         g_assert_not_reached();
5052     }
5053 
5054     /* We now have a vector input register, so dup must succeed. */
5055     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5056     tcg_debug_assert(ok);
5057 
5058  done:
5059     ots->mem_coherent = 0;
5060     if (IS_DEAD_ARG(1)) {
5061         temp_dead(s, its);
5062     }
5063     if (NEED_SYNC_ARG(0)) {
5064         temp_sync(s, ots, s->reserved_regs, 0, 0);
5065     }
5066     if (IS_DEAD_ARG(0)) {
5067         temp_dead(s, ots);
5068     }
5069 }
5070 
5071 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5072 {
5073     const TCGLifeData arg_life = op->life;
5074     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5075     TCGRegSet i_allocated_regs;
5076     TCGRegSet o_allocated_regs;
5077     int i, k, nb_iargs, nb_oargs;
5078     TCGReg reg;
5079     TCGArg arg;
5080     const TCGArgConstraint *args_ct;
5081     const TCGArgConstraint *arg_ct;
5082     TCGTemp *ts;
5083     TCGArg new_args[TCG_MAX_OP_ARGS];
5084     int const_args[TCG_MAX_OP_ARGS];
5085     TCGCond op_cond;
5086 
5087     nb_oargs = def->nb_oargs;
5088     nb_iargs = def->nb_iargs;
5089 
5090     /* copy constants */
5091     memcpy(new_args + nb_oargs + nb_iargs,
5092            op->args + nb_oargs + nb_iargs,
5093            sizeof(TCGArg) * def->nb_cargs);
5094 
5095     i_allocated_regs = s->reserved_regs;
5096     o_allocated_regs = s->reserved_regs;
5097 
5098     switch (op->opc) {
5099     case INDEX_op_brcond_i32:
5100     case INDEX_op_brcond_i64:
5101         op_cond = op->args[2];
5102         break;
5103     case INDEX_op_setcond_i32:
5104     case INDEX_op_setcond_i64:
5105     case INDEX_op_negsetcond_i32:
5106     case INDEX_op_negsetcond_i64:
5107     case INDEX_op_cmp_vec:
5108         op_cond = op->args[3];
5109         break;
5110     case INDEX_op_brcond2_i32:
5111         op_cond = op->args[4];
5112         break;
5113     case INDEX_op_movcond_i32:
5114     case INDEX_op_movcond_i64:
5115     case INDEX_op_setcond2_i32:
5116     case INDEX_op_cmpsel_vec:
5117         op_cond = op->args[5];
5118         break;
5119     default:
5120         /* No condition within opcode. */
5121         op_cond = TCG_COND_ALWAYS;
5122         break;
5123     }
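    /*
     * The condition is extracted here because tcg_target_const_match()
     * below may accept different immediates depending on the comparison
     * being performed.
     */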
5124 
5125     args_ct = opcode_args_ct(op);
5126 
5127     /* satisfy input constraints */
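    /*
     * Inputs are visited in sort_index order, i.e. the most constrained
     * operands first, so that a loosely constrained operand does not
     * steal the only register a later operand can use.
     */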
5128     for (k = 0; k < nb_iargs; k++) {
5129         TCGRegSet i_preferred_regs, i_required_regs;
5130         bool allocate_new_reg, copyto_new_reg;
5131         TCGTemp *ts2;
5132         int i1, i2;
5133 
5134         i = args_ct[nb_oargs + k].sort_index;
5135         arg = op->args[i];
5136         arg_ct = &args_ct[i];
5137         ts = arg_temp(arg);
5138 
5139         if (ts->val_type == TEMP_VAL_CONST) {
5140 #ifdef TCG_REG_ZERO
5141             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5142                 /* Hardware zero register: indicate register via non-const. */
5143                 const_args[i] = 0;
5144                 new_args[i] = TCG_REG_ZERO;
5145                 continue;
5146             }
5147 #endif
5148 
5149             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5150                                        op_cond, TCGOP_VECE(op))) {
5151                 /* constant is OK for instruction */
5152                 const_args[i] = 1;
5153                 new_args[i] = ts->val;
5154                 continue;
5155             }
5156         }
5157 
5158         reg = ts->reg;
5159         i_preferred_regs = 0;
5160         i_required_regs = arg_ct->regs;
5161         allocate_new_reg = false;
5162         copyto_new_reg = false;
5163 
5164         switch (arg_ct->pair) {
5165         case 0: /* not paired */
5166             if (arg_ct->ialias) {
5167                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5168 
5169                 /*
5170                  * If the input is readonly, then it cannot also be an
5171                  * output and aliased to itself.  If the input is not
5172                  * dead after the instruction, we must allocate a new
5173                  * register and move it.
5174                  */
5175                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5176                     || args_ct[arg_ct->alias_index].newreg) {
5177                     allocate_new_reg = true;
5178                 } else if (ts->val_type == TEMP_VAL_REG) {
5179                     /*
5180                      * Check if the current register has already been
5181                      * allocated for another input.
5182                      */
5183                     allocate_new_reg =
5184                         tcg_regset_test_reg(i_allocated_regs, reg);
5185                 }
5186             }
5187             if (!allocate_new_reg) {
5188                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5189                           i_preferred_regs);
5190                 reg = ts->reg;
5191                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5192             }
5193             if (allocate_new_reg) {
5194                 /*
5195                  * Allocate a new register matching the constraint
5196                  * and move the temporary register into it.
5197                  */
5198                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5199                           i_allocated_regs, 0);
5200                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5201                                     i_preferred_regs, ts->indirect_base);
5202                 copyto_new_reg = true;
5203             }
5204             break;
5205 
5206         case 1:
5207             /* First of an input pair; if i1 == i2, the second is an output. */
5208             i1 = i;
5209             i2 = arg_ct->pair_index;
5210             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5211 
5212             /*
5213              * It is easier to default to allocating a new pair
5214              * and to identify a few cases where it's not required.
5215              */
5216             if (arg_ct->ialias) {
5217                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5218                 if (IS_DEAD_ARG(i1) &&
5219                     IS_DEAD_ARG(i2) &&
5220                     !temp_readonly(ts) &&
5221                     ts->val_type == TEMP_VAL_REG &&
5222                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5223                     tcg_regset_test_reg(i_required_regs, reg) &&
5224                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5225                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5226                     (ts2
5227                      ? ts2->val_type == TEMP_VAL_REG &&
5228                        ts2->reg == reg + 1 &&
5229                        !temp_readonly(ts2)
5230                      : s->reg_to_temp[reg + 1] == NULL)) {
5231                     break;
5232                 }
5233             } else {
5234                 /* Without aliasing, the pair must also be an input. */
5235                 tcg_debug_assert(ts2);
5236                 if (ts->val_type == TEMP_VAL_REG &&
5237                     ts2->val_type == TEMP_VAL_REG &&
5238                     ts2->reg == reg + 1 &&
5239                     tcg_regset_test_reg(i_required_regs, reg)) {
5240                     break;
5241                 }
5242             }
5243             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5244                                      0, ts->indirect_base);
5245             goto do_pair;
5246 
5247         case 2: /* pair second */
5248             reg = new_args[arg_ct->pair_index] + 1;
5249             goto do_pair;
5250 
5251         case 3: /* ialias with second output, no first input */
5252             tcg_debug_assert(arg_ct->ialias);
5253             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5254 
5255             if (IS_DEAD_ARG(i) &&
5256                 !temp_readonly(ts) &&
5257                 ts->val_type == TEMP_VAL_REG &&
5258                 reg > 0 &&
5259                 s->reg_to_temp[reg - 1] == NULL &&
5260                 tcg_regset_test_reg(i_required_regs, reg) &&
5261                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5262                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5263                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5264                 break;
5265             }
5266             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5267                                      i_allocated_regs, 0,
5268                                      ts->indirect_base);
5269             tcg_regset_set_reg(i_allocated_regs, reg);
5270             reg += 1;
5271             goto do_pair;
5272 
5273         do_pair:
5274             /*
5275              * If an aliased input is not dead after the instruction,
5276              * we must allocate a new register and move it.
5277              */
5278             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5279                 TCGRegSet t_allocated_regs = i_allocated_regs;
5280 
5281                 /*
5282                  * Because of the alias, and the continued life, make sure
5283                  * that the temp is somewhere *other* than the reg pair,
5284                  * and we get a copy in reg.
5285                  */
5286                 tcg_regset_set_reg(t_allocated_regs, reg);
5287                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5288                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5289                     /* If ts was already in reg, copy it somewhere else. */
5290                     TCGReg nr;
5291                     bool ok;
5292 
5293                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5294                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5295                                        t_allocated_regs, 0, ts->indirect_base);
5296                     ok = tcg_out_mov(s, ts->type, nr, reg);
5297                     tcg_debug_assert(ok);
5298 
5299                     set_temp_val_reg(s, ts, nr);
5300                 } else {
5301                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5302                               t_allocated_regs, 0);
5303                     copyto_new_reg = true;
5304                 }
5305             } else {
5306                 /* Preferably allocate to reg, otherwise copy. */
5307                 i_required_regs = (TCGRegSet)1 << reg;
5308                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5309                           i_preferred_regs);
5310                 copyto_new_reg = ts->reg != reg;
5311             }
5312             break;
5313 
5314         default:
5315             g_assert_not_reached();
5316         }
5317 
5318         if (copyto_new_reg) {
5319             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5320                 /*
5321                  * Cross register class move not supported.  Sync the
5322                  * temp back to its slot and load from there.
5323                  */
5324                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5325                 tcg_out_ld(s, ts->type, reg,
5326                            ts->mem_base->reg, ts->mem_offset);
5327             }
5328         }
5329         new_args[i] = reg;
5330         const_args[i] = 0;
5331         tcg_regset_set_reg(i_allocated_regs, reg);
5332     }
5333 
5334     /* mark dead temporaries and free the associated registers */
5335     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5336         if (IS_DEAD_ARG(i)) {
5337             temp_dead(s, arg_temp(op->args[i]));
5338         }
5339     }
5340 
5341     if (def->flags & TCG_OPF_COND_BRANCH) {
5342         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5343     } else if (def->flags & TCG_OPF_BB_END) {
5344         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5345     } else {
5346         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5347             /* XXX: permit generic clobber register list? */
5348             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5349                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5350                     tcg_reg_free(s, i, i_allocated_regs);
5351                 }
5352             }
5353         }
5354         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5355             /* sync globals if the op has side effects and might trigger
5356                an exception. */
5357             sync_globals(s, i_allocated_regs);
5358         }
5359 
5360         /* satisfy the output constraints */
5361         for (k = 0; k < nb_oargs; k++) {
5362             i = args_ct[k].sort_index;
5363             arg = op->args[i];
5364             arg_ct = &args_ct[i];
5365             ts = arg_temp(arg);
5366 
5367             /* ENV should not be modified.  */
5368             tcg_debug_assert(!temp_readonly(ts));
5369 
5370             switch (arg_ct->pair) {
5371             case 0: /* not paired */
5372                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5373                     reg = new_args[arg_ct->alias_index];
5374                 } else if (arg_ct->newreg) {
5375                     reg = tcg_reg_alloc(s, arg_ct->regs,
5376                                         i_allocated_regs | o_allocated_regs,
5377                                         output_pref(op, k), ts->indirect_base);
5378                 } else {
5379                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5380                                         output_pref(op, k), ts->indirect_base);
5381                 }
5382                 break;
5383 
5384             case 1: /* first of pair */
5385                 if (arg_ct->oalias) {
5386                     reg = new_args[arg_ct->alias_index];
5387                 } else if (arg_ct->newreg) {
5388                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5389                                              i_allocated_regs | o_allocated_regs,
5390                                              output_pref(op, k),
5391                                              ts->indirect_base);
5392                 } else {
5393                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5394                                              output_pref(op, k),
5395                                              ts->indirect_base);
5396                 }
5397                 break;
5398 
5399             case 2: /* second of pair */
5400                 if (arg_ct->oalias) {
5401                     reg = new_args[arg_ct->alias_index];
5402                 } else {
5403                     reg = new_args[arg_ct->pair_index] + 1;
5404                 }
5405                 break;
5406 
5407             case 3: /* first of pair, aliasing with a second input */
5408                 tcg_debug_assert(!arg_ct->newreg);
5409                 reg = new_args[arg_ct->pair_index] - 1;
5410                 break;
5411 
5412             default:
5413                 g_assert_not_reached();
5414             }
5415             tcg_regset_set_reg(o_allocated_regs, reg);
5416             set_temp_val_reg(s, ts, reg);
5417             ts->mem_coherent = 0;
5418             new_args[i] = reg;
5419         }
5420     }
5421 
5422     /* emit instruction */
5423     TCGType type = TCGOP_TYPE(op);
5424     switch (op->opc) {
5425     case INDEX_op_ext_i32_i64:
5426         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5427         break;
5428     case INDEX_op_extu_i32_i64:
5429         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5430         break;
5431     case INDEX_op_extrl_i64_i32:
5432         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5433         break;
5434 
5435     case INDEX_op_add:
5436     case INDEX_op_and:
5437     case INDEX_op_andc:
5438     case INDEX_op_eqv:
5439     case INDEX_op_nand:
5440     case INDEX_op_nor:
5441     case INDEX_op_or:
5442     case INDEX_op_orc:
5443     case INDEX_op_xor:
5444         {
5445             const TCGOutOpBinary *out =
5446                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5447 
5448             /* Constants should never appear in the first source operand. */
5449             tcg_debug_assert(!const_args[1]);
5450             if (const_args[2]) {
5451                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5452             } else {
5453                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5454             }
5455         }
5456         break;
5457 
5458     case INDEX_op_sub:
5459         {
5460             const TCGOutOpSubtract *out = &outop_sub;
5461 
5462             /*
5463              * Constants should never appear in the second source operand.
5464              * These are folded into an add of the negated constant.
5465              */
5466             tcg_debug_assert(!const_args[2]);
5467             if (const_args[1]) {
5468                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5469             } else {
5470                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5471             }
5472         }
5473         break;
5474 
5475     case INDEX_op_neg:
5476     case INDEX_op_not:
5477         {
5478             const TCGOutOpUnary *out =
5479                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5480 
5481             /* Constants should have been folded. */
5482             tcg_debug_assert(!const_args[1]);
5483             out->out_rr(s, type, new_args[0], new_args[1]);
5484         }
5485         break;
5486 
5487     default:
5488         if (def->flags & TCG_OPF_VECTOR) {
5489             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5490                            TCGOP_VECE(op), new_args, const_args);
5491         } else {
5492             tcg_out_op(s, op->opc, type, new_args, const_args);
5493         }
5494         break;
5495     }
5496 
5497     /* move the outputs in the correct register if needed */
5498     for (i = 0; i < nb_oargs; i++) {
5499         ts = arg_temp(op->args[i]);
5500 
5501         /* ENV should not be modified.  */
5502         tcg_debug_assert(!temp_readonly(ts));
5503 
5504         if (NEED_SYNC_ARG(i)) {
5505             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5506         } else if (IS_DEAD_ARG(i)) {
5507             temp_dead(s, ts);
5508         }
5509     }
5510 }
5511 
5512 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5513 {
5514     const TCGLifeData arg_life = op->life;
5515     TCGTemp *ots, *itsl, *itsh;
5516     TCGType vtype = TCGOP_TYPE(op);
5517 
5518     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5519     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5520     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5521 
5522     ots = arg_temp(op->args[0]);
5523     itsl = arg_temp(op->args[1]);
5524     itsh = arg_temp(op->args[2]);
5525 
5526     /* ENV should not be modified.  */
5527     tcg_debug_assert(!temp_readonly(ots));
5528 
5529     /* Allocate the output register now.  */
5530     if (ots->val_type != TEMP_VAL_REG) {
5531         TCGRegSet allocated_regs = s->reserved_regs;
5532         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5533         TCGReg oreg;
5534 
5535         /* Make sure to not spill the input registers. */
5536         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5537             tcg_regset_set_reg(allocated_regs, itsl->reg);
5538         }
5539         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5540             tcg_regset_set_reg(allocated_regs, itsh->reg);
5541         }
5542 
5543         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5544                              output_pref(op, 0), ots->indirect_base);
5545         set_temp_val_reg(s, ots, oreg);
5546     }
5547 
5548     /* Promote dup2 of immediates to dupi_vec. */
5549     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5550         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5551         MemOp vece = MO_64;
5552 
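        /*
         * For example, itsl->val == 0x00010001 and itsh->val ==
         * 0x00010001 combine to 0x0001000100010001, which the checks
         * below recognize as a MO_16 dup of 0x0001.
         */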
5553         if (val == dup_const(MO_8, val)) {
5554             vece = MO_8;
5555         } else if (val == dup_const(MO_16, val)) {
5556             vece = MO_16;
5557         } else if (val == dup_const(MO_32, val)) {
5558             vece = MO_32;
5559         }
5560 
5561         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5562         goto done;
5563     }
5564 
5565     /* If the two inputs form one 64-bit value, try dupm_vec. */
5566     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5567         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5568         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5569         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5570 
5571         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5572         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5573 
5574         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5575                              its->mem_base->reg, its->mem_offset)) {
5576             goto done;
5577         }
5578     }
5579 
5580     /* Fall back to generic expansion. */
5581     return false;
5582 
5583  done:
5584     ots->mem_coherent = 0;
5585     if (IS_DEAD_ARG(1)) {
5586         temp_dead(s, itsl);
5587     }
5588     if (IS_DEAD_ARG(2)) {
5589         temp_dead(s, itsh);
5590     }
5591     if (NEED_SYNC_ARG(0)) {
5592         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5593     } else if (IS_DEAD_ARG(0)) {
5594         temp_dead(s, ots);
5595     }
5596     return true;
5597 }
5598 
5599 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5600                          TCGRegSet allocated_regs)
5601 {
5602     if (ts->val_type == TEMP_VAL_REG) {
5603         if (ts->reg != reg) {
5604             tcg_reg_free(s, reg, allocated_regs);
5605             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5606                 /*
5607                  * Cross register class move not supported.  Sync the
5608                  * temp back to its slot and load from there.
5609                  */
5610                 temp_sync(s, ts, allocated_regs, 0, 0);
5611                 tcg_out_ld(s, ts->type, reg,
5612                            ts->mem_base->reg, ts->mem_offset);
5613             }
5614         }
5615     } else {
5616         TCGRegSet arg_set = 0;
5617 
5618         tcg_reg_free(s, reg, allocated_regs);
5619         tcg_regset_set_reg(arg_set, reg);
5620         temp_load(s, ts, arg_set, allocated_regs, 0);
5621     }
5622 }
5623 
5624 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5625                          TCGRegSet allocated_regs)
5626 {
5627     /*
5628      * When the destination is on the stack, load up the temp and store.
5629      * If there are many call-saved registers, the temp might live to
5630      * see another use; otherwise it'll be discarded.
5631      */
5632     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5633     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5634                arg_slot_stk_ofs(arg_slot));
5635 }
5636 
5637 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5638                             TCGTemp *ts, TCGRegSet *allocated_regs)
5639 {
5640     if (arg_slot_reg_p(l->arg_slot)) {
5641         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5642         load_arg_reg(s, reg, ts, *allocated_regs);
5643         tcg_regset_set_reg(*allocated_regs, reg);
5644     } else {
5645         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5646     }
5647 }
5648 
5649 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5650                          intptr_t ref_off, TCGRegSet *allocated_regs)
5651 {
5652     TCGReg reg;
5653 
5654     if (arg_slot_reg_p(arg_slot)) {
5655         reg = tcg_target_call_iarg_regs[arg_slot];
5656         tcg_reg_free(s, reg, *allocated_regs);
5657         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5658         tcg_regset_set_reg(*allocated_regs, reg);
5659     } else {
5660         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5661                             *allocated_regs, 0, false);
5662         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5663         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5664                    arg_slot_stk_ofs(arg_slot));
5665     }
5666 }
5667 
5668 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5669 {
5670     const int nb_oargs = TCGOP_CALLO(op);
5671     const int nb_iargs = TCGOP_CALLI(op);
5672     const TCGLifeData arg_life = op->life;
5673     const TCGHelperInfo *info = tcg_call_info(op);
5674     TCGRegSet allocated_regs = s->reserved_regs;
5675     int i;
5676 
5677     /*
5678      * Move inputs into place in reverse order,
5679      * so that we place stacked arguments first.
5680      */
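    /*
     * Since info->in[] assigns register slots before stack slots,
     * walking backwards performs the stack stores while no argument
     * registers have been pinned yet, leaving the whole register file
     * available to temp_load().
     */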
5681     for (i = nb_iargs - 1; i >= 0; --i) {
5682         const TCGCallArgumentLoc *loc = &info->in[i];
5683         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5684 
5685         switch (loc->kind) {
5686         case TCG_CALL_ARG_NORMAL:
5687         case TCG_CALL_ARG_EXTEND_U:
5688         case TCG_CALL_ARG_EXTEND_S:
5689             load_arg_normal(s, loc, ts, &allocated_regs);
5690             break;
5691         case TCG_CALL_ARG_BY_REF:
5692             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5693             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5694                          arg_slot_stk_ofs(loc->ref_slot),
5695                          &allocated_regs);
5696             break;
5697         case TCG_CALL_ARG_BY_REF_N:
5698             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5699             break;
5700         default:
5701             g_assert_not_reached();
5702         }
5703     }
5704 
5705     /* Mark dead temporaries and free the associated registers.  */
5706     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5707         if (IS_DEAD_ARG(i)) {
5708             temp_dead(s, arg_temp(op->args[i]));
5709         }
5710     }
5711 
5712     /* Clobber call registers.  */
5713     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5714         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5715             tcg_reg_free(s, i, allocated_regs);
5716         }
5717     }
5718 
5719     /*
5720      * Save globals if they might be written by the helper,
5721      * sync them if they might be read.
5722      */
5723     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5724         /* Nothing to do */
5725     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5726         sync_globals(s, allocated_regs);
5727     } else {
5728         save_globals(s, allocated_regs);
5729     }
5730 
5731     /*
5732      * If the ABI passes a pointer to the returned struct as the first
5733      * argument, load that now.  Pass a pointer to the output home slot.
5734      */
5735     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5736         TCGTemp *ts = arg_temp(op->args[0]);
5737 
5738         if (!ts->mem_allocated) {
5739             temp_allocate_frame(s, ts);
5740         }
5741         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5742     }
5743 
5744     tcg_out_call(s, tcg_call_func(op), info);
5745 
5746     /* Assign output registers and emit moves if needed.  */
5747     switch (info->out_kind) {
5748     case TCG_CALL_RET_NORMAL:
5749         for (i = 0; i < nb_oargs; i++) {
5750             TCGTemp *ts = arg_temp(op->args[i]);
5751             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5752 
5753             /* ENV should not be modified.  */
5754             tcg_debug_assert(!temp_readonly(ts));
5755 
5756             set_temp_val_reg(s, ts, reg);
5757             ts->mem_coherent = 0;
5758         }
5759         break;
5760 
5761     case TCG_CALL_RET_BY_VEC:
5762         {
5763             TCGTemp *ts = arg_temp(op->args[0]);
5764 
5765             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5766             tcg_debug_assert(ts->temp_subindex == 0);
5767             if (!ts->mem_allocated) {
5768                 temp_allocate_frame(s, ts);
5769             }
5770             tcg_out_st(s, TCG_TYPE_V128,
5771                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5772                        ts->mem_base->reg, ts->mem_offset);
5773         }
5774         /* fall through to mark all parts in memory */
5775 
5776     case TCG_CALL_RET_BY_REF:
5777         /* The callee has performed a write through the reference. */
5778         for (i = 0; i < nb_oargs; i++) {
5779             TCGTemp *ts = arg_temp(op->args[i]);
5780             ts->val_type = TEMP_VAL_MEM;
5781         }
5782         break;
5783 
5784     default:
5785         g_assert_not_reached();
5786     }
5787 
5788     /* Flush or discard output registers as needed. */
5789     for (i = 0; i < nb_oargs; i++) {
5790         TCGTemp *ts = arg_temp(op->args[i]);
5791         if (NEED_SYNC_ARG(i)) {
5792             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5793         } else if (IS_DEAD_ARG(i)) {
5794             temp_dead(s, ts);
5795         }
5796     }
5797 }
5798 
5799 /**
5800  * atom_and_align_for_opc:
5801  * @s: tcg context
5802  * @opc: memory operation code
5803  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5804  * @allow_two_ops: true if we are prepared to issue two operations
5805  *
5806  * Return the alignment and atomicity to use for the inline fast path
5807  * for the given memory operation.  The alignment may be larger than
5808  * that specified in @opc, and the correct alignment will be diagnosed
5809  * by the slow path helper.
5810  *
5811  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5812  * and issue two loads or stores for subalignment.
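 *
 * For illustration: an MO_64 access with MO_ATOM_IFALIGN_PAIR yields
 * atmax = MO_32 (two atomic 4-byte halves), while MO_ATOM_WITHIN16 on
 * a host without within-16 support raises the required alignment to
 * the access size instead (except MO_128, where misalignment already
 * implies no atomicity).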
5813  */
5814 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5815                                            MemOp host_atom, bool allow_two_ops)
5816 {
5817     MemOp align = memop_alignment_bits(opc);
5818     MemOp size = opc & MO_SIZE;
5819     MemOp half = size ? size - 1 : 0;
5820     MemOp atom = opc & MO_ATOM_MASK;
5821     MemOp atmax;
5822 
5823     switch (atom) {
5824     case MO_ATOM_NONE:
5825         /* The operation requires no specific atomicity. */
5826         atmax = MO_8;
5827         break;
5828 
5829     case MO_ATOM_IFALIGN:
5830         atmax = size;
5831         break;
5832 
5833     case MO_ATOM_IFALIGN_PAIR:
5834         atmax = half;
5835         break;
5836 
5837     case MO_ATOM_WITHIN16:
5838         atmax = size;
5839         if (size == MO_128) {
5840             /* Misalignment implies !within16, and therefore no atomicity. */
5841         } else if (host_atom != MO_ATOM_WITHIN16) {
5842             /* The host does not implement within16, so require alignment. */
5843             align = MAX(align, size);
5844         }
5845         break;
5846 
5847     case MO_ATOM_WITHIN16_PAIR:
5848         atmax = size;
5849         /*
5850          * Misalignment implies !within16, and therefore half atomicity.
5851          * Any host prepared for two operations can implement this with
5852          * half alignment.
5853          */
5854         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5855             align = MAX(align, half);
5856         }
5857         break;
5858 
5859     case MO_ATOM_SUBALIGN:
5860         atmax = size;
5861         if (host_atom != MO_ATOM_SUBALIGN) {
5862             /* If unaligned but not odd, there are subobjects up to half. */
5863             if (allow_two_ops) {
5864                 align = MAX(align, half);
5865             } else {
5866                 align = MAX(align, size);
5867             }
5868         }
5869         break;
5870 
5871     default:
5872         g_assert_not_reached();
5873     }
5874 
5875     return (TCGAtomAlign){ .atom = atmax, .align = align };
5876 }
5877 
5878 /*
5879  * Similarly for qemu_ld/st slow path helpers.
5880  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5881  * using only the provided backend tcg_out_* functions.
5882  */
5883 
5884 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5885 {
5886     int ofs = arg_slot_stk_ofs(slot);
5887 
5888     /*
5889      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5890      * require extension to uint64_t, adjust the address for uint32_t.
5891      */
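    /*
     * E.g. on a 64-bit big-endian host, a 32-bit argument occupies the
     * high-address half of its 8-byte slot, hence the +4 below.
     */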
5892     if (HOST_BIG_ENDIAN &&
5893         TCG_TARGET_REG_BITS == 64 &&
5894         type == TCG_TYPE_I32) {
5895         ofs += 4;
5896     }
5897     return ofs;
5898 }
5899 
5900 static void tcg_out_helper_load_slots(TCGContext *s,
5901                                       unsigned nmov, TCGMovExtend *mov,
5902                                       const TCGLdstHelperParam *parm)
5903 {
5904     unsigned i;
5905     TCGReg dst3;
5906 
5907     /*
5908      * Start from the end, storing to the stack first.
5909      * This frees those registers, so we need not consider overlap.
5910      */
5911     for (i = nmov; i-- > 0; ) {
5912         unsigned slot = mov[i].dst;
5913 
5914         if (arg_slot_reg_p(slot)) {
5915             goto found_reg;
5916         }
5917 
5918         TCGReg src = mov[i].src;
5919         TCGType dst_type = mov[i].dst_type;
5920         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5921 
5922         /* The argument is going onto the stack; extend into scratch. */
5923         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5924             tcg_debug_assert(parm->ntmp != 0);
5925             mov[i].dst = src = parm->tmp[0];
5926             tcg_out_movext1(s, &mov[i]);
5927         }
5928 
5929         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5930                    tcg_out_helper_stk_ofs(dst_type, slot));
5931     }
5932     return;
5933 
5934  found_reg:
5935     /*
5936      * The remaining arguments are in registers.
5937      * Convert slot numbers to argument registers.
5938      */
5939     nmov = i + 1;
5940     for (i = 0; i < nmov; ++i) {
5941         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5942     }
5943 
5944     switch (nmov) {
5945     case 4:
5946         /* The backend must have provided enough temps for the worst case. */
5947         tcg_debug_assert(parm->ntmp >= 2);
5948 
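        /*
         * A conflict arises when the register chosen for argument 3 is
         * still the source of an earlier move, e.g. dst3 == mov[1].src:
         * writing it first would clobber a pending source, so that
         * source is stashed in a scratch register instead.
         */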
5949         dst3 = mov[3].dst;
5950         for (unsigned j = 0; j < 3; ++j) {
5951             if (dst3 == mov[j].src) {
5952                 /*
5953                  * Conflict. Copy the source to a temporary, perform the
5954                  * remaining moves, then the extension from our scratch
5955                  * on the way out.
5956                  */
5957                 TCGReg scratch = parm->tmp[1];
5958 
5959                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5960                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5961                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5962                 return;
5963             }
5964         }
5965 
5966         /* No conflicts: perform this move and continue. */
5967         tcg_out_movext1(s, &mov[3]);
5968         /* fall through */
5969 
5970     case 3:
5971         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5972                         parm->ntmp ? parm->tmp[0] : -1);
5973         break;
5974     case 2:
5975         tcg_out_movext2(s, mov, mov + 1,
5976                         parm->ntmp ? parm->tmp[0] : -1);
5977         break;
5978     case 1:
5979         tcg_out_movext1(s, mov);
5980         break;
5981     default:
5982         g_assert_not_reached();
5983     }
5984 }
5985 
5986 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5987                                     TCGType type, tcg_target_long imm,
5988                                     const TCGLdstHelperParam *parm)
5989 {
5990     if (arg_slot_reg_p(slot)) {
5991         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5992     } else {
5993         int ofs = tcg_out_helper_stk_ofs(type, slot);
5994         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5995             tcg_debug_assert(parm->ntmp != 0);
5996             tcg_out_movi(s, type, parm->tmp[0], imm);
5997             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5998         }
5999     }
6000 }
6001 
6002 static void tcg_out_helper_load_common_args(TCGContext *s,
6003                                             const TCGLabelQemuLdst *ldst,
6004                                             const TCGLdstHelperParam *parm,
6005                                             const TCGHelperInfo *info,
6006                                             unsigned next_arg)
6007 {
6008     TCGMovExtend ptr_mov = {
6009         .dst_type = TCG_TYPE_PTR,
6010         .src_type = TCG_TYPE_PTR,
6011         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6012     };
6013     const TCGCallArgumentLoc *loc = &info->in[0];
6014     TCGType type;
6015     unsigned slot;
6016     tcg_target_ulong imm;
6017 
6018     /*
6019      * Handle env, which is always first.
6020      */
6021     ptr_mov.dst = loc->arg_slot;
6022     ptr_mov.src = TCG_AREG0;
6023     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6024 
6025     /*
6026      * Handle oi.
6027      */
6028     imm = ldst->oi;
6029     loc = &info->in[next_arg];
6030     type = TCG_TYPE_I32;
6031     switch (loc->kind) {
6032     case TCG_CALL_ARG_NORMAL:
6033         break;
6034     case TCG_CALL_ARG_EXTEND_U:
6035     case TCG_CALL_ARG_EXTEND_S:
6036         /* No extension required for MemOpIdx. */
6037         tcg_debug_assert(imm <= INT32_MAX);
6038         type = TCG_TYPE_REG;
6039         break;
6040     default:
6041         g_assert_not_reached();
6042     }
6043     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6044     next_arg++;
6045 
6046     /*
6047      * Handle ra.
6048      */
6049     loc = &info->in[next_arg];
6050     slot = loc->arg_slot;
6051     if (parm->ra_gen) {
6052         int arg_reg = -1;
6053         TCGReg ra_reg;
6054 
6055         if (arg_slot_reg_p(slot)) {
6056             arg_reg = tcg_target_call_iarg_regs[slot];
6057         }
6058         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6059 
6060         ptr_mov.dst = slot;
6061         ptr_mov.src = ra_reg;
6062         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6063     } else {
6064         imm = (uintptr_t)ldst->raddr;
6065         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6066     }
6067 }
6068 
6069 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6070                                        const TCGCallArgumentLoc *loc,
6071                                        TCGType dst_type, TCGType src_type,
6072                                        TCGReg lo, TCGReg hi)
6073 {
6074     MemOp reg_mo;
6075 
6076     if (dst_type <= TCG_TYPE_REG) {
6077         MemOp src_ext;
6078 
6079         switch (loc->kind) {
6080         case TCG_CALL_ARG_NORMAL:
6081             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6082             break;
6083         case TCG_CALL_ARG_EXTEND_U:
6084             dst_type = TCG_TYPE_REG;
6085             src_ext = MO_UL;
6086             break;
6087         case TCG_CALL_ARG_EXTEND_S:
6088             dst_type = TCG_TYPE_REG;
6089             src_ext = MO_SL;
6090             break;
6091         default:
6092             g_assert_not_reached();
6093         }
6094 
6095         mov[0].dst = loc->arg_slot;
6096         mov[0].dst_type = dst_type;
6097         mov[0].src = lo;
6098         mov[0].src_type = src_type;
6099         mov[0].src_ext = src_ext;
6100         return 1;
6101     }
6102 
6103     if (TCG_TARGET_REG_BITS == 32) {
6104         assert(dst_type == TCG_TYPE_I64);
6105         reg_mo = MO_32;
6106     } else {
6107         assert(dst_type == TCG_TYPE_I128);
6108         reg_mo = MO_64;
6109     }
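    /*
     * On a little-endian host, the low part fills loc[0] and the high
     * part loc[1]; HOST_BIG_ENDIAN swaps the two slot assignments.
     */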
6110 
6111     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6112     mov[0].src = lo;
6113     mov[0].dst_type = TCG_TYPE_REG;
6114     mov[0].src_type = TCG_TYPE_REG;
6115     mov[0].src_ext = reg_mo;
6116 
6117     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6118     mov[1].src = hi;
6119     mov[1].dst_type = TCG_TYPE_REG;
6120     mov[1].src_type = TCG_TYPE_REG;
6121     mov[1].src_ext = reg_mo;
6122 
6123     return 2;
6124 }
6125 
6126 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6127                                    const TCGLdstHelperParam *parm)
6128 {
6129     const TCGHelperInfo *info;
6130     const TCGCallArgumentLoc *loc;
6131     TCGMovExtend mov[2];
6132     unsigned next_arg, nmov;
6133     MemOp mop = get_memop(ldst->oi);
6134 
6135     switch (mop & MO_SIZE) {
6136     case MO_8:
6137     case MO_16:
6138     case MO_32:
6139         info = &info_helper_ld32_mmu;
6140         break;
6141     case MO_64:
6142         info = &info_helper_ld64_mmu;
6143         break;
6144     case MO_128:
6145         info = &info_helper_ld128_mmu;
6146         break;
6147     default:
6148         g_assert_not_reached();
6149     }
6150 
6151     /* Defer env argument. */
6152     next_arg = 1;
6153 
6154     loc = &info->in[next_arg];
6155     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6156         /*
6157          * 32-bit host with 32-bit guest: zero-extend the guest address
6158          * to 64 bits for the helper by storing the low part, then
6159          * load a zero for the high part.
6160          */
6161         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6162                                TCG_TYPE_I32, TCG_TYPE_I32,
6163                                ldst->addr_reg, -1);
6164         tcg_out_helper_load_slots(s, 1, mov, parm);
6165 
6166         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6167                                 TCG_TYPE_I32, 0, parm);
6168         next_arg += 2;
6169     } else {
6170         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6171                                       ldst->addr_reg, -1);
6172         tcg_out_helper_load_slots(s, nmov, mov, parm);
6173         next_arg += nmov;
6174     }
6175 
6176     switch (info->out_kind) {
6177     case TCG_CALL_RET_NORMAL:
6178     case TCG_CALL_RET_BY_VEC:
6179         break;
6180     case TCG_CALL_RET_BY_REF:
6181         /*
6182          * The return reference is in the first argument slot.
6183          * We need memory in which to return: re-use the top of stack.
6184          */
6185         {
6186             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6187 
6188             if (arg_slot_reg_p(0)) {
6189                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6190                                  TCG_REG_CALL_STACK, ofs_slot0);
6191             } else {
6192                 tcg_debug_assert(parm->ntmp != 0);
6193                 tcg_out_addi_ptr(s, parm->tmp[0],
6194                                  TCG_REG_CALL_STACK, ofs_slot0);
6195                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6196                            TCG_REG_CALL_STACK, ofs_slot0);
6197             }
6198         }
6199         break;
6200     default:
6201         g_assert_not_reached();
6202     }
6203 
6204     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6205 }
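
/*
 * For reference, the calls marshalled above target load helpers of
 * roughly this shape (cf. the declarations in tcg/tcg-ldst.h):
 *
 *     tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
 *                                      MemOpIdx oi, uintptr_t retaddr);
 *
 * Argument 0 (env) is deferred and, together with the trailing
 * (oi, retaddr) pair, is loaded by tcg_out_helper_load_common_args;
 * only the address argument is handled explicitly above.
 */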
6206 
6207 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6208                                   bool load_sign,
6209                                   const TCGLdstHelperParam *parm)
6210 {
6211     MemOp mop = get_memop(ldst->oi);
6212     TCGMovExtend mov[2];
6213     int ofs_slot0;
6214 
6215     switch (ldst->type) {
6216     case TCG_TYPE_I64:
6217         if (TCG_TARGET_REG_BITS == 32) {
6218             break;
6219         }
6220         /* fall through */
6221 
6222     case TCG_TYPE_I32:
6223         mov[0].dst = ldst->datalo_reg;
6224         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6225         mov[0].dst_type = ldst->type;
6226         mov[0].src_type = TCG_TYPE_REG;
6227 
6228         /*
6229          * If load_sign, then we allowed the helper to perform the
6230          * appropriate sign extension to tcg_target_ulong, and all
6231          * we need now is a plain move.
6232          *
6233          * If not, then we expect the relevant extension
6234          * instruction to be no more expensive than a move,
6235          * and we thus save icache space by using only one
6236          * of the two helper functions.
6237          */
6238         if (load_sign || !(mop & MO_SIGN)) {
6239             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6240                 mov[0].src_ext = MO_32;
6241             } else {
6242                 mov[0].src_ext = MO_64;
6243             }
6244         } else {
6245             mov[0].src_ext = mop & MO_SSIZE;
6246         }
6247         tcg_out_movext1(s, mov);
6248         return;
6249 
6250     case TCG_TYPE_I128:
6251         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6252         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6253         switch (TCG_TARGET_CALL_RET_I128) {
6254         case TCG_CALL_RET_NORMAL:
6255             break;
6256         case TCG_CALL_RET_BY_VEC:
6257             tcg_out_st(s, TCG_TYPE_V128,
6258                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6259                        TCG_REG_CALL_STACK, ofs_slot0);
6260             /* fall through */
6261         case TCG_CALL_RET_BY_REF:
6262             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6263                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6264             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6265                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6266             return;
6267         default:
6268             g_assert_not_reached();
6269         }
6270         break;
6271 
6272     default:
6273         g_assert_not_reached();
6274     }
6275 
6276     mov[0].dst = ldst->datalo_reg;
6277     mov[0].src =
6278         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6279     mov[0].dst_type = TCG_TYPE_REG;
6280     mov[0].src_type = TCG_TYPE_REG;
6281     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6282 
6283     mov[1].dst = ldst->datahi_reg;
6284     mov[1].src =
6285         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6286     mov[1].dst_type = TCG_TYPE_REG;
6287     mov[1].src_type = TCG_TYPE_REG;
6288     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6289 
6290     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6291 }
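
/*
 * Worked example for the fall-through path above: on a 32-bit
 * little-endian host, a TCG_TYPE_I64 load returns in two output
 * registers, with mov[0] taking output register 0 (the low part)
 * and mov[1] taking output register 1 (the high part).
 * tcg_out_movext2 orders the two moves so that neither clobbers
 * the other's source, falling back to parm->tmp[0] as scratch
 * when one is available.
 */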
6292 
6293 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6294                                    const TCGLdstHelperParam *parm)
6295 {
6296     const TCGHelperInfo *info;
6297     const TCGCallArgumentLoc *loc;
6298     TCGMovExtend mov[4];
6299     TCGType data_type;
6300     unsigned next_arg, nmov, n;
6301     MemOp mop = get_memop(ldst->oi);
6302 
6303     switch (mop & MO_SIZE) {
6304     case MO_8:
6305     case MO_16:
6306     case MO_32:
6307         info = &info_helper_st32_mmu;
6308         data_type = TCG_TYPE_I32;
6309         break;
6310     case MO_64:
6311         info = &info_helper_st64_mmu;
6312         data_type = TCG_TYPE_I64;
6313         break;
6314     case MO_128:
6315         info = &info_helper_st128_mmu;
6316         data_type = TCG_TYPE_I128;
6317         break;
6318     default:
6319         g_assert_not_reached();
6320     }
6321 
6322     /* Defer env argument. */
6323     next_arg = 1;
6324     nmov = 0;
6325 
6326     /* Handle addr argument. */
6327     loc = &info->in[next_arg];
6328     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6329     if (TCG_TARGET_REG_BITS == 32) {
6330         /*
6331          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6332          * to 64 bits for the helper by storing the low part.  Later,
6333          * after we have processed the register inputs, we will load a
6334          * zero for the high part.
6335          */
6336         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6337                                TCG_TYPE_I32, TCG_TYPE_I32,
6338                                ldst->addr_reg, -1);
6339         next_arg += 2;
6340         nmov += 1;
6341     } else {
6342         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6343                                    ldst->addr_reg, -1);
6344         next_arg += n;
6345         nmov += n;
6346     }
6347 
6348     /* Handle data argument. */
6349     loc = &info->in[next_arg];
6350     switch (loc->kind) {
6351     case TCG_CALL_ARG_NORMAL:
6352     case TCG_CALL_ARG_EXTEND_U:
6353     case TCG_CALL_ARG_EXTEND_S:
6354         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6355                                    ldst->datalo_reg, ldst->datahi_reg);
6356         next_arg += n;
6357         nmov += n;
6358         tcg_out_helper_load_slots(s, nmov, mov, parm);
6359         break;
6360 
6361     case TCG_CALL_ARG_BY_REF:
6362         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6363         tcg_debug_assert(data_type == TCG_TYPE_I128);
6364         tcg_out_st(s, TCG_TYPE_I64,
6365                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6366                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6367         tcg_out_st(s, TCG_TYPE_I64,
6368                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6369                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6370 
6371         tcg_out_helper_load_slots(s, nmov, mov, parm);
6372 
6373         if (arg_slot_reg_p(loc->arg_slot)) {
6374             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6375                              TCG_REG_CALL_STACK,
6376                              arg_slot_stk_ofs(loc->ref_slot));
6377         } else {
6378             tcg_debug_assert(parm->ntmp != 0);
6379             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6380                              arg_slot_stk_ofs(loc->ref_slot));
6381             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6382                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6383         }
6384         next_arg += 2;
6385         break;
6386 
6387     default:
6388         g_assert_not_reached();
6389     }
6390 
6391     if (TCG_TARGET_REG_BITS == 32) {
6392         /* Zero extend the address by loading a zero for the high part. */
6393         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6394         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6395     }
6396 
6397     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6398 }
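
/*
 * For reference, the store helpers marshalled above have roughly this
 * shape (cf. tcg/tcg-ldst.h), with the data value after the address;
 * the MO_8/MO_16/MO_32 variants all take a uint32_t value:
 *
 *     void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
 *                         MemOpIdx oi, uintptr_t retaddr);
 */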
6399 
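/*
 * Return the size in bytes of the generated code on success.
 * A negative return requests a restart by the caller (tb_gen_code):
 * -1 when the code_gen_buffer high-water mark is crossed, either in
 * the main loop or during finalization; -2 when the TB outgrows the
 * 16-bit offsets used by gen_insn_end_off or a relocation cannot be
 * resolved, in which case the caller retries with fewer guest
 * instructions.
 */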
6400 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6401 {
6402     int i, start_words, num_insns;
6403     TCGOp *op;
6404 
6405     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6406                  && qemu_log_in_addr_range(pc_start))) {
6407         FILE *logfile = qemu_log_trylock();
6408         if (logfile) {
6409             fprintf(logfile, "OP:\n");
6410             tcg_dump_ops(s, logfile, false);
6411             fprintf(logfile, "\n");
6412             qemu_log_unlock(logfile);
6413         }
6414     }
6415 
6416 #ifdef CONFIG_DEBUG_TCG
6417     /* Ensure all labels referenced have been emitted.  */
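    /*
     * E.g. a label created with gen_new_label() and targeted by a
     * branch, but never placed with gen_set_label(), trips this.
     */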
6418     {
6419         TCGLabel *l;
6420         bool error = false;
6421 
6422         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6423             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6424                 qemu_log_mask(CPU_LOG_TB_OP,
6425                               "$L%d referenced but not present.\n", l->id);
6426                 error = true;
6427             }
6428         }
6429         assert(!error);
6430     }
6431 #endif
6432 
6433     /* Do not reuse any EBB that may be allocated within the TB. */
6434     tcg_temp_ebb_reset_freed(s);
6435 
6436     tcg_optimize(s);
6437 
6438     reachable_code_pass(s);
6439     liveness_pass_0(s);
6440     liveness_pass_1(s);
6441 
6442     if (s->nb_indirects > 0) {
6443         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6444                      && qemu_log_in_addr_range(pc_start))) {
6445             FILE *logfile = qemu_log_trylock();
6446             if (logfile) {
6447                 fprintf(logfile, "OP before indirect lowering:\n");
6448                 tcg_dump_ops(s, logfile, false);
6449                 fprintf(logfile, "\n");
6450                 qemu_log_unlock(logfile);
6451             }
6452         }
6453 
6454         /* Replace indirect temps with direct temps.  */
6455         if (liveness_pass_2(s)) {
6456             /* If changes were made, re-run liveness.  */
6457             liveness_pass_1(s);
6458         }
6459     }
6460 
6461     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6462                  && qemu_log_in_addr_range(pc_start))) {
6463         FILE *logfile = qemu_log_trylock();
6464         if (logfile) {
6465             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6466             tcg_dump_ops(s, logfile, true);
6467             fprintf(logfile, "\n");
6468             qemu_log_unlock(logfile);
6469         }
6470     }
6471 
6472     /* Initialize goto_tb jump offsets. */
6473     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6474     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6475     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6476     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6477 
6478     tcg_reg_alloc_start(s);
6479 
6480     /*
6481      * Reset the buffer pointers when restarting after overflow.
6482      * TODO: Move this into translate-all.c with the rest of the
6483      * buffer management.  Having only this done here is confusing.
6484      */
6485     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6486     s->code_ptr = s->code_buf;
6487     s->data_gen_ptr = NULL;
6488 
6489     QSIMPLEQ_INIT(&s->ldst_labels);
6490     s->pool_labels = NULL;
6491 
6492     start_words = s->insn_start_words;
6493     s->gen_insn_data =
6494         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6495 
6496     tcg_out_tb_start(s);
6497 
6498     num_insns = -1;
6499     QTAILQ_FOREACH(op, &s->ops, link) {
6500         TCGOpcode opc = op->opc;
6501 
6502         switch (opc) {
6503         case INDEX_op_mov:
6504         case INDEX_op_mov_vec:
6505             tcg_reg_alloc_mov(s, op);
6506             break;
6507         case INDEX_op_dup_vec:
6508             tcg_reg_alloc_dup(s, op);
6509             break;
6510         case INDEX_op_insn_start:
6511             if (num_insns >= 0) {
6512                 size_t off = tcg_current_code_size(s);
6513                 s->gen_insn_end_off[num_insns] = off;
6514                 /* Assert that we do not overflow our stored offset.  */
6515                 assert(s->gen_insn_end_off[num_insns] == off);
6516             }
6517             num_insns++;
6518             for (i = 0; i < start_words; ++i) {
6519                 s->gen_insn_data[num_insns * start_words + i] =
6520                     tcg_get_insn_start_param(op, i);
6521             }
6522             break;
6523         case INDEX_op_discard:
6524             temp_dead(s, arg_temp(op->args[0]));
6525             break;
6526         case INDEX_op_set_label:
6527             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6528             tcg_out_label(s, arg_label(op->args[0]));
6529             break;
6530         case INDEX_op_call:
6531             tcg_reg_alloc_call(s, op);
6532             break;
6533         case INDEX_op_exit_tb:
6534             tcg_out_exit_tb(s, op->args[0]);
6535             break;
6536         case INDEX_op_goto_tb:
6537             tcg_out_goto_tb(s, op->args[0]);
6538             break;
6539         case INDEX_op_dup2_vec:
6540             if (tcg_reg_alloc_dup2(s, op)) {
6541                 break;
6542             }
6543             /* fall through */
6544         default:
6545             /* Sanity check that we've not introduced any unhandled opcodes. */
6546             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6547                                               TCGOP_FLAGS(op)));
6548             /* Note: it would be faster to have specialized
6549                register allocator functions for some common
6550                argument patterns.  */
6551             tcg_reg_alloc_op(s, op);
6552             break;
6553         }
6554         /* Test for (pending) buffer overflow.  The assumption is that any
6555            one operation beginning below the high water mark cannot overrun
6556            the buffer completely.  Thus we can test for overflow after
6557            generating code without having to check during generation.  */
6558         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6559             return -1;
6560         }
6561         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6562         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6563             return -2;
6564         }
6565     }
6566     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6567     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6568 
6569     /* Generate TB finalization at the end of block */
6570     i = tcg_out_ldst_finalize(s);
6571     if (i < 0) {
6572         return i;
6573     }
6574     i = tcg_out_pool_finalize(s);
6575     if (i < 0) {
6576         return i;
6577     }
6578     if (!tcg_resolve_relocs(s)) {
6579         return -2;
6580     }
6581 
6582 #ifndef CONFIG_TCG_INTERPRETER
6583     /* flush instruction cache */
6584     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6585                         (uintptr_t)s->code_buf,
6586                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6587 #endif
6588 
6589     return tcg_current_code_size(s);
6590 }
6591 
6592 #ifdef ELF_HOST_MACHINE
6593 /* In order to use this feature, the backend needs to do three things:
6594 
6595    (1) Define ELF_HOST_MACHINE to indicate both the value to put
6596        into the ELF image and support for the feature.
6597 
6598    (2) Define tcg_register_jit.  This should create a buffer containing
6599        the contents of a .debug_frame section that describes the post-
6600        prologue unwind info for the tcg machine.
6601 
6602    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6603 */
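
/*
 * A backend implementation of the three steps above typically looks
 * like this sketch (the DebugFrame field names, sizes, and the CFA
 * opcode contents vary per host and are elided here):
 *
 *     typedef struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_def_cfa[4];
 *         uint8_t fde_reg_ofs[14];
 *     } DebugFrame;
 *
 *     static const DebugFrame debug_frame = { ... };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */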
6604 
6605 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6606 typedef enum {
6607     JIT_NOACTION = 0,
6608     JIT_REGISTER_FN,
6609     JIT_UNREGISTER_FN
6610 } jit_actions_t;
6611 
6612 struct jit_code_entry {
6613     struct jit_code_entry *next_entry;
6614     struct jit_code_entry *prev_entry;
6615     const void *symfile_addr;
6616     uint64_t symfile_size;
6617 };
6618 
6619 struct jit_descriptor {
6620     uint32_t version;
6621     uint32_t action_flag;
6622     struct jit_code_entry *relevant_entry;
6623     struct jit_code_entry *first_entry;
6624 };
6625 
6626 void __jit_debug_register_code(void) __attribute__((noinline));
6627 void __jit_debug_register_code(void)
6628 {
6629     asm("");
6630 }
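
/*
 * GDB sets a breakpoint in this function to be notified of newly
 * registered code; the noinline attribute and the empty asm keep the
 * compiler from optimizing the function and its calls away.
 */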
6631 
6632 /* Must statically initialize the version, because GDB may check
6633    the version before we can set it.  */
6634 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6635 
6636 /* End GDB interface.  */
6637 
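/*
 * Return the offset of @str within @strtab.  The string must be
 * present: the loop below has no failure exit, and every caller
 * passes a name that appears in img_template.str.
 */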
6638 static int find_string(const char *strtab, const char *str)
6639 {
6640     const char *p = strtab + 1;
6641 
6642     while (1) {
6643         if (strcmp(p, str) == 0) {
6644             return p - strtab;
6645         }
6646         p += strlen(p) + 1;
6647     }
6648 }
6649 
6650 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6651                                  const void *debug_frame,
6652                                  size_t debug_frame_size)
6653 {
6654     struct __attribute__((packed)) DebugInfo {
6655         uint32_t  len;
6656         uint16_t  version;
6657         uint32_t  abbrev;
6658         uint8_t   ptr_size;
6659         uint8_t   cu_die;
6660         uint16_t  cu_lang;
6661         uintptr_t cu_low_pc;
6662         uintptr_t cu_high_pc;
6663         uint8_t   fn_die;
6664         char      fn_name[16];
6665         uintptr_t fn_low_pc;
6666         uintptr_t fn_high_pc;
6667         uint8_t   cu_eoc;
6668     };
6669 
6670     struct ElfImage {
6671         ElfW(Ehdr) ehdr;
6672         ElfW(Phdr) phdr;
6673         ElfW(Shdr) shdr[7];
6674         ElfW(Sym)  sym[2];
6675         struct DebugInfo di;
6676         uint8_t    da[24];
6677         char       str[80];
6678     };
6679 
6680     struct ElfImage *img;
6681 
6682     static const struct ElfImage img_template = {
6683         .ehdr = {
6684             .e_ident[EI_MAG0] = ELFMAG0,
6685             .e_ident[EI_MAG1] = ELFMAG1,
6686             .e_ident[EI_MAG2] = ELFMAG2,
6687             .e_ident[EI_MAG3] = ELFMAG3,
6688             .e_ident[EI_CLASS] = ELF_CLASS,
6689             .e_ident[EI_DATA] = ELF_DATA,
6690             .e_ident[EI_VERSION] = EV_CURRENT,
6691             .e_type = ET_EXEC,
6692             .e_machine = ELF_HOST_MACHINE,
6693             .e_version = EV_CURRENT,
6694             .e_phoff = offsetof(struct ElfImage, phdr),
6695             .e_shoff = offsetof(struct ElfImage, shdr),
6696             .e_ehsize = sizeof(ElfW(Ehdr)),
6697             .e_phentsize = sizeof(ElfW(Phdr)),
6698             .e_phnum = 1,
6699             .e_shentsize = sizeof(ElfW(Shdr)),
6700             .e_shnum = ARRAY_SIZE(img->shdr),
6701             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6702 #ifdef ELF_HOST_FLAGS
6703             .e_flags = ELF_HOST_FLAGS,
6704 #endif
6705 #ifdef ELF_OSABI
6706             .e_ident[EI_OSABI] = ELF_OSABI,
6707 #endif
6708         },
6709         .phdr = {
6710             .p_type = PT_LOAD,
6711             .p_flags = PF_X,
6712         },
6713         .shdr = {
6714             [0] = { .sh_type = SHT_NULL },
6715             /* Trick: The contents of code_gen_buffer are not present in
6716                this fake ELF file; they were allocated elsewhere.  Therefore
6717                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6718                will not look for contents.  We can record any address.  */
6719             [1] = { /* .text */
6720                 .sh_type = SHT_NOBITS,
6721                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6722             },
6723             [2] = { /* .debug_info */
6724                 .sh_type = SHT_PROGBITS,
6725                 .sh_offset = offsetof(struct ElfImage, di),
6726                 .sh_size = sizeof(struct DebugInfo),
6727             },
6728             [3] = { /* .debug_abbrev */
6729                 .sh_type = SHT_PROGBITS,
6730                 .sh_offset = offsetof(struct ElfImage, da),
6731                 .sh_size = sizeof(img->da),
6732             },
6733             [4] = { /* .debug_frame */
6734                 .sh_type = SHT_PROGBITS,
6735                 .sh_offset = sizeof(struct ElfImage),
6736             },
6737             [5] = { /* .symtab */
6738                 .sh_type = SHT_SYMTAB,
6739                 .sh_offset = offsetof(struct ElfImage, sym),
6740                 .sh_size = sizeof(img->sym),
6741                 .sh_info = 1,
6742                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6743                 .sh_entsize = sizeof(ElfW(Sym)),
6744             },
6745             [6] = { /* .strtab */
6746                 .sh_type = SHT_STRTAB,
6747                 .sh_offset = offsetof(struct ElfImage, str),
6748                 .sh_size = sizeof(img->str),
6749             }
6750         },
6751         .sym = {
6752             [1] = { /* code_gen_buffer */
6753                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6754                 .st_shndx = 1,
6755             }
6756         },
6757         .di = {
6758             .len = sizeof(struct DebugInfo) - 4,
6759             .version = 2,
6760             .ptr_size = sizeof(void *),
6761             .cu_die = 1,
6762             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6763             .fn_die = 2,
6764             .fn_name = "code_gen_buffer"
6765         },
6766         .da = {
6767             1,          /* abbrev number (the cu) */
6768             0x11, 1,    /* DW_TAG_compile_unit, has children */
6769             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6770             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6771             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6772             0, 0,       /* end of abbrev */
6773             2,          /* abbrev number (the fn) */
6774             0x2e, 0,    /* DW_TAG_subprogram, no children */
6775             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6776             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6777             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6778             0, 0,       /* end of abbrev */
6779             0           /* no more abbrev */
6780         },
6781         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6782                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6783     };
6784 
6785     /* We only need a single jit entry; statically allocate it.  */
6786     static struct jit_code_entry one_entry;
6787 
6788     uintptr_t buf = (uintptr_t)buf_ptr;
6789     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6790     DebugFrameHeader *dfh;
6791 
6792     img = g_malloc(img_size);
6793     *img = img_template;
6794 
6795     img->phdr.p_vaddr = buf;
6796     img->phdr.p_paddr = buf;
6797     img->phdr.p_memsz = buf_size;
6798 
6799     img->shdr[1].sh_name = find_string(img->str, ".text");
6800     img->shdr[1].sh_addr = buf;
6801     img->shdr[1].sh_size = buf_size;
6802 
6803     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6804     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6805 
6806     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6807     img->shdr[4].sh_size = debug_frame_size;
6808 
6809     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6810     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6811 
6812     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6813     img->sym[1].st_value = buf;
6814     img->sym[1].st_size = buf_size;
6815 
6816     img->di.cu_low_pc = buf;
6817     img->di.cu_high_pc = buf + buf_size;
6818     img->di.fn_low_pc = buf;
6819     img->di.fn_high_pc = buf + buf_size;
6820 
6821     dfh = (DebugFrameHeader *)(img + 1);
6822     memcpy(dfh, debug_frame, debug_frame_size);
6823     dfh->fde.func_start = buf;
6824     dfh->fde.func_len = buf_size;
6825 
6826 #ifdef DEBUG_JIT
6827     /* Enable this block to debug the ELF image file creation.
6828        One can use readelf, objdump, or other inspection utilities.  */
6829     {
6830         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6831         FILE *f = fopen(jit, "w+b");
6832         if (f) {
6833             if (fwrite(img, img_size, 1, f) != 1) {
6834                 /* Avoid the unused return value warning for fwrite.  */
6835             }
6836             fclose(f);
6837         }
6838     }
6839 #endif
6840 
6841     one_entry.symfile_addr = img;
6842     one_entry.symfile_size = img_size;
6843 
6844     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6845     __jit_debug_descriptor.relevant_entry = &one_entry;
6846     __jit_debug_descriptor.first_entry = &one_entry;
6847     __jit_debug_register_code();
6848 }
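
/*
 * Note that registration is one-shot and permanent: the whole of
 * code_gen_buffer is registered once, and JIT_UNREGISTER_FN is
 * never used.
 */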
6849 #else
6850 /* No support for the feature.  Provide the entry point expected by exec.c,
6851    and implement the internal function we declared earlier.  */
6852 
6853 static void tcg_register_jit_int(const void *buf, size_t size,
6854                                  const void *debug_frame,
6855                                  size_t debug_frame_size)
6856 {
6857 }
6858 
6859 void tcg_register_jit(const void *buf, size_t buf_size)
6860 {
6861 }
6862 #endif /* ELF_HOST_MACHINE */
6863 
6864 #if !TCG_TARGET_MAYBE_vec
6865 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6866 {
6867     g_assert_not_reached();
6868 }
6869 #endif
6870