/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

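/*
 * Typical lifecycle (illustrative): the fast path of a qemu_ld/st
 * records its out-of-line branch sites in label_ptr[]; the matching
 * slow path is emitted at the end of the TB by tcg_out_ldst_finalize()
 * below, which patches those branches, calls the proper helper, and
 * finally returns to the host code at raddr.
 */
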
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

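/*
 * Both fields are lg2 byte counts: e.g. a 4-byte access that must be
 * performed as a single atomic unit is described by atom = MO_32
 * (lg2 = 2), with align = MO_32 requesting 4-byte alignment.
 */
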
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

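/*
 * Illustration of the pattern above: a backend with 1-byte insn units
 * takes the memcpy branch of tcg_out32() and advances code_ptr by
 * 4 units, while a backend with 4-byte units stores the word directly
 * and advances by a single unit.  Because the comparisons are against
 * a compile-time constant, the unused branch folds away.
 */
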
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}

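/*
 * Illustrative use: tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32,
 * MO_SW, src) takes a 16-bit value held in a 32-bit source register and
 * sign-extends it into a 64-bit destination, dispatching to
 * tcg_out_ext16s above.
 */
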
/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

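/*
 * Illustrative overlap case: if @i1 is A -> B and @i2 is B -> A, then
 * i1->dst == i2->src and i2->dst == i1->src, so the values must either
 * be exchanged in place (when the host provides xchg) or staged through
 * @scratch before the extensions are applied.
 */
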
/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

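/*
 * Example of the cyclic case above: with i1: A -> B, i2: B -> C,
 * i3: C -> A, no single move can go first without clobbering a pending
 * source, so the cycle is rotated with two exchanges or broken by
 * parking one value in @scratch.
 */
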
/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}

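/*
 * The sort keeps wider entries first and equal-width entries in
 * descending data order, so identical constants end up adjacent.
 * For example, two sites referencing the same 64-bit constant produce
 * neighboring entries that tcg_out_pool_finalize() below merges into a
 * single pool word.
 */
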
/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

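/*
 * Illustrative expansion: a line C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h becomes the enumerator c_o1_i2_r_r_ri here,
 * the initializer { 1, 2, { "r", "r", "ri" } } in constraint_sets[]
 * below, and plain c_o1_i2_r_r_ri again when returned from
 * tcg_target_op_def().
 */
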
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)

/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;

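/*
 * Sketch of what the included backend provides (illustrative; emitter
 * names vary per backend):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * where tgen_add/tgen_addi stand for that backend's emitters for the
 * register-register and register-immediate forms.
 */
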
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

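/*
 * For reference, before the #undef above,
 * OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add) expanded to
 * [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base),
 * which fails to compile unless outop_add really has type TCGOutOpBinary.
 */
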
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

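/*
 * Note on lifetime: allocations from tcg_malloc() live only until
 * tcg_pool_reset() is called when the context is reset between
 * translations.  Chunked pools are retained and reused in place; only
 * the oversized "large" allocations are actually freed.
 */
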
/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

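/*
 * Layout reminder for the typemask encodings above: each slot occupies
 * 3 bits, with slot 0 describing the return value.  E.g. for
 * info_helper_st64_mmu the packed slots read, from slot 0 upward:
 * void return, env, i64 addr, i64 data, i32 oi, ptr ra.
 */
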
#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

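/*
 * Worked example (illustrative): on a host with 6 integer argument
 * registers, arg_slot 7 is not a register slot, so arg_slot_stk_ofs(7)
 * yields TCG_TARGET_CALL_STACK_OFFSET + 1 * sizeof(tcg_target_long),
 * i.e. the second stack slot of the call frame.
 */
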
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

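/*
 * E.g. under TCG_CALL_ARG_EVEN (hosts whose ABI passes 64-bit values in
 * aligned register pairs), an argument about to land in odd slot 1 is
 * advanced to slot 2 by the statement above, leaving slot 1 unused.
 */
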
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

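/*
 * Sizing example: on a 32-bit host an Int128 passed by reference
 * consumes one regular argument slot for the pointer plus
 * n = 128 / 32 = 4 ref slots for the defensive copy.
 */
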
1391 static void init_call_layout(TCGHelperInfo *info)
1392 {
1393     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1394     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1395     unsigned typemask = info->typemask;
1396     unsigned typecode;
1397     TCGCumulativeArgs cum = { };
1398 
1399     /*
1400      * Parse and place any function return value.
1401      */
1402     typecode = typemask & 7;
1403     switch (typecode) {
1404     case dh_typecode_void:
1405         info->nr_out = 0;
1406         break;
1407     case dh_typecode_i32:
1408     case dh_typecode_s32:
1409     case dh_typecode_ptr:
1410         info->nr_out = 1;
1411         info->out_kind = TCG_CALL_RET_NORMAL;
1412         break;
1413     case dh_typecode_i64:
1414     case dh_typecode_s64:
1415         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1416         info->out_kind = TCG_CALL_RET_NORMAL;
1417         /* Query the last register now to trigger any assert early. */
1418         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1419         break;
1420     case dh_typecode_i128:
1421         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1422         info->out_kind = TCG_TARGET_CALL_RET_I128;
1423         switch (TCG_TARGET_CALL_RET_I128) {
1424         case TCG_CALL_RET_NORMAL:
1425             /* Query the last register now to trigger any assert early. */
1426             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1427             break;
1428         case TCG_CALL_RET_BY_VEC:
1429             /* Query the single register now to trigger any assert early. */
1430             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1431             break;
1432         case TCG_CALL_RET_BY_REF:
1433             /*
1434              * Allocate the first argument to the output.
1435              * We don't need to store this anywhere, just make it
1436              * unavailable for use in the input loop below.
1437              */
1438             cum.arg_slot = 1;
1439             break;
1440         default:
1441             qemu_build_not_reached();
1442         }
1443         break;
1444     default:
1445         g_assert_not_reached();
1446     }
1447 
1448     /*
1449      * Parse and place function arguments.
1450      */
1451     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1452         TCGCallArgumentKind kind;
1453         TCGType type;
1454 
1455         typecode = typemask & 7;
1456         switch (typecode) {
1457         case dh_typecode_i32:
1458         case dh_typecode_s32:
1459             type = TCG_TYPE_I32;
1460             break;
1461         case dh_typecode_i64:
1462         case dh_typecode_s64:
1463             type = TCG_TYPE_I64;
1464             break;
1465         case dh_typecode_ptr:
1466             type = TCG_TYPE_PTR;
1467             break;
1468         case dh_typecode_i128:
1469             type = TCG_TYPE_I128;
1470             break;
1471         default:
1472             g_assert_not_reached();
1473         }
1474 
1475         switch (type) {
1476         case TCG_TYPE_I32:
1477             switch (TCG_TARGET_CALL_ARG_I32) {
1478             case TCG_CALL_ARG_EVEN:
1479                 layout_arg_even(&cum);
1480                 /* fall through */
1481             case TCG_CALL_ARG_NORMAL:
1482                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1483                 break;
1484             case TCG_CALL_ARG_EXTEND:
1485                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1486                 layout_arg_1(&cum, info, kind);
1487                 break;
1488             default:
1489                 qemu_build_not_reached();
1490             }
1491             break;
1492 
1493         case TCG_TYPE_I64:
1494             switch (TCG_TARGET_CALL_ARG_I64) {
1495             case TCG_CALL_ARG_EVEN:
1496                 layout_arg_even(&cum);
1497                 /* fall through */
1498             case TCG_CALL_ARG_NORMAL:
1499                 if (TCG_TARGET_REG_BITS == 32) {
1500                     layout_arg_normal_n(&cum, info, 2);
1501                 } else {
1502                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1503                 }
1504                 break;
1505             default:
1506                 qemu_build_not_reached();
1507             }
1508             break;
1509 
1510         case TCG_TYPE_I128:
1511             switch (TCG_TARGET_CALL_ARG_I128) {
1512             case TCG_CALL_ARG_EVEN:
1513                 layout_arg_even(&cum);
1514                 /* fall through */
1515             case TCG_CALL_ARG_NORMAL:
1516                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1517                 break;
1518             case TCG_CALL_ARG_BY_REF:
1519                 layout_arg_by_ref(&cum, info);
1520                 break;
1521             default:
1522                 qemu_build_not_reached();
1523             }
1524             break;
1525 
1526         default:
1527             g_assert_not_reached();
1528         }
1529     }
1530     info->nr_in = cum.info_in_idx;
1531 
1532     /* Validate that we didn't overrun the input array. */
1533     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1534     /* Validate the backend has enough argument space. */
1535     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1536 
1537     /*
1538      * Relocate the "ref_slot" area to the end of the parameters.
1539      * Minimizing this stack offset helps code size for x86,
1540      * which has a signed 8-bit offset encoding.
1541      */
1542     if (cum.ref_slot != 0) {
1543         int ref_base = 0;
1544 
1545         if (cum.arg_slot > max_reg_slots) {
1546             int align = __alignof(Int128) / sizeof(tcg_target_long);
1547 
1548             ref_base = cum.arg_slot - max_reg_slots;
1549             if (align > 1) {
1550                 ref_base = ROUND_UP(ref_base, align);
1551             }
1552         }
1553         assert(ref_base + cum.ref_slot <= max_stk_slots);
1554         ref_base += max_reg_slots;
1555 
1556         if (ref_base != 0) {
1557             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1558                 TCGCallArgumentLoc *loc = &info->in[i];
1559                 switch (loc->kind) {
1560                 case TCG_CALL_ARG_BY_REF:
1561                 case TCG_CALL_ARG_BY_REF_N:
1562                     loc->ref_slot += ref_base;
1563                     break;
1564                 default:
1565                     break;
1566                 }
1567             }
1568         }
1569     }
1570 }
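/*
 * Worked example for the ref_slot relocation above (a sketch; the
 * slot counts are hypothetical).  Assume max_reg_slots == 6 and that
 * argument layout consumed cum.arg_slot == 8, i.e. two stack slots.
 * On a 64-bit host, align == 16 / 8 == 2, so:
 *
 *     ref_base = ROUND_UP(8 - 6, 2);   // 2 stack slots, aligned
 *     ref_base += 6;                   // rebase past the reg slots
 *
 * Each TCG_CALL_ARG_BY_REF{,_N} location then has its ref_slot
 * shifted by 8, placing the by-reference copies immediately after
 * the stack arguments.
 */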
1571 
1572 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1573 static void process_constraint_sets(void);
1574 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1575                                             TCGReg reg, const char *name);
1576 
1577 static void tcg_context_init(unsigned max_threads)
1578 {
1579     TCGContext *s = &tcg_init_ctx;
1580     int n, i;
1581     TCGTemp *ts;
1582 
1583     memset(s, 0, sizeof(*s));
1584     s->nb_globals = 0;
1585 
1586     init_call_layout(&info_helper_ld32_mmu);
1587     init_call_layout(&info_helper_ld64_mmu);
1588     init_call_layout(&info_helper_ld128_mmu);
1589     init_call_layout(&info_helper_st32_mmu);
1590     init_call_layout(&info_helper_st64_mmu);
1591     init_call_layout(&info_helper_st128_mmu);
1592 
1593     tcg_target_init(s);
1594     process_constraint_sets();
1595 
1596     /* Reverse the order of the saved registers, assuming they're all at
1597        the start of tcg_target_reg_alloc_order.  */
1598     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1599         int r = tcg_target_reg_alloc_order[n];
1600         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1601             break;
1602         }
1603     }
1604     for (i = 0; i < n; ++i) {
1605         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1606     }
1607     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1608         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1609     }
1610 
1611     tcg_ctx = s;
1612     /*
1613      * In user-mode we simply share the init context among threads, since we
1614      * use a single region. See the documentation of tcg_region_init() for
1615      * the reasoning behind this.
1616      * In system-mode we will have at most max_threads TCG threads.
1617      */
1618 #ifdef CONFIG_USER_ONLY
1619     tcg_ctxs = &tcg_ctx;
1620     tcg_cur_ctxs = 1;
1621     tcg_max_ctxs = 1;
1622 #else
1623     tcg_max_ctxs = max_threads;
1624     tcg_ctxs = g_new0(TCGContext *, max_threads);
1625 #endif
1626 
1627     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1628     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1629     tcg_env = temp_tcgv_ptr(ts);
1630 }
1631 
1632 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1633 {
1634     tcg_context_init(max_threads);
1635     tcg_region_init(tb_size, splitwx, max_threads);
1636 }
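/*
 * A typical invocation from accelerator setup (illustrative only;
 * the exact expression lives in the tcg accel code):
 *
 *     tcg_init(s->tb_size * MiB, s->splitwx_enabled, max_cpus);
 */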
1637 
1638 /*
1639  * Allocate TBs right before their corresponding translated code, making
1640  * sure that TBs and code are on different cache lines.
1641  */
1642 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1643 {
1644     uintptr_t align = qemu_icache_linesize;
1645     TranslationBlock *tb;
1646     void *next;
1647 
1648  retry:
1649     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1650     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1651 
1652     if (unlikely(next > s->code_gen_highwater)) {
1653         if (tcg_region_alloc(s)) {
1654             return NULL;
1655         }
1656         goto retry;
1657     }
1658     qatomic_set(&s->code_gen_ptr, next);
1659     return tb;
1660 }
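/*
 * Sketch of the expected caller pattern (cf. tb_gen_code).  The retry
 * above switches regions via tcg_region_alloc(); NULL means the whole
 * code buffer is exhausted:
 *
 *     tb = tcg_tb_alloc(tcg_ctx);
 *     if (tb == NULL) {
 *         tb_flush(cpu);       // flush all TBs and retranslate
 *     }
 */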
1661 
1662 void tcg_prologue_init(void)
1663 {
1664     TCGContext *s = tcg_ctx;
1665     size_t prologue_size;
1666 
1667     s->code_ptr = s->code_gen_ptr;
1668     s->code_buf = s->code_gen_ptr;
1669     s->data_gen_ptr = NULL;
1670 
1671 #ifndef CONFIG_TCG_INTERPRETER
1672     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1673 #endif
1674 
1675     s->pool_labels = NULL;
1676 
1677     qemu_thread_jit_write();
1678     /* Generate the prologue.  */
1679     tcg_target_qemu_prologue(s);
1680 
1681     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1682     {
1683         int result = tcg_out_pool_finalize(s);
1684         tcg_debug_assert(result == 0);
1685     }
1686 
1687     prologue_size = tcg_current_code_size(s);
1688     perf_report_prologue(s->code_gen_ptr, prologue_size);
1689 
1690 #ifndef CONFIG_TCG_INTERPRETER
1691     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1692                         (uintptr_t)s->code_buf, prologue_size);
1693 #endif
1694 
1695     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1696         FILE *logfile = qemu_log_trylock();
1697         if (logfile) {
1698             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1699             if (s->data_gen_ptr) {
1700                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1701                 size_t data_size = prologue_size - code_size;
1702                 size_t i;
1703 
1704                 disas(logfile, s->code_gen_ptr, code_size);
1705 
1706                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1707                     if (sizeof(tcg_target_ulong) == 8) {
1708                         fprintf(logfile,
1709                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1710                                 (uintptr_t)s->data_gen_ptr + i,
1711                                 *(uint64_t *)(s->data_gen_ptr + i));
1712                     } else {
1713                         fprintf(logfile,
1714                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1715                                 (uintptr_t)s->data_gen_ptr + i,
1716                                 *(uint32_t *)(s->data_gen_ptr + i));
1717                     }
1718                 }
1719             } else {
1720                 disas(logfile, s->code_gen_ptr, prologue_size);
1721             }
1722             fprintf(logfile, "\n");
1723             qemu_log_unlock(logfile);
1724         }
1725     }
1726 
1727 #ifndef CONFIG_TCG_INTERPRETER
1728     /*
1729      * Assert that goto_ptr is implemented completely, i.e. that the
1730      * prologue set an epilogue.  For tci, we use NULL as the signal
1731      * to return from the interpreter, so skip this check.
1732      */
1733     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1734 #endif
1735 
1736     tcg_region_prologue_set(s);
1737 }
1738 
1739 void tcg_func_start(TCGContext *s)
1740 {
1741     tcg_pool_reset(s);
1742     s->nb_temps = s->nb_globals;
1743 
1744     /* No temps have been previously allocated for size or locality.  */
1745     tcg_temp_ebb_reset_freed(s);
1746 
1747     /* No constant temps have been previously allocated. */
1748     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1749         if (s->const_table[i]) {
1750             g_hash_table_remove_all(s->const_table[i]);
1751         }
1752     }
1753 
1754     s->nb_ops = 0;
1755     s->nb_labels = 0;
1756     s->current_frame_offset = s->frame_start;
1757 
1758 #ifdef CONFIG_DEBUG_TCG
1759     s->goto_tb_issue_mask = 0;
1760 #endif
1761 
1762     QTAILQ_INIT(&s->ops);
1763     QTAILQ_INIT(&s->free_ops);
1764     s->emit_before_op = NULL;
1765     QSIMPLEQ_INIT(&s->labels);
1766 
1767     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1768     tcg_debug_assert(s->insn_start_words > 0);
1769 }
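/*
 * tcg_func_start() begins each new translation; a minimal per-TB
 * sequence looks like (a sketch, eliding guest instruction decode):
 *
 *     tcg_func_start(tcg_ctx);
 *     ...emit ops for the guest instructions...
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
 */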
1770 
1771 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1772 {
1773     int n = s->nb_temps++;
1774 
1775     if (n >= TCG_MAX_TEMPS) {
1776         tcg_raise_tb_overflow(s);
1777     }
1778     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1779 }
1780 
1781 static TCGTemp *tcg_global_alloc(TCGContext *s)
1782 {
1783     TCGTemp *ts;
1784 
1785     tcg_debug_assert(s->nb_globals == s->nb_temps);
1786     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1787     s->nb_globals++;
1788     ts = tcg_temp_alloc(s);
1789     ts->kind = TEMP_GLOBAL;
1790 
1791     return ts;
1792 }
1793 
1794 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1795                                             TCGReg reg, const char *name)
1796 {
1797     TCGTemp *ts;
1798 
1799     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1800 
1801     ts = tcg_global_alloc(s);
1802     ts->base_type = type;
1803     ts->type = type;
1804     ts->kind = TEMP_FIXED;
1805     ts->reg = reg;
1806     ts->name = name;
1807     tcg_regset_set_reg(s->reserved_regs, reg);
1808 
1809     return ts;
1810 }
1811 
1812 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1813 {
1814     s->frame_start = start;
1815     s->frame_end = start + size;
1816     s->frame_temp
1817         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1818 }
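/*
 * Backends call this from tcg_target_qemu_prologue(); e.g.
 * (illustrative, with a hypothetical stack_offset):
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, stack_offset,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 */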
1819 
1820 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1821                                             const char *name, TCGType type)
1822 {
1823     TCGContext *s = tcg_ctx;
1824     TCGTemp *base_ts = tcgv_ptr_temp(base);
1825     TCGTemp *ts = tcg_global_alloc(s);
1826     int indirect_reg = 0;
1827 
1828     switch (base_ts->kind) {
1829     case TEMP_FIXED:
1830         break;
1831     case TEMP_GLOBAL:
1832         /* We do not support double-indirect registers.  */
1833         tcg_debug_assert(!base_ts->indirect_reg);
1834         base_ts->indirect_base = 1;
1835         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1836                             ? 2 : 1);
1837         indirect_reg = 1;
1838         break;
1839     default:
1840         g_assert_not_reached();
1841     }
1842 
1843     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1844         TCGTemp *ts2 = tcg_global_alloc(s);
1845         char buf[64];
1846 
1847         ts->base_type = TCG_TYPE_I64;
1848         ts->type = TCG_TYPE_I32;
1849         ts->indirect_reg = indirect_reg;
1850         ts->mem_allocated = 1;
1851         ts->mem_base = base_ts;
1852         ts->mem_offset = offset;
1853         pstrcpy(buf, sizeof(buf), name);
1854         pstrcat(buf, sizeof(buf), "_0");
1855         ts->name = strdup(buf);
1856 
1857         tcg_debug_assert(ts2 == ts + 1);
1858         ts2->base_type = TCG_TYPE_I64;
1859         ts2->type = TCG_TYPE_I32;
1860         ts2->indirect_reg = indirect_reg;
1861         ts2->mem_allocated = 1;
1862         ts2->mem_base = base_ts;
1863         ts2->mem_offset = offset + 4;
1864         ts2->temp_subindex = 1;
1865         pstrcpy(buf, sizeof(buf), name);
1866         pstrcat(buf, sizeof(buf), "_1");
1867         ts2->name = strdup(buf);
1868     } else {
1869         ts->base_type = type;
1870         ts->type = type;
1871         ts->indirect_reg = indirect_reg;
1872         ts->mem_allocated = 1;
1873         ts->mem_base = base_ts;
1874         ts->mem_offset = offset;
1875         ts->name = name;
1876     }
1877     return ts;
1878 }
1879 
1880 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1881 {
1882     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1883     return temp_tcgv_i32(ts);
1884 }
1885 
1886 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1887 {
1888     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1889     return temp_tcgv_i64(ts);
1890 }
1891 
1892 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1893 {
1894     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1895     return temp_tcgv_ptr(ts);
1896 }
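/*
 * Targets use these wrappers to expose guest CPU state as TCG
 * globals, e.g. (illustrative; CPUFoo and its fields are
 * hypothetical):
 *
 *     cpu_pc = tcg_global_mem_new_i32(tcg_env,
 *                                     offsetof(CPUFoo, pc), "pc");
 */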
1897 
1898 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1899 {
1900     TCGContext *s = tcg_ctx;
1901     TCGTemp *ts;
1902     int n;
1903 
1904     if (kind == TEMP_EBB) {
1905         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1906 
1907         if (idx < TCG_MAX_TEMPS) {
1908             /* There is already an available temp with the right type.  */
1909             clear_bit(idx, s->free_temps[type].l);
1910 
1911             ts = &s->temps[idx];
1912             ts->temp_allocated = 1;
1913             tcg_debug_assert(ts->base_type == type);
1914             tcg_debug_assert(ts->kind == kind);
1915             return ts;
1916         }
1917     } else {
1918         tcg_debug_assert(kind == TEMP_TB);
1919     }
1920 
1921     switch (type) {
1922     case TCG_TYPE_I32:
1923     case TCG_TYPE_V64:
1924     case TCG_TYPE_V128:
1925     case TCG_TYPE_V256:
1926         n = 1;
1927         break;
1928     case TCG_TYPE_I64:
1929         n = 64 / TCG_TARGET_REG_BITS;
1930         break;
1931     case TCG_TYPE_I128:
1932         n = 128 / TCG_TARGET_REG_BITS;
1933         break;
1934     default:
1935         g_assert_not_reached();
1936     }
1937 
1938     ts = tcg_temp_alloc(s);
1939     ts->base_type = type;
1940     ts->temp_allocated = 1;
1941     ts->kind = kind;
1942 
1943     if (n == 1) {
1944         ts->type = type;
1945     } else {
1946         ts->type = TCG_TYPE_REG;
1947 
1948         for (int i = 1; i < n; ++i) {
1949             TCGTemp *ts2 = tcg_temp_alloc(s);
1950 
1951             tcg_debug_assert(ts2 == ts + i);
1952             ts2->base_type = type;
1953             ts2->type = TCG_TYPE_REG;
1954             ts2->temp_allocated = 1;
1955             ts2->temp_subindex = i;
1956             ts2->kind = kind;
1957         }
1958     }
1959     return ts;
1960 }
1961 
1962 TCGv_i32 tcg_temp_new_i32(void)
1963 {
1964     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1965 }
1966 
1967 TCGv_i32 tcg_temp_ebb_new_i32(void)
1968 {
1969     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1970 }
1971 
1972 TCGv_i64 tcg_temp_new_i64(void)
1973 {
1974     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1975 }
1976 
1977 TCGv_i64 tcg_temp_ebb_new_i64(void)
1978 {
1979     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1980 }
1981 
1982 TCGv_ptr tcg_temp_new_ptr(void)
1983 {
1984     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1985 }
1986 
1987 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1988 {
1989     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1990 }
1991 
1992 TCGv_i128 tcg_temp_new_i128(void)
1993 {
1994     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1995 }
1996 
1997 TCGv_i128 tcg_temp_ebb_new_i128(void)
1998 {
1999     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2000 }
2001 
2002 TCGv_vec tcg_temp_new_vec(TCGType type)
2003 {
2004     TCGTemp *t;
2005 
2006 #ifdef CONFIG_DEBUG_TCG
2007     switch (type) {
2008     case TCG_TYPE_V64:
2009         assert(TCG_TARGET_HAS_v64);
2010         break;
2011     case TCG_TYPE_V128:
2012         assert(TCG_TARGET_HAS_v128);
2013         break;
2014     case TCG_TYPE_V256:
2015         assert(TCG_TARGET_HAS_v256);
2016         break;
2017     default:
2018         g_assert_not_reached();
2019     }
2020 #endif
2021 
2022     t = tcg_temp_new_internal(type, TEMP_EBB);
2023     return temp_tcgv_vec(t);
2024 }
2025 
2026 /* Create a new temp of the same type as an existing temp.  */
2027 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2028 {
2029     TCGTemp *t = tcgv_vec_temp(match);
2030 
2031     tcg_debug_assert(t->temp_allocated != 0);
2032 
2033     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2034     return temp_tcgv_vec(t);
2035 }
2036 
2037 void tcg_temp_free_internal(TCGTemp *ts)
2038 {
2039     TCGContext *s = tcg_ctx;
2040 
2041     switch (ts->kind) {
2042     case TEMP_CONST:
2043     case TEMP_TB:
2044         /* Silently ignore free. */
2045         break;
2046     case TEMP_EBB:
2047         tcg_debug_assert(ts->temp_allocated != 0);
2048         ts->temp_allocated = 0;
2049         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2050         break;
2051     default:
2052         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2053         g_assert_not_reached();
2054     }
2055 }
2056 
2057 void tcg_temp_free_i32(TCGv_i32 arg)
2058 {
2059     tcg_temp_free_internal(tcgv_i32_temp(arg));
2060 }
2061 
2062 void tcg_temp_free_i64(TCGv_i64 arg)
2063 {
2064     tcg_temp_free_internal(tcgv_i64_temp(arg));
2065 }
2066 
2067 void tcg_temp_free_i128(TCGv_i128 arg)
2068 {
2069     tcg_temp_free_internal(tcgv_i128_temp(arg));
2070 }
2071 
2072 void tcg_temp_free_ptr(TCGv_ptr arg)
2073 {
2074     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2075 }
2076 
2077 void tcg_temp_free_vec(TCGv_vec arg)
2078 {
2079     tcg_temp_free_internal(tcgv_vec_temp(arg));
2080 }
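/*
 * Illustrative temp lifetime (a sketch): EBB temps come from, and
 * return to, the free_temps bitmap managed above.
 *
 *     TCGv_i64 t = tcg_temp_ebb_new_i64();
 *     tcg_gen_mov_i64(t, src);
 *     ...
 *     tcg_temp_free_i64(t);    // marks the index free for reuse
 */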
2081 
2082 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2083 {
2084     TCGContext *s = tcg_ctx;
2085     GHashTable *h = s->const_table[type];
2086     TCGTemp *ts;
2087 
2088     if (h == NULL) {
2089         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2090         s->const_table[type] = h;
2091     }
2092 
2093     ts = g_hash_table_lookup(h, &val);
2094     if (ts == NULL) {
2095         int64_t *val_ptr;
2096 
2097         ts = tcg_temp_alloc(s);
2098 
2099         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2100             TCGTemp *ts2 = tcg_temp_alloc(s);
2101 
2102             tcg_debug_assert(ts2 == ts + 1);
2103 
2104             ts->base_type = TCG_TYPE_I64;
2105             ts->type = TCG_TYPE_I32;
2106             ts->kind = TEMP_CONST;
2107             ts->temp_allocated = 1;
2108 
2109             ts2->base_type = TCG_TYPE_I64;
2110             ts2->type = TCG_TYPE_I32;
2111             ts2->kind = TEMP_CONST;
2112             ts2->temp_allocated = 1;
2113             ts2->temp_subindex = 1;
2114 
2115             /*
2116              * Retain the full value of the 64-bit constant in the low
2117              * part, so that the hash table works.  Actual uses will
2118              * truncate the value to the low part.
2119              */
2120             ts[HOST_BIG_ENDIAN].val = val;
2121             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2122             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2123         } else {
2124             ts->base_type = type;
2125             ts->type = type;
2126             ts->kind = TEMP_CONST;
2127             ts->temp_allocated = 1;
2128             ts->val = val;
2129             val_ptr = &ts->val;
2130         }
2131         g_hash_table_insert(h, val_ptr, ts);
2132     }
2133 
2134     return ts;
2135 }
2136 
2137 TCGv_i32 tcg_constant_i32(int32_t val)
2138 {
2139     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2140 }
2141 
2142 TCGv_i64 tcg_constant_i64(int64_t val)
2143 {
2144     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2145 }
2146 
2147 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2148 {
2149     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2150 }
2151 
2152 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2153 {
2154     val = dup_const(vece, val);
2155     return temp_tcgv_vec(tcg_constant_internal(type, val));
2156 }
2157 
2158 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2159 {
2160     TCGTemp *t = tcgv_vec_temp(match);
2161 
2162     tcg_debug_assert(t->temp_allocated != 0);
2163     return tcg_constant_vec(t->base_type, vece, val);
2164 }
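/*
 * Constants are interned per (type, value): repeated requests return
 * the same temp, and freeing one is a silent no-op (see
 * tcg_temp_free_internal).  Illustrative:
 *
 *     TCGv_i32 a = tcg_constant_i32(1);
 *     TCGv_i32 b = tcg_constant_i32(1);    // same underlying TCGTemp
 */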
2165 
2166 #ifdef CONFIG_DEBUG_TCG
2167 size_t temp_idx(TCGTemp *ts)
2168 {
2169     ptrdiff_t n = ts - tcg_ctx->temps;
2170     assert(n >= 0 && n < tcg_ctx->nb_temps);
2171     return n;
2172 }
2173 
2174 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2175 {
2176     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2177 
2178     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2179     assert(o % sizeof(TCGTemp) == 0);
2180 
2181     return (void *)tcg_ctx + (uintptr_t)v;
2182 }
2183 #endif /* CONFIG_DEBUG_TCG */
2184 
2185 /*
2186  * Return true if OP may appear in the opcode stream with TYPE.
2187  * Test the runtime variable that controls each opcode.
2188  */
2189 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2190 {
2191     bool has_type;
2192 
2193     switch (type) {
2194     case TCG_TYPE_I32:
2195         has_type = true;
2196         break;
2197     case TCG_TYPE_I64:
2198         has_type = TCG_TARGET_REG_BITS == 64;
2199         break;
2200     case TCG_TYPE_V64:
2201         has_type = TCG_TARGET_HAS_v64;
2202         break;
2203     case TCG_TYPE_V128:
2204         has_type = TCG_TARGET_HAS_v128;
2205         break;
2206     case TCG_TYPE_V256:
2207         has_type = TCG_TARGET_HAS_v256;
2208         break;
2209     default:
2210         has_type = false;
2211         break;
2212     }
2213 
2214     switch (op) {
2215     case INDEX_op_discard:
2216     case INDEX_op_set_label:
2217     case INDEX_op_call:
2218     case INDEX_op_br:
2219     case INDEX_op_mb:
2220     case INDEX_op_insn_start:
2221     case INDEX_op_exit_tb:
2222     case INDEX_op_goto_tb:
2223     case INDEX_op_goto_ptr:
2224     case INDEX_op_qemu_ld_i32:
2225     case INDEX_op_qemu_st_i32:
2226     case INDEX_op_qemu_ld_i64:
2227     case INDEX_op_qemu_st_i64:
2228         return true;
2229 
2230     case INDEX_op_qemu_st8_i32:
2231         return TCG_TARGET_HAS_qemu_st8_i32;
2232 
2233     case INDEX_op_qemu_ld_i128:
2234     case INDEX_op_qemu_st_i128:
2235         return TCG_TARGET_HAS_qemu_ldst_i128;
2236 
2237     case INDEX_op_add:
2238     case INDEX_op_and:
2239     case INDEX_op_mov:
2240     case INDEX_op_or:
2241     case INDEX_op_xor:
2242         return has_type;
2243 
2244     case INDEX_op_setcond_i32:
2245     case INDEX_op_brcond_i32:
2246     case INDEX_op_movcond_i32:
2247     case INDEX_op_ld8u_i32:
2248     case INDEX_op_ld8s_i32:
2249     case INDEX_op_ld16u_i32:
2250     case INDEX_op_ld16s_i32:
2251     case INDEX_op_ld_i32:
2252     case INDEX_op_st8_i32:
2253     case INDEX_op_st16_i32:
2254     case INDEX_op_st_i32:
2255     case INDEX_op_shl_i32:
2256     case INDEX_op_shr_i32:
2257     case INDEX_op_sar_i32:
2258     case INDEX_op_extract_i32:
2259     case INDEX_op_sextract_i32:
2260     case INDEX_op_deposit_i32:
2261         return true;
2262 
2263     case INDEX_op_negsetcond_i32:
2264         return TCG_TARGET_HAS_negsetcond_i32;
2265     case INDEX_op_rem_i32:
2266     case INDEX_op_remu_i32:
2267         return TCG_TARGET_HAS_rem_i32;
2268     case INDEX_op_div2_i32:
2269     case INDEX_op_divu2_i32:
2270         return TCG_TARGET_HAS_div2_i32;
2271     case INDEX_op_rotl_i32:
2272     case INDEX_op_rotr_i32:
2273         return TCG_TARGET_HAS_rot_i32;
2274     case INDEX_op_extract2_i32:
2275         return TCG_TARGET_HAS_extract2_i32;
2276     case INDEX_op_add2_i32:
2277         return TCG_TARGET_HAS_add2_i32;
2278     case INDEX_op_sub2_i32:
2279         return TCG_TARGET_HAS_sub2_i32;
2280     case INDEX_op_mulu2_i32:
2281         return TCG_TARGET_HAS_mulu2_i32;
2282     case INDEX_op_muls2_i32:
2283         return TCG_TARGET_HAS_muls2_i32;
2284     case INDEX_op_bswap16_i32:
2285         return TCG_TARGET_HAS_bswap16_i32;
2286     case INDEX_op_bswap32_i32:
2287         return TCG_TARGET_HAS_bswap32_i32;
2288     case INDEX_op_clz_i32:
2289         return TCG_TARGET_HAS_clz_i32;
2290     case INDEX_op_ctz_i32:
2291         return TCG_TARGET_HAS_ctz_i32;
2292     case INDEX_op_ctpop_i32:
2293         return TCG_TARGET_HAS_ctpop_i32;
2294 
2295     case INDEX_op_brcond2_i32:
2296     case INDEX_op_setcond2_i32:
2297         return TCG_TARGET_REG_BITS == 32;
2298 
2299     case INDEX_op_setcond_i64:
2300     case INDEX_op_brcond_i64:
2301     case INDEX_op_movcond_i64:
2302     case INDEX_op_ld8u_i64:
2303     case INDEX_op_ld8s_i64:
2304     case INDEX_op_ld16u_i64:
2305     case INDEX_op_ld16s_i64:
2306     case INDEX_op_ld32u_i64:
2307     case INDEX_op_ld32s_i64:
2308     case INDEX_op_ld_i64:
2309     case INDEX_op_st8_i64:
2310     case INDEX_op_st16_i64:
2311     case INDEX_op_st32_i64:
2312     case INDEX_op_st_i64:
2313     case INDEX_op_shl_i64:
2314     case INDEX_op_shr_i64:
2315     case INDEX_op_sar_i64:
2316     case INDEX_op_ext_i32_i64:
2317     case INDEX_op_extu_i32_i64:
2318     case INDEX_op_extract_i64:
2319     case INDEX_op_sextract_i64:
2320     case INDEX_op_deposit_i64:
2321         return TCG_TARGET_REG_BITS == 64;
2322 
2323     case INDEX_op_negsetcond_i64:
2324         return TCG_TARGET_HAS_negsetcond_i64;
2325     case INDEX_op_rem_i64:
2326     case INDEX_op_remu_i64:
2327         return TCG_TARGET_HAS_rem_i64;
2328     case INDEX_op_div2_i64:
2329     case INDEX_op_divu2_i64:
2330         return TCG_TARGET_HAS_div2_i64;
2331     case INDEX_op_rotl_i64:
2332     case INDEX_op_rotr_i64:
2333         return TCG_TARGET_HAS_rot_i64;
2334     case INDEX_op_extract2_i64:
2335         return TCG_TARGET_HAS_extract2_i64;
2336     case INDEX_op_extrl_i64_i32:
2337     case INDEX_op_extrh_i64_i32:
2338         return TCG_TARGET_HAS_extr_i64_i32;
2339     case INDEX_op_bswap16_i64:
2340         return TCG_TARGET_HAS_bswap16_i64;
2341     case INDEX_op_bswap32_i64:
2342         return TCG_TARGET_HAS_bswap32_i64;
2343     case INDEX_op_bswap64_i64:
2344         return TCG_TARGET_HAS_bswap64_i64;
2345     case INDEX_op_clz_i64:
2346         return TCG_TARGET_HAS_clz_i64;
2347     case INDEX_op_ctz_i64:
2348         return TCG_TARGET_HAS_ctz_i64;
2349     case INDEX_op_ctpop_i64:
2350         return TCG_TARGET_HAS_ctpop_i64;
2351     case INDEX_op_add2_i64:
2352         return TCG_TARGET_HAS_add2_i64;
2353     case INDEX_op_sub2_i64:
2354         return TCG_TARGET_HAS_sub2_i64;
2355     case INDEX_op_mulu2_i64:
2356         return TCG_TARGET_HAS_mulu2_i64;
2357     case INDEX_op_muls2_i64:
2358         return TCG_TARGET_HAS_muls2_i64;
2359 
2360     case INDEX_op_mov_vec:
2361     case INDEX_op_dup_vec:
2362     case INDEX_op_dupm_vec:
2363     case INDEX_op_ld_vec:
2364     case INDEX_op_st_vec:
2365     case INDEX_op_add_vec:
2366     case INDEX_op_sub_vec:
2367     case INDEX_op_and_vec:
2368     case INDEX_op_or_vec:
2369     case INDEX_op_xor_vec:
2370     case INDEX_op_cmp_vec:
2371         return has_type;
2372     case INDEX_op_dup2_vec:
2373         return has_type && TCG_TARGET_REG_BITS == 32;
2374     case INDEX_op_not_vec:
2375         return has_type && TCG_TARGET_HAS_not_vec;
2376     case INDEX_op_neg_vec:
2377         return has_type && TCG_TARGET_HAS_neg_vec;
2378     case INDEX_op_abs_vec:
2379         return has_type && TCG_TARGET_HAS_abs_vec;
2380     case INDEX_op_andc_vec:
2381         return has_type && TCG_TARGET_HAS_andc_vec;
2382     case INDEX_op_orc_vec:
2383         return has_type && TCG_TARGET_HAS_orc_vec;
2384     case INDEX_op_nand_vec:
2385         return has_type && TCG_TARGET_HAS_nand_vec;
2386     case INDEX_op_nor_vec:
2387         return has_type && TCG_TARGET_HAS_nor_vec;
2388     case INDEX_op_eqv_vec:
2389         return has_type && TCG_TARGET_HAS_eqv_vec;
2390     case INDEX_op_mul_vec:
2391         return has_type && TCG_TARGET_HAS_mul_vec;
2392     case INDEX_op_shli_vec:
2393     case INDEX_op_shri_vec:
2394     case INDEX_op_sari_vec:
2395         return has_type && TCG_TARGET_HAS_shi_vec;
2396     case INDEX_op_shls_vec:
2397     case INDEX_op_shrs_vec:
2398     case INDEX_op_sars_vec:
2399         return has_type && TCG_TARGET_HAS_shs_vec;
2400     case INDEX_op_shlv_vec:
2401     case INDEX_op_shrv_vec:
2402     case INDEX_op_sarv_vec:
2403         return has_type && TCG_TARGET_HAS_shv_vec;
2404     case INDEX_op_rotli_vec:
2405         return has_type && TCG_TARGET_HAS_roti_vec;
2406     case INDEX_op_rotls_vec:
2407         return has_type && TCG_TARGET_HAS_rots_vec;
2408     case INDEX_op_rotlv_vec:
2409     case INDEX_op_rotrv_vec:
2410         return has_type && TCG_TARGET_HAS_rotv_vec;
2411     case INDEX_op_ssadd_vec:
2412     case INDEX_op_usadd_vec:
2413     case INDEX_op_sssub_vec:
2414     case INDEX_op_ussub_vec:
2415         return has_type && TCG_TARGET_HAS_sat_vec;
2416     case INDEX_op_smin_vec:
2417     case INDEX_op_umin_vec:
2418     case INDEX_op_smax_vec:
2419     case INDEX_op_umax_vec:
2420         return has_type && TCG_TARGET_HAS_minmax_vec;
2421     case INDEX_op_bitsel_vec:
2422         return has_type && TCG_TARGET_HAS_bitsel_vec;
2423     case INDEX_op_cmpsel_vec:
2424         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2425 
2426     default:
2427         if (op < INDEX_op_last_generic) {
2428             const TCGOutOp *outop;
2429             TCGConstraintSetIndex con_set;
2430 
2431             if (!has_type) {
2432                 return false;
2433             }
2434 
2435             outop = all_outop[op];
2436             tcg_debug_assert(outop != NULL);
2437 
2438             con_set = outop->static_constraint;
2439             if (con_set == C_Dynamic) {
2440                 con_set = outop->dynamic_constraint(type, flags);
2441             }
2442             if (con_set >= 0) {
2443                 return true;
2444             }
2445             tcg_debug_assert(con_set == C_NotImplemented);
2446             return false;
2447         }
2448         tcg_debug_assert(op < NB_OPS);
2449         return true;
2450 
2451     case INDEX_op_last_generic:
2452         g_assert_not_reached();
2453     }
2454 }
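/*
 * Illustrative use (a sketch): op expanders query this to choose
 * between a native opcode and a generic fallback, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) {
 *         ...emit the rotate directly...
 *     } else {
 *         ...expand via shl/shr/or...
 *     }
 */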
2455 
2456 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2457 {
2458     unsigned width;
2459 
2460     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2461     width = (type == TCG_TYPE_I32 ? 32 : 64);
2462 
2463     tcg_debug_assert(ofs < width);
2464     tcg_debug_assert(len > 0);
2465     tcg_debug_assert(len <= width - ofs);
2466 
2467     return TCG_TARGET_deposit_valid(type, ofs, len);
2468 }
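/*
 * Example (illustrative): inserting a byte at bits [15:8] of a 32-bit
 * value corresponds to tcg_op_deposit_valid(TCG_TYPE_I32, 8, 8).
 */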
2469 
2470 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2471 
2472 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2473                           TCGTemp *ret, TCGTemp **args)
2474 {
2475     TCGv_i64 extend_free[MAX_CALL_IARGS];
2476     int n_extend = 0;
2477     TCGOp *op;
2478     int i, n, pi = 0, total_args;
2479 
2480     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2481         init_call_layout(info);
2482         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2483     }
2484 
2485     total_args = info->nr_out + info->nr_in + 2;
2486     op = tcg_op_alloc(INDEX_op_call, total_args);
2487 
2488 #ifdef CONFIG_PLUGIN
2489     /* Flag helpers that may affect guest state */
2490     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2491         tcg_ctx->plugin_insn->calls_helpers = true;
2492     }
2493 #endif
2494 
2495     TCGOP_CALLO(op) = n = info->nr_out;
2496     switch (n) {
2497     case 0:
2498         tcg_debug_assert(ret == NULL);
2499         break;
2500     case 1:
2501         tcg_debug_assert(ret != NULL);
2502         op->args[pi++] = temp_arg(ret);
2503         break;
2504     case 2:
2505     case 4:
2506         tcg_debug_assert(ret != NULL);
2507         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2508         tcg_debug_assert(ret->temp_subindex == 0);
2509         for (i = 0; i < n; ++i) {
2510             op->args[pi++] = temp_arg(ret + i);
2511         }
2512         break;
2513     default:
2514         g_assert_not_reached();
2515     }
2516 
2517     TCGOP_CALLI(op) = n = info->nr_in;
2518     for (i = 0; i < n; i++) {
2519         const TCGCallArgumentLoc *loc = &info->in[i];
2520         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2521 
2522         switch (loc->kind) {
2523         case TCG_CALL_ARG_NORMAL:
2524         case TCG_CALL_ARG_BY_REF:
2525         case TCG_CALL_ARG_BY_REF_N:
2526             op->args[pi++] = temp_arg(ts);
2527             break;
2528 
2529         case TCG_CALL_ARG_EXTEND_U:
2530         case TCG_CALL_ARG_EXTEND_S:
2531             {
2532                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2533                 TCGv_i32 orig = temp_tcgv_i32(ts);
2534 
2535                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2536                     tcg_gen_ext_i32_i64(temp, orig);
2537                 } else {
2538                     tcg_gen_extu_i32_i64(temp, orig);
2539                 }
2540                 op->args[pi++] = tcgv_i64_arg(temp);
2541                 extend_free[n_extend++] = temp;
2542             }
2543             break;
2544 
2545         default:
2546             g_assert_not_reached();
2547         }
2548     }
2549     op->args[pi++] = (uintptr_t)func;
2550     op->args[pi++] = (uintptr_t)info;
2551     tcg_debug_assert(pi == total_args);
2552 
2553     if (tcg_ctx->emit_before_op) {
2554         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2555     } else {
2556         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2557     }
2558 
2559     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2560     for (i = 0; i < n_extend; ++i) {
2561         tcg_temp_free_i64(extend_free[i]);
2562     }
2563 }
2564 
2565 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2566 {
2567     tcg_gen_callN(func, info, ret, NULL);
2568 }
2569 
2570 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2571 {
2572     tcg_gen_callN(func, info, ret, &t1);
2573 }
2574 
2575 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2576                    TCGTemp *t1, TCGTemp *t2)
2577 {
2578     TCGTemp *args[2] = { t1, t2 };
2579     tcg_gen_callN(func, info, ret, args);
2580 }
2581 
2582 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2583                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2584 {
2585     TCGTemp *args[3] = { t1, t2, t3 };
2586     tcg_gen_callN(func, info, ret, args);
2587 }
2588 
2589 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2590                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2591 {
2592     TCGTemp *args[4] = { t1, t2, t3, t4 };
2593     tcg_gen_callN(func, info, ret, args);
2594 }
2595 
2596 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2597                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2598 {
2599     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2600     tcg_gen_callN(func, info, ret, args);
2601 }
2602 
2603 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2604                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2605                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2606 {
2607     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2608     tcg_gen_callN(func, info, ret, args);
2609 }
2610 
2611 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2612                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2613                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2614 {
2615     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2616     tcg_gen_callN(func, info, ret, args);
2617 }
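/*
 * These wrappers are normally reached through the generated helper
 * glue (exec/helper-gen.h); a direct call would look like
 * (illustrative):
 *
 *     tcg_gen_call2(func, &info, tcgv_i32_temp(ret),
 *                   tcgv_ptr_temp(tcg_env), tcgv_i32_temp(arg));
 */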
2618 
2619 static void tcg_reg_alloc_start(TCGContext *s)
2620 {
2621     int i, n;
2622 
2623     for (i = 0, n = s->nb_temps; i < n; i++) {
2624         TCGTemp *ts = &s->temps[i];
2625         TCGTempVal val = TEMP_VAL_MEM;
2626 
2627         switch (ts->kind) {
2628         case TEMP_CONST:
2629             val = TEMP_VAL_CONST;
2630             break;
2631         case TEMP_FIXED:
2632             val = TEMP_VAL_REG;
2633             break;
2634         case TEMP_GLOBAL:
2635             break;
2636         case TEMP_EBB:
2637             val = TEMP_VAL_DEAD;
2638             /* fall through */
2639         case TEMP_TB:
2640             ts->mem_allocated = 0;
2641             break;
2642         default:
2643             g_assert_not_reached();
2644         }
2645         ts->val_type = val;
2646     }
2647 
2648     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2649 }
2650 
2651 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2652                                  TCGTemp *ts)
2653 {
2654     int idx = temp_idx(ts);
2655 
2656     switch (ts->kind) {
2657     case TEMP_FIXED:
2658     case TEMP_GLOBAL:
2659         pstrcpy(buf, buf_size, ts->name);
2660         break;
2661     case TEMP_TB:
2662         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2663         break;
2664     case TEMP_EBB:
2665         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2666         break;
2667     case TEMP_CONST:
2668         switch (ts->type) {
2669         case TCG_TYPE_I32:
2670             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2671             break;
2672 #if TCG_TARGET_REG_BITS > 32
2673         case TCG_TYPE_I64:
2674             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2675             break;
2676 #endif
2677         case TCG_TYPE_V64:
2678         case TCG_TYPE_V128:
2679         case TCG_TYPE_V256:
2680             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2681                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2682             break;
2683         default:
2684             g_assert_not_reached();
2685         }
2686         break;
2687     }
2688     return buf;
2689 }
2690 
2691 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2692                              int buf_size, TCGArg arg)
2693 {
2694     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2695 }
2696 
2697 static const char * const cond_name[] =
2698 {
2699     [TCG_COND_NEVER] = "never",
2700     [TCG_COND_ALWAYS] = "always",
2701     [TCG_COND_EQ] = "eq",
2702     [TCG_COND_NE] = "ne",
2703     [TCG_COND_LT] = "lt",
2704     [TCG_COND_GE] = "ge",
2705     [TCG_COND_LE] = "le",
2706     [TCG_COND_GT] = "gt",
2707     [TCG_COND_LTU] = "ltu",
2708     [TCG_COND_GEU] = "geu",
2709     [TCG_COND_LEU] = "leu",
2710     [TCG_COND_GTU] = "gtu",
2711     [TCG_COND_TSTEQ] = "tsteq",
2712     [TCG_COND_TSTNE] = "tstne",
2713 };
2714 
2715 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2716 {
2717     [MO_UB]   = "ub",
2718     [MO_SB]   = "sb",
2719     [MO_LEUW] = "leuw",
2720     [MO_LESW] = "lesw",
2721     [MO_LEUL] = "leul",
2722     [MO_LESL] = "lesl",
2723     [MO_LEUQ] = "leq",
2724     [MO_BEUW] = "beuw",
2725     [MO_BESW] = "besw",
2726     [MO_BEUL] = "beul",
2727     [MO_BESL] = "besl",
2728     [MO_BEUQ] = "beq",
2729     [MO_128 + MO_BE] = "beo",
2730     [MO_128 + MO_LE] = "leo",
2731 };
2732 
2733 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2734     [MO_UNALN >> MO_ASHIFT]    = "un+",
2735     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2736     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2737     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2738     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2739     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2740     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2741     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2742 };
2743 
2744 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2745     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2746     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2747     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2748     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2749     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2750     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2751 };
2752 
2753 static const char bswap_flag_name[][6] = {
2754     [TCG_BSWAP_IZ] = "iz",
2755     [TCG_BSWAP_OZ] = "oz",
2756     [TCG_BSWAP_OS] = "os",
2757     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2758     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2759 };
2760 
2761 #ifdef CONFIG_PLUGIN
2762 static const char * const plugin_from_name[] = {
2763     "from-tb",
2764     "from-insn",
2765     "after-insn",
2766     "after-tb",
2767 };
2768 #endif
2769 
2770 static inline bool tcg_regset_single(TCGRegSet d)
2771 {
2772     return (d & (d - 1)) == 0;
2773 }
2774 
2775 static inline TCGReg tcg_regset_first(TCGRegSet d)
2776 {
2777     if (TCG_TARGET_NB_REGS <= 32) {
2778         return ctz32(d);
2779     } else {
2780         return ctz64(d);
2781     }
2782 }
2783 
2784 /* Return only the number of characters output -- no error return. */
2785 #define ne_fprintf(...) \
2786     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2787 
2788 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2789 {
2790     char buf[128];
2791     TCGOp *op;
2792 
2793     QTAILQ_FOREACH(op, &s->ops, link) {
2794         int i, k, nb_oargs, nb_iargs, nb_cargs;
2795         const TCGOpDef *def;
2796         TCGOpcode c;
2797         int col = 0;
2798 
2799         c = op->opc;
2800         def = &tcg_op_defs[c];
2801 
2802         if (c == INDEX_op_insn_start) {
2803             nb_oargs = 0;
2804             col += ne_fprintf(f, "\n ----");
2805 
2806             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2807                 col += ne_fprintf(f, " %016" PRIx64,
2808                                   tcg_get_insn_start_param(op, i));
2809             }
2810         } else if (c == INDEX_op_call) {
2811             const TCGHelperInfo *info = tcg_call_info(op);
2812             void *func = tcg_call_func(op);
2813 
2814             /* variable number of arguments */
2815             nb_oargs = TCGOP_CALLO(op);
2816             nb_iargs = TCGOP_CALLI(op);
2817             nb_cargs = def->nb_cargs;
2818 
2819             col += ne_fprintf(f, " %s ", def->name);
2820 
2821             /*
2822              * Print the function name from TCGHelperInfo, if available.
2823              * Note that plugins have a template function for the info,
2824              * but the actual function pointer comes from the plugin.
2825              */
2826             if (func == info->func) {
2827                 col += ne_fprintf(f, "%s", info->name);
2828             } else {
2829                 col += ne_fprintf(f, "plugin(%p)", func);
2830             }
2831 
2832             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2833             for (i = 0; i < nb_oargs; i++) {
2834                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2835                                                             op->args[i]));
2836             }
2837             for (i = 0; i < nb_iargs; i++) {
2838                 TCGArg arg = op->args[nb_oargs + i];
2839                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2840                 col += ne_fprintf(f, ",%s", t);
2841             }
2842         } else {
2843             if (def->flags & TCG_OPF_INT) {
2844                 col += ne_fprintf(f, " %s_i%d ",
2845                                   def->name,
2846                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2847             } else if (def->flags & TCG_OPF_VECTOR) {
2848                 col += ne_fprintf(f, "%s v%d,e%d,",
2849                                   def->name,
2850                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2851                                   8 << TCGOP_VECE(op));
2852             } else {
2853                 col += ne_fprintf(f, " %s ", def->name);
2854             }
2855 
2856             nb_oargs = def->nb_oargs;
2857             nb_iargs = def->nb_iargs;
2858             nb_cargs = def->nb_cargs;
2859 
2860             k = 0;
2861             for (i = 0; i < nb_oargs; i++) {
2862                 const char *sep = k ? "," : "";
2863                 col += ne_fprintf(f, "%s%s", sep,
2864                                   tcg_get_arg_str(s, buf, sizeof(buf),
2865                                                   op->args[k++]));
2866             }
2867             for (i = 0; i < nb_iargs; i++) {
2868                 const char *sep = k ? "," : "";
2869                 col += ne_fprintf(f, "%s%s", sep,
2870                                   tcg_get_arg_str(s, buf, sizeof(buf),
2871                                                   op->args[k++]));
2872             }
2873             switch (c) {
2874             case INDEX_op_brcond_i32:
2875             case INDEX_op_setcond_i32:
2876             case INDEX_op_negsetcond_i32:
2877             case INDEX_op_movcond_i32:
2878             case INDEX_op_brcond2_i32:
2879             case INDEX_op_setcond2_i32:
2880             case INDEX_op_brcond_i64:
2881             case INDEX_op_setcond_i64:
2882             case INDEX_op_negsetcond_i64:
2883             case INDEX_op_movcond_i64:
2884             case INDEX_op_cmp_vec:
2885             case INDEX_op_cmpsel_vec:
2886                 if (op->args[k] < ARRAY_SIZE(cond_name)
2887                     && cond_name[op->args[k]]) {
2888                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2889                 } else {
2890                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2891                 }
2892                 i = 1;
2893                 break;
2894             case INDEX_op_qemu_ld_i32:
2895             case INDEX_op_qemu_st_i32:
2896             case INDEX_op_qemu_st8_i32:
2897             case INDEX_op_qemu_ld_i64:
2898             case INDEX_op_qemu_st_i64:
2899             case INDEX_op_qemu_ld_i128:
2900             case INDEX_op_qemu_st_i128:
2901                 {
2902                     const char *s_al, *s_op, *s_at;
2903                     MemOpIdx oi = op->args[k++];
2904                     MemOp mop = get_memop(oi);
2905                     unsigned ix = get_mmuidx(oi);
2906 
2907                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2908                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2909                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2910                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2911 
2912                     /* If all fields are accounted for, print symbolically. */
2913                     if (!mop && s_al && s_op && s_at) {
2914                         col += ne_fprintf(f, ",%s%s%s,%u",
2915                                           s_at, s_al, s_op, ix);
2916                     } else {
2917                         mop = get_memop(oi);
2918                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2919                     }
2920                     i = 1;
2921                 }
2922                 break;
2923             case INDEX_op_bswap16_i32:
2924             case INDEX_op_bswap16_i64:
2925             case INDEX_op_bswap32_i32:
2926             case INDEX_op_bswap32_i64:
2927             case INDEX_op_bswap64_i64:
2928                 {
2929                     TCGArg flags = op->args[k];
2930                     const char *name = NULL;
2931 
2932                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2933                         name = bswap_flag_name[flags];
2934                     }
2935                     if (name) {
2936                         col += ne_fprintf(f, ",%s", name);
2937                     } else {
2938                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2939                     }
2940                     i = k = 1;
2941                 }
2942                 break;
2943 #ifdef CONFIG_PLUGIN
2944             case INDEX_op_plugin_cb:
2945                 {
2946                     TCGArg from = op->args[k++];
2947                     const char *name = NULL;
2948 
2949                     if (from < ARRAY_SIZE(plugin_from_name)) {
2950                         name = plugin_from_name[from];
2951                     }
2952                     if (name) {
2953                         col += ne_fprintf(f, "%s", name);
2954                     } else {
2955                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2956                     }
2957                     i = 1;
2958                 }
2959                 break;
2960 #endif
2961             default:
2962                 i = 0;
2963                 break;
2964             }
2965             switch (c) {
2966             case INDEX_op_set_label:
2967             case INDEX_op_br:
2968             case INDEX_op_brcond_i32:
2969             case INDEX_op_brcond_i64:
2970             case INDEX_op_brcond2_i32:
2971                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2972                                   arg_label(op->args[k])->id);
2973                 i++, k++;
2974                 break;
2975             case INDEX_op_mb:
2976                 {
2977                     TCGBar membar = op->args[k];
2978                     const char *b_op, *m_op;
2979 
2980                     switch (membar & TCG_BAR_SC) {
2981                     case 0:
2982                         b_op = "none";
2983                         break;
2984                     case TCG_BAR_LDAQ:
2985                         b_op = "acq";
2986                         break;
2987                     case TCG_BAR_STRL:
2988                         b_op = "rel";
2989                         break;
2990                     case TCG_BAR_SC:
2991                         b_op = "seq";
2992                         break;
2993                     default:
2994                         g_assert_not_reached();
2995                     }
2996 
2997                     switch (membar & TCG_MO_ALL) {
2998                     case 0:
2999                         m_op = "none";
3000                         break;
3001                     case TCG_MO_LD_LD:
3002                         m_op = "rr";
3003                         break;
3004                     case TCG_MO_LD_ST:
3005                         m_op = "rw";
3006                         break;
3007                     case TCG_MO_ST_LD:
3008                         m_op = "wr";
3009                         break;
3010                     case TCG_MO_ST_ST:
3011                         m_op = "ww";
3012                         break;
3013                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3014                         m_op = "rr+rw";
3015                         break;
3016                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3017                         m_op = "rr+wr";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3020                         m_op = "rr+ww";
3021                         break;
3022                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3023                         m_op = "rw+wr";
3024                         break;
3025                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3026                         m_op = "rw+ww";
3027                         break;
3028                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3029                         m_op = "wr+ww";
3030                         break;
3031                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3032                         m_op = "rr+rw+wr";
3033                         break;
3034                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3035                         m_op = "rr+rw+ww";
3036                         break;
3037                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3038                         m_op = "rr+wr+ww";
3039                         break;
3040                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3041                         m_op = "rw+wr+ww";
3042                         break;
3043                     case TCG_MO_ALL:
3044                         m_op = "all";
3045                         break;
3046                     default:
3047                         g_assert_not_reached();
3048                     }
3049 
3050                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3051                     i++, k++;
3052                 }
3053                 break;
3054             default:
3055                 break;
3056             }
3057             for (; i < nb_cargs; i++, k++) {
3058                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3059                                   op->args[k]);
3060             }
3061         }
3062 
3063         if (have_prefs || op->life) {
3064             for (; col < 40; ++col) {
3065                 putc(' ', f);
3066             }
3067         }
3068 
3069         if (op->life) {
3070             unsigned life = op->life;
3071 
3072             if (life & (SYNC_ARG * 3)) {
3073                 ne_fprintf(f, "  sync:");
3074                 for (i = 0; i < 2; ++i) {
3075                     if (life & (SYNC_ARG << i)) {
3076                         ne_fprintf(f, " %d", i);
3077                     }
3078                 }
3079             }
3080             life /= DEAD_ARG;
3081             if (life) {
3082                 ne_fprintf(f, "  dead:");
3083                 for (i = 0; life; ++i, life >>= 1) {
3084                     if (life & 1) {
3085                         ne_fprintf(f, " %d", i);
3086                     }
3087                 }
3088             }
3089         }
3090 
3091         if (have_prefs) {
3092             for (i = 0; i < nb_oargs; ++i) {
3093                 TCGRegSet set = output_pref(op, i);
3094 
3095                 if (i == 0) {
3096                     ne_fprintf(f, "  pref=");
3097                 } else {
3098                     ne_fprintf(f, ",");
3099                 }
3100                 if (set == 0) {
3101                     ne_fprintf(f, "none");
3102                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3103                     ne_fprintf(f, "all");
3104 #ifdef CONFIG_DEBUG_TCG
3105                 } else if (tcg_regset_single(set)) {
3106                     TCGReg reg = tcg_regset_first(set);
3107                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3108 #endif
3109                 } else if (TCG_TARGET_NB_REGS <= 32) {
3110                     ne_fprintf(f, "0x%x", (uint32_t)set);
3111                 } else {
3112                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3113                 }
3114             }
3115         }
3116 
3117         putc('\n', f);
3118     }
3119 }
3120 
3121 /* We give higher priority to constraints with fewer registers. */
3122 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3123 {
3124     int n;
3125 
3126     arg_ct += k;
3127     n = ctpop64(arg_ct->regs);
3128 
3129     /*
3130      * Constraints that allow only a single register sort first; this
3131      * includes output aliases (which must match the already-allocated input).
3132      */
3133     if (n == 1 || arg_ct->oalias) {
3134         return INT_MAX;
3135     }
3136 
3137     /*
3138      * Sort register pairs next, first then second immediately after.
3139      * Arbitrarily sort multiple pairs by the index of the first reg;
3140      * there shouldn't be many pairs.
3141      */
3142     switch (arg_ct->pair) {
3143     case 1:
3144     case 3:
3145         return (k + 1) * 2;
3146     case 2:
3147         return (arg_ct->pair_index + 1) * 2 - 1;
3148     }
3149 
3150     /* Finally, sort by decreasing register count. */
3151     assert(n > 1);
3152     return -n;
3153 }
3154 
3155 /* sort from highest priority to lowest */
3156 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3157 {
3158     int i, j;
3159 
3160     for (i = 0; i < n; i++) {
3161         a[start + i].sort_index = start + i;
3162     }
3163     if (n <= 1) {
3164         return;
3165     }
3166     for (i = 0; i < n - 1; i++) {
3167         for (j = i + 1; j < n; j++) {
3168             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3169             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3170             if (p1 < p2) {
3171                 int tmp = a[start + i].sort_index;
3172                 a[start + i].sort_index = a[start + j].sort_index;
3173                 a[start + j].sort_index = tmp;
3174             }
3175         }
3176     }
3177 }
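/*
 * The resulting order (see get_constraint_priority): aliased and
 * single-register constraints first, then register pairs, then the
 * remaining constraints by decreasing register-set size.  n is at
 * most TCG_MAX_OP_ARGS, so the O(n^2) sort above is fine.
 */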
3178 
3179 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3180 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3181 
3182 static void process_constraint_sets(void)
3183 {
3184     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3185         const TCGConstraintSet *tdefs = &constraint_sets[c];
3186         TCGArgConstraint *args_ct = all_cts[c];
3187         int nb_oargs = tdefs->nb_oargs;
3188         int nb_iargs = tdefs->nb_iargs;
3189         int nb_args = nb_oargs + nb_iargs;
3190         bool saw_alias_pair = false;
3191 
3192         for (int i = 0; i < nb_args; i++) {
3193             const char *ct_str = tdefs->args_ct_str[i];
3194             bool input_p = i >= nb_oargs;
3195             int o;
3196 
3197             switch (*ct_str) {
3198             case '0' ... '9':
3199                 o = *ct_str - '0';
3200                 tcg_debug_assert(input_p);
3201                 tcg_debug_assert(o < nb_oargs);
3202                 tcg_debug_assert(args_ct[o].regs != 0);
3203                 tcg_debug_assert(!args_ct[o].oalias);
3204                 args_ct[i] = args_ct[o];
3205                 /* The output sets oalias.  */
3206                 args_ct[o].oalias = 1;
3207                 args_ct[o].alias_index = i;
3208                 /* The input sets ialias. */
3209                 args_ct[i].ialias = 1;
3210                 args_ct[i].alias_index = o;
3211                 if (args_ct[i].pair) {
3212                     saw_alias_pair = true;
3213                 }
3214                 tcg_debug_assert(ct_str[1] == '\0');
3215                 continue;
3216 
3217             case '&':
3218                 tcg_debug_assert(!input_p);
3219                 args_ct[i].newreg = true;
3220                 ct_str++;
3221                 break;
3222 
3223             case 'p': /* plus */
3224                 /* Allocate to the register after the previous. */
3225                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3226                 o = i - 1;
3227                 tcg_debug_assert(!args_ct[o].pair);
3228                 tcg_debug_assert(!args_ct[o].ct);
3229                 args_ct[i] = (TCGArgConstraint){
3230                     .pair = 2,
3231                     .pair_index = o,
3232                     .regs = args_ct[o].regs << 1,
3233                     .newreg = args_ct[o].newreg,
3234                 };
3235                 args_ct[o].pair = 1;
3236                 args_ct[o].pair_index = i;
3237                 tcg_debug_assert(ct_str[1] == '\0');
3238                 continue;
3239 
3240             case 'm': /* minus */
3241                 /* Allocate to the register before the previous. */
3242                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3243                 o = i - 1;
3244                 tcg_debug_assert(!args_ct[o].pair);
3245                 tcg_debug_assert(!args_ct[o].ct);
3246                 args_ct[i] = (TCGArgConstraint){
3247                     .pair = 1,
3248                     .pair_index = o,
3249                     .regs = args_ct[o].regs >> 1,
3250                     .newreg = args_ct[o].newreg,
3251                 };
3252                 args_ct[o].pair = 2;
3253                 args_ct[o].pair_index = i;
3254                 tcg_debug_assert(ct_str[1] == '\0');
3255                 continue;
3256             }
3257 
3258             do {
3259                 switch (*ct_str) {
3260                 case 'i':
3261                     args_ct[i].ct |= TCG_CT_CONST;
3262                     break;
3263 #ifdef TCG_REG_ZERO
3264                 case 'z':
3265                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3266                     break;
3267 #endif
3268 
3269                 /* Include all of the target-specific constraints. */
3270 
3271 #undef CONST
3272 #define CONST(CASE, MASK) \
3273     case CASE: args_ct[i].ct |= MASK; break;
3274 #define REGS(CASE, MASK) \
3275     case CASE: args_ct[i].regs |= MASK; break;
3276 
3277 #include "tcg-target-con-str.h"
3278 
3279 #undef REGS
3280 #undef CONST
3281                 default:
3282                 case '0' ... '9':
3283                 case '&':
3284                 case 'p':
3285                 case 'm':
3286                     /* Typo in TCGConstraintSet constraint. */
3287                     g_assert_not_reached();
3288                 }
3289             } while (*++ct_str != '\0');
3290         }
3291 
3292         /*
3293          * Fix up output pairs that are aliased with inputs.
3294          * When we created the alias, we copied pair from the output.
3295          * There are three cases:
3296          *    (1a) Pairs of inputs alias pairs of outputs.
3297          *    (1b) One input aliases the first of a pair of outputs.
3298          *    (2)  One input aliases the second of a pair of outputs.
3299          *
3300          * Case 1a is handled by making sure that the pair_index'es are
3301          * properly updated so that they appear the same as a pair of inputs.
3302          *
3303          * Case 1b is handled by setting the pair_index of the input to
3304          * itself, simply so it doesn't point to an unrelated argument.
3305          * Since we don't encounter the "second" during the input allocation
3306          * phase, nothing happens with the second half of the input pair.
3307          *
3308          * Case 2 is handled by setting the second input to pair=3, the
3309          * first output to pair=3, and the pair_index'es to match.
3310          */
3311         if (saw_alias_pair) {
3312             for (int i = nb_oargs; i < nb_args; i++) {
3313                 int o, o2, i2;
3314 
3315                 /*
3316                  * Since [0-9pm] must be alone in the constraint string,
3317                  * the only way they can both be set is if the pair comes
3318                  * from the output alias.
3319                  */
3320                 if (!args_ct[i].ialias) {
3321                     continue;
3322                 }
3323                 switch (args_ct[i].pair) {
3324                 case 0:
3325                     break;
3326                 case 1:
3327                     o = args_ct[i].alias_index;
3328                     o2 = args_ct[o].pair_index;
3329                     tcg_debug_assert(args_ct[o].pair == 1);
3330                     tcg_debug_assert(args_ct[o2].pair == 2);
3331                     if (args_ct[o2].oalias) {
3332                         /* Case 1a */
3333                         i2 = args_ct[o2].alias_index;
3334                         tcg_debug_assert(args_ct[i2].pair == 2);
3335                         args_ct[i2].pair_index = i;
3336                         args_ct[i].pair_index = i2;
3337                     } else {
3338                         /* Case 1b */
3339                         args_ct[i].pair_index = i;
3340                     }
3341                     break;
3342                 case 2:
3343                     o = args_ct[i].alias_index;
3344                     o2 = args_ct[o].pair_index;
3345                     tcg_debug_assert(args_ct[o].pair == 2);
3346                     tcg_debug_assert(args_ct[o2].pair == 1);
3347                     if (args_ct[o2].oalias) {
3348                         /* Case 1a */
3349                         i2 = args_ct[o2].alias_index;
3350                         tcg_debug_assert(args_ct[i2].pair == 1);
3351                         args_ct[i2].pair_index = i;
3352                         args_ct[i].pair_index = i2;
3353                     } else {
3354                         /* Case 2 */
3355                         args_ct[i].pair = 3;
3356                         args_ct[o2].pair = 3;
3357                         args_ct[i].pair_index = o2;
3358                         args_ct[o2].pair_index = i;
3359                     }
3360                     break;
3361                 default:
3362                     g_assert_not_reached();
3363                 }
3364             }
3365         }
3366 
3367         /* sort the constraints (XXX: this is just a heuristic) */
3368         sort_constraints(args_ct, 0, nb_oargs);
3369         sort_constraints(args_ct, nb_oargs, nb_iargs);
3370     }
3371 }
3372 
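/*
 * Worked example (illustrative; the register letters are defined by each
 * backend's tcg-target-con-str.h): a hypothetical set with one output
 * and two inputs,
 *
 *     { .nb_oargs = 1, .nb_iargs = 2, .args_ct_str = { "r", "0", "ri" } }
 *
 * parses so that output 0 accepts any register; input 1 ("0") copies
 * args_ct[0] and links the two via oalias/ialias and alias_index, i.e.
 * it must land in the same register as output 0; and input 2 ("ri")
 * accepts either a register or an immediate (TCG_CT_CONST).
 */
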
3373 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3374 {
3375     TCGOpcode opc = op->opc;
3376     TCGType type = TCGOP_TYPE(op);
3377     unsigned flags = TCGOP_FLAGS(op);
3378     const TCGOpDef *def = &tcg_op_defs[opc];
3379     const TCGOutOp *outop = all_outop[opc];
3380     TCGConstraintSetIndex con_set;
3381 
3382     if (def->flags & TCG_OPF_NOT_PRESENT) {
3383         return empty_cts;
3384     }
3385 
3386     if (outop) {
3387         con_set = outop->static_constraint;
3388         if (con_set == C_Dynamic) {
3389             con_set = outop->dynamic_constraint(type, flags);
3390         }
3391     } else {
3392         con_set = tcg_target_op_def(opc, type, flags);
3393     }
3394     tcg_debug_assert(con_set >= 0);
3395     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3396 
3397     /* The constraint arguments must match TCGOpcode arguments. */
3398     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3399     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3400 
3401     return all_cts[con_set];
3402 }
3403 
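/*
 * For illustration (hypothetical backend values): an out-of-line op
 * description can supply its constraints either statically,
 *
 *     static const TCGOutOp hypothetical_outop = {
 *         .static_constraint = C_O1_I2(r, r, ri),
 *     };
 *
 * or by setting .static_constraint = C_Dynamic and providing a
 * .dynamic_constraint callback keyed on the op's type and flags.
 */
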
3404 static void remove_label_use(TCGOp *op, int idx)
3405 {
3406     TCGLabel *label = arg_label(op->args[idx]);
3407     TCGLabelUse *use;
3408 
3409     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3410         if (use->op == op) {
3411             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3412             return;
3413         }
3414     }
3415     g_assert_not_reached();
3416 }
3417 
3418 void tcg_op_remove(TCGContext *s, TCGOp *op)
3419 {
3420     switch (op->opc) {
3421     case INDEX_op_br:
3422         remove_label_use(op, 0);
3423         break;
3424     case INDEX_op_brcond_i32:
3425     case INDEX_op_brcond_i64:
3426         remove_label_use(op, 3);
3427         break;
3428     case INDEX_op_brcond2_i32:
3429         remove_label_use(op, 5);
3430         break;
3431     default:
3432         break;
3433     }
3434 
3435     QTAILQ_REMOVE(&s->ops, op, link);
3436     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3437     s->nb_ops--;
3438 }
3439 
3440 void tcg_remove_ops_after(TCGOp *op)
3441 {
3442     TCGContext *s = tcg_ctx;
3443 
3444     while (true) {
3445         TCGOp *last = tcg_last_op();
3446         if (last == op) {
3447             return;
3448         }
3449         tcg_op_remove(s, last);
3450     }
3451 }
3452 
3453 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3454 {
3455     TCGContext *s = tcg_ctx;
3456     TCGOp *op = NULL;
3457 
3458     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3459         QTAILQ_FOREACH(op, &s->free_ops, link) {
3460             if (nargs <= op->nargs) {
3461                 QTAILQ_REMOVE(&s->free_ops, op, link);
3462                 nargs = op->nargs;
3463                 goto found;
3464             }
3465         }
3466     }
3467 
3468     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3469     nargs = MAX(4, nargs);
3470     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3471 
3472  found:
3473     memset(op, 0, offsetof(TCGOp, link));
3474     op->opc = opc;
3475     op->nargs = nargs;
3476 
3477     /* Check for bitfield overflow. */
3478     tcg_debug_assert(op->nargs == nargs);
3479 
3480     s->nb_ops++;
3481     return op;
3482 }
3483 
3484 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3485 {
3486     TCGOp *op = tcg_op_alloc(opc, nargs);
3487 
3488     if (tcg_ctx->emit_before_op) {
3489         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3490     } else {
3491         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3492     }
3493     return op;
3494 }
3495 
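/*
 * Usage sketch (hypothetical temps; real callers go through the
 * tcg_gen_* helpers, which also set the op type): emitting an op first
 * tries to recycle a wide-enough entry from s->free_ops before carving
 * a new one, at least 4 args wide, out of tcg_malloc:
 *
 *     TCGOp *op = tcg_emit_op(INDEX_op_add, 3);
 *     op->args[0] = temp_arg(dst);
 *     op->args[1] = temp_arg(src1);
 *     op->args[2] = temp_arg(src2);
 */
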
3496 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3497                             TCGOpcode opc, TCGType type, unsigned nargs)
3498 {
3499     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3500 
3501     TCGOP_TYPE(new_op) = type;
3502     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3503     return new_op;
3504 }
3505 
3506 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3507                            TCGOpcode opc, TCGType type, unsigned nargs)
3508 {
3509     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3510 
3511     TCGOP_TYPE(new_op) = type;
3512     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3513     return new_op;
3514 }
3515 
3516 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3517 {
3518     TCGLabelUse *u;
3519 
3520     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3521         TCGOp *op = u->op;
3522         switch (op->opc) {
3523         case INDEX_op_br:
3524             op->args[0] = label_arg(to);
3525             break;
3526         case INDEX_op_brcond_i32:
3527         case INDEX_op_brcond_i64:
3528             op->args[3] = label_arg(to);
3529             break;
3530         case INDEX_op_brcond2_i32:
3531             op->args[5] = label_arg(to);
3532             break;
3533         default:
3534             g_assert_not_reached();
3535         }
3536     }
3537 
3538     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3539 }
3540 
3541 /* Reachability analysis: remove unreachable code.  */
3542 static void __attribute__((noinline))
3543 reachable_code_pass(TCGContext *s)
3544 {
3545     TCGOp *op, *op_next, *op_prev;
3546     bool dead = false;
3547 
3548     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3549         bool remove = dead;
3550         TCGLabel *label;
3551 
3552         switch (op->opc) {
3553         case INDEX_op_set_label:
3554             label = arg_label(op->args[0]);
3555 
3556             /*
3557              * Note that the first op in the TB is always a load,
3558              * so there is always something before a label.
3559              */
3560             op_prev = QTAILQ_PREV(op, link);
3561 
3562             /*
3563              * If we find two sequential labels, move all branches to
3564              * reference the second label and remove the first label.
3565              * Do this before branch to next optimization, so that the
3566              * middle label is out of the way.
3567              */
3568             if (op_prev->opc == INDEX_op_set_label) {
3569                 move_label_uses(label, arg_label(op_prev->args[0]));
3570                 tcg_op_remove(s, op_prev);
3571                 op_prev = QTAILQ_PREV(op, link);
3572             }
3573 
3574             /*
3575              * Optimization can fold conditional branches to unconditional.
3576              * If we find a label which is preceded by an unconditional
3577              * branch to next, remove the branch.  We couldn't do this when
3578              * processing the branch because any dead code between the branch
3579              * and label had not yet been removed.
3580              */
3581             if (op_prev->opc == INDEX_op_br &&
3582                 label == arg_label(op_prev->args[0])) {
3583                 tcg_op_remove(s, op_prev);
3584                 /* Fall through means insns become live again.  */
3585                 dead = false;
3586             }
3587 
3588             if (QSIMPLEQ_EMPTY(&label->branches)) {
3589                 /*
3590                  * While there is an occasional backward branch, virtually
3591                  * all branches generated by the translators are forward.
3592                  * Which means that generally we will have already removed
3593                  * all references to the label that will be, and there is
3594                  * little to be gained by iterating.
3595                  */
3596                 remove = true;
3597             } else {
3598                 /* Once we see a label, insns become live again.  */
3599                 dead = false;
3600                 remove = false;
3601             }
3602             break;
3603 
3604         case INDEX_op_br:
3605         case INDEX_op_exit_tb:
3606         case INDEX_op_goto_ptr:
3607             /* Unconditional branches; everything following is dead.  */
3608             dead = true;
3609             break;
3610 
3611         case INDEX_op_call:
3612             /* Notice noreturn helper calls, such as those raising exceptions.  */
3613             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3614                 dead = true;
3615             }
3616             break;
3617 
3618         case INDEX_op_insn_start:
3619             /* Never remove -- we need to keep these for unwind.  */
3620             remove = false;
3621             break;
3622 
3623         default:
3624             break;
3625         }
3626 
3627         if (remove) {
3628             tcg_op_remove(s, op);
3629         }
3630     }
3631 }
3632 
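/*
 * For illustration, the pass above turns a stream like (pseudo-listing)
 *
 *     br $L1
 *     mov_i32 t0, t1     <- unreachable, removed
 *     set_label $L0      <- unreferenced label, removed
 *     set_label $L1      <- preceded by "br $L1", so the br is removed
 *
 * into straight-line code: everything between an unconditional control
 * transfer and the next still-referenced label is discarded.
 */
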
3633 #define TS_DEAD  1
3634 #define TS_MEM   2
3635 
3636 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3637 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3638 
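/*
 * op->life packs one DEAD_ARG and one SYNC_ARG bit per argument index.
 * E.g. for a hypothetical op whose output 0 must be synced back to
 * memory and whose input at index 2 dies:
 *
 *     arg_life = (SYNC_ARG << 0) | (DEAD_ARG << 2);
 *
 * so that NEED_SYNC_ARG(0) and IS_DEAD_ARG(2) are true during register
 * allocation below.
 */
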
3639 /* For liveness_pass_1, the register preferences for a given temp.  */
3640 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3641 {
3642     return ts->state_ptr;
3643 }
3644 
3645 /* For liveness_pass_1, reset the preferences for a given temp to the
3646  * maximal regset for its type.
3647  */
3648 static inline void la_reset_pref(TCGTemp *ts)
3649 {
3650     *la_temp_pref(ts)
3651         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3652 }
3653 
3654 /* liveness analysis: end of function: all temps are dead, and globals
3655    should be in memory. */
3656 static void la_func_end(TCGContext *s, int ng, int nt)
3657 {
3658     int i;
3659 
3660     for (i = 0; i < ng; ++i) {
3661         s->temps[i].state = TS_DEAD | TS_MEM;
3662         la_reset_pref(&s->temps[i]);
3663     }
3664     for (i = ng; i < nt; ++i) {
3665         s->temps[i].state = TS_DEAD;
3666         la_reset_pref(&s->temps[i]);
3667     }
3668 }
3669 
3670 /* liveness analysis: end of basic block: all temps are dead, globals
3671    and local temps should be in memory. */
3672 static void la_bb_end(TCGContext *s, int ng, int nt)
3673 {
3674     int i;
3675 
3676     for (i = 0; i < nt; ++i) {
3677         TCGTemp *ts = &s->temps[i];
3678         int state;
3679 
3680         switch (ts->kind) {
3681         case TEMP_FIXED:
3682         case TEMP_GLOBAL:
3683         case TEMP_TB:
3684             state = TS_DEAD | TS_MEM;
3685             break;
3686         case TEMP_EBB:
3687         case TEMP_CONST:
3688             state = TS_DEAD;
3689             break;
3690         default:
3691             g_assert_not_reached();
3692         }
3693         ts->state = state;
3694         la_reset_pref(ts);
3695     }
3696 }
3697 
3698 /* liveness analysis: sync globals back to memory.  */
3699 static void la_global_sync(TCGContext *s, int ng)
3700 {
3701     int i;
3702 
3703     for (i = 0; i < ng; ++i) {
3704         int state = s->temps[i].state;
3705         s->temps[i].state = state | TS_MEM;
3706         if (state == TS_DEAD) {
3707             /* If the global was previously dead, reset prefs.  */
3708             la_reset_pref(&s->temps[i]);
3709         }
3710     }
3711 }
3712 
3713 /*
3714  * liveness analysis: conditional branch: all temps are dead unless
3715  * explicitly live-across-conditional-branch, globals and local temps
3716  * should be synced.
3717  */
3718 static void la_bb_sync(TCGContext *s, int ng, int nt)
3719 {
3720     la_global_sync(s, ng);
3721 
3722     for (int i = ng; i < nt; ++i) {
3723         TCGTemp *ts = &s->temps[i];
3724         int state;
3725 
3726         switch (ts->kind) {
3727         case TEMP_TB:
3728             state = ts->state;
3729             ts->state = state | TS_MEM;
3730             if (state != TS_DEAD) {
3731                 continue;
3732             }
3733             break;
3734         case TEMP_EBB:
3735         case TEMP_CONST:
3736             continue;
3737         default:
3738             g_assert_not_reached();
3739         }
3740         la_reset_pref(&s->temps[i]);
3741     }
3742 }
3743 
3744 /* liveness analysis: sync globals back to memory and kill.  */
3745 static void la_global_kill(TCGContext *s, int ng)
3746 {
3747     int i;
3748 
3749     for (i = 0; i < ng; i++) {
3750         s->temps[i].state = TS_DEAD | TS_MEM;
3751         la_reset_pref(&s->temps[i]);
3752     }
3753 }
3754 
3755 /* liveness analysis: note live globals crossing calls.  */
3756 static void la_cross_call(TCGContext *s, int nt)
3757 {
3758     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3759     int i;
3760 
3761     for (i = 0; i < nt; i++) {
3762         TCGTemp *ts = &s->temps[i];
3763         if (!(ts->state & TS_DEAD)) {
3764             TCGRegSet *pset = la_temp_pref(ts);
3765             TCGRegSet set = *pset;
3766 
3767             set &= mask;
3768             /* If the combination is not possible, restart.  */
3769             if (set == 0) {
3770                 set = tcg_target_available_regs[ts->type] & mask;
3771             }
3772             *pset = set;
3773         }
3774     }
3775 }
3776 
3777 /*
3778  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3779  * to TEMP_EBB, if possible.
3780  */
3781 static void __attribute__((noinline))
3782 liveness_pass_0(TCGContext *s)
3783 {
3784     void * const multiple_ebb = (void *)(uintptr_t)-1;
3785     int nb_temps = s->nb_temps;
3786     TCGOp *op, *ebb;
3787 
3788     for (int i = s->nb_globals; i < nb_temps; ++i) {
3789         s->temps[i].state_ptr = NULL;
3790     }
3791 
3792     /*
3793      * Represent each EBB by the op at which it begins.  In the case of
3794      * the first EBB, this is the first op, otherwise it is a label.
3795      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3796      * within a single EBB, else MULTIPLE_EBB.
3797      */
3798     ebb = QTAILQ_FIRST(&s->ops);
3799     QTAILQ_FOREACH(op, &s->ops, link) {
3800         const TCGOpDef *def;
3801         int nb_oargs, nb_iargs;
3802 
3803         switch (op->opc) {
3804         case INDEX_op_set_label:
3805             ebb = op;
3806             continue;
3807         case INDEX_op_discard:
3808             continue;
3809         case INDEX_op_call:
3810             nb_oargs = TCGOP_CALLO(op);
3811             nb_iargs = TCGOP_CALLI(op);
3812             break;
3813         default:
3814             def = &tcg_op_defs[op->opc];
3815             nb_oargs = def->nb_oargs;
3816             nb_iargs = def->nb_iargs;
3817             break;
3818         }
3819 
3820         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3821             TCGTemp *ts = arg_temp(op->args[i]);
3822 
3823             if (ts->kind != TEMP_TB) {
3824                 continue;
3825             }
3826             if (ts->state_ptr == NULL) {
3827                 ts->state_ptr = ebb;
3828             } else if (ts->state_ptr != ebb) {
3829                 ts->state_ptr = multiple_ebb;
3830             }
3831         }
3832     }
3833 
3834     /*
3835      * For TEMP_TB that turned out not to be used beyond one EBB,
3836      * reduce the liveness to TEMP_EBB.
3837      */
3838     for (int i = s->nb_globals; i < nb_temps; ++i) {
3839         TCGTemp *ts = &s->temps[i];
3840         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3841             ts->kind = TEMP_EBB;
3842         }
3843     }
3844 }
3845 
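/*
 * Example of the demotion above (hypothetical temps): if every use of t0
 * falls between the same pair of labels, all of its uses see the same
 * EBB-head op, state_ptr never becomes MULTIPLE_EBB, and t0 is reduced
 * to TEMP_EBB; a t1 used both before and after a label stays TEMP_TB.
 * The payoff is that TEMP_EBB temps are simply dead at EBB boundaries,
 * so liveness need not sync them to memory at every label.
 */
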
3846 /* Liveness analysis: update each op's arg_life mask to tell if a
3847    given input argument is dead. Instructions updating dead
3848    temporaries are removed. */
3849 static void __attribute__((noinline))
3850 liveness_pass_1(TCGContext *s)
3851 {
3852     int nb_globals = s->nb_globals;
3853     int nb_temps = s->nb_temps;
3854     TCGOp *op, *op_prev;
3855     TCGRegSet *prefs;
3856     int i;
3857 
3858     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3859     for (i = 0; i < nb_temps; ++i) {
3860         s->temps[i].state_ptr = prefs + i;
3861     }
3862 
3863     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3864     la_func_end(s, nb_globals, nb_temps);
3865 
3866     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3867         int nb_iargs, nb_oargs;
3868         TCGOpcode opc_new, opc_new2;
3869         TCGLifeData arg_life = 0;
3870         TCGTemp *ts;
3871         TCGOpcode opc = op->opc;
3872         const TCGOpDef *def = &tcg_op_defs[opc];
3873         const TCGArgConstraint *args_ct;
3874 
3875         switch (opc) {
3876         case INDEX_op_call:
3877             {
3878                 const TCGHelperInfo *info = tcg_call_info(op);
3879                 int call_flags = tcg_call_flags(op);
3880 
3881                 nb_oargs = TCGOP_CALLO(op);
3882                 nb_iargs = TCGOP_CALLI(op);
3883 
3884                 /* pure functions can be removed if their result is unused */
3885                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3886                     for (i = 0; i < nb_oargs; i++) {
3887                         ts = arg_temp(op->args[i]);
3888                         if (ts->state != TS_DEAD) {
3889                             goto do_not_remove_call;
3890                         }
3891                     }
3892                     goto do_remove;
3893                 }
3894             do_not_remove_call:
3895 
3896                 /* Output args are dead.  */
3897                 for (i = 0; i < nb_oargs; i++) {
3898                     ts = arg_temp(op->args[i]);
3899                     if (ts->state & TS_DEAD) {
3900                         arg_life |= DEAD_ARG << i;
3901                     }
3902                     if (ts->state & TS_MEM) {
3903                         arg_life |= SYNC_ARG << i;
3904                     }
3905                     ts->state = TS_DEAD;
3906                     la_reset_pref(ts);
3907                 }
3908 
3909                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3910                 memset(op->output_pref, 0, sizeof(op->output_pref));
3911 
3912                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3913                                     TCG_CALL_NO_READ_GLOBALS))) {
3914                     la_global_kill(s, nb_globals);
3915                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3916                     la_global_sync(s, nb_globals);
3917                 }
3918 
3919                 /* Record arguments that die in this helper.  */
3920                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3921                     ts = arg_temp(op->args[i]);
3922                     if (ts->state & TS_DEAD) {
3923                         arg_life |= DEAD_ARG << i;
3924                     }
3925                 }
3926 
3927                 /* For all live registers, remove call-clobbered prefs.  */
3928                 la_cross_call(s, nb_temps);
3929 
3930                 /*
3931                  * Input arguments are live for preceding opcodes.
3932                  *
3933                  * For those arguments that die, and will be allocated in
3934                  * registers, clear the register set for that arg, to be
3935                  * filled in below.  For args that will be on the stack,
3936                  * reset to any available reg.  Process arguments in reverse
3937                  * order so that if a temp is used more than once, the stack
3938                  * reset to max happens before the register reset to 0.
3939                  */
3940                 for (i = nb_iargs - 1; i >= 0; i--) {
3941                     const TCGCallArgumentLoc *loc = &info->in[i];
3942                     ts = arg_temp(op->args[nb_oargs + i]);
3943 
3944                     if (ts->state & TS_DEAD) {
3945                         switch (loc->kind) {
3946                         case TCG_CALL_ARG_NORMAL:
3947                         case TCG_CALL_ARG_EXTEND_U:
3948                         case TCG_CALL_ARG_EXTEND_S:
3949                             if (arg_slot_reg_p(loc->arg_slot)) {
3950                                 *la_temp_pref(ts) = 0;
3951                                 break;
3952                             }
3953                             /* fall through */
3954                         default:
3955                             *la_temp_pref(ts) =
3956                                 tcg_target_available_regs[ts->type];
3957                             break;
3958                         }
3959                         ts->state &= ~TS_DEAD;
3960                     }
3961                 }
3962 
3963                 /*
3964                  * For each input argument, add its input register to prefs.
3965                  * If a temp is used once, this produces a single set bit;
3966                  * if a temp is used multiple times, this produces a set.
3967                  */
3968                 for (i = 0; i < nb_iargs; i++) {
3969                     const TCGCallArgumentLoc *loc = &info->in[i];
3970                     ts = arg_temp(op->args[nb_oargs + i]);
3971 
3972                     switch (loc->kind) {
3973                     case TCG_CALL_ARG_NORMAL:
3974                     case TCG_CALL_ARG_EXTEND_U:
3975                     case TCG_CALL_ARG_EXTEND_S:
3976                         if (arg_slot_reg_p(loc->arg_slot)) {
3977                             tcg_regset_set_reg(*la_temp_pref(ts),
3978                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3979                         }
3980                         break;
3981                     default:
3982                         break;
3983                     }
3984                 }
3985             }
3986             break;
3987         case INDEX_op_insn_start:
3988             break;
3989         case INDEX_op_discard:
3990             /* mark the temporary as dead */
3991             ts = arg_temp(op->args[0]);
3992             ts->state = TS_DEAD;
3993             la_reset_pref(ts);
3994             break;
3995 
3996         case INDEX_op_add2_i32:
3997         case INDEX_op_add2_i64:
3998             opc_new = INDEX_op_add;
3999             goto do_addsub2;
4000         case INDEX_op_sub2_i32:
4001         case INDEX_op_sub2_i64:
4002             opc_new = INDEX_op_sub;
4003         do_addsub2:
4004             nb_iargs = 4;
4005             nb_oargs = 2;
4006             /* Test if the high part of the operation is dead, but not
4007                the low part.  The result can be optimized to a simple
4008                add or sub.  This often happens for an x86_64 guest when
4009                the CPU mode is set to 32-bit.  */
4010             if (arg_temp(op->args[1])->state == TS_DEAD) {
4011                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4012                     goto do_remove;
4013                 }
4014                 /* Replace the opcode and adjust the args in place,
4015                    leaving 3 unused args at the end.  */
4016                 op->opc = opc = opc_new;
4017                 op->args[1] = op->args[2];
4018                 op->args[2] = op->args[4];
4019                 /* Fall through and mark the single-word operation live.  */
4020                 nb_iargs = 2;
4021                 nb_oargs = 1;
4022             }
4023             goto do_not_remove;
4024 
4025         case INDEX_op_muls2_i32:
4026         case INDEX_op_muls2_i64:
4027             opc_new = INDEX_op_mul;
4028             opc_new2 = INDEX_op_mulsh;
4029             goto do_mul2;
4030         case INDEX_op_mulu2_i32:
4031         case INDEX_op_mulu2_i64:
4032             opc_new = INDEX_op_mul;
4033             opc_new2 = INDEX_op_muluh;
4034         do_mul2:
4035             nb_iargs = 2;
4036             nb_oargs = 2;
4037             if (arg_temp(op->args[1])->state == TS_DEAD) {
4038                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4039                     /* Both parts of the operation are dead.  */
4040                     goto do_remove;
4041                 }
4042                 /* The high part of the operation is dead; generate the low. */
4043                 op->opc = opc = opc_new;
4044                 op->args[1] = op->args[2];
4045                 op->args[2] = op->args[3];
4046             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4047                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4048                 /* The low part of the operation is dead; generate the high. */
4049                 op->opc = opc = opc_new2;
4050                 op->args[0] = op->args[1];
4051                 op->args[1] = op->args[2];
4052                 op->args[2] = op->args[3];
4053             } else {
4054                 goto do_not_remove;
4055             }
4056             /* Mark the single-word operation live.  */
4057             nb_oargs = 1;
4058             goto do_not_remove;
4059 
4060         default:
4061             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4062             nb_iargs = def->nb_iargs;
4063             nb_oargs = def->nb_oargs;
4064 
4065             /* Test if the operation can be removed because all
4066                its outputs are dead. We assume that nb_oargs == 0
4067                implies side effects */
4068             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4069                 for (i = 0; i < nb_oargs; i++) {
4070                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4071                         goto do_not_remove;
4072                     }
4073                 }
4074                 goto do_remove;
4075             }
4076             goto do_not_remove;
4077 
4078         do_remove:
4079             tcg_op_remove(s, op);
4080             break;
4081 
4082         do_not_remove:
4083             for (i = 0; i < nb_oargs; i++) {
4084                 ts = arg_temp(op->args[i]);
4085 
4086                 /* Remember the preference of the uses that followed.  */
4087                 if (i < ARRAY_SIZE(op->output_pref)) {
4088                     op->output_pref[i] = *la_temp_pref(ts);
4089                 }
4090 
4091                 /* Output args are dead.  */
4092                 if (ts->state & TS_DEAD) {
4093                     arg_life |= DEAD_ARG << i;
4094                 }
4095                 if (ts->state & TS_MEM) {
4096                     arg_life |= SYNC_ARG << i;
4097                 }
4098                 ts->state = TS_DEAD;
4099                 la_reset_pref(ts);
4100             }
4101 
4102             /* If end of basic block, update.  */
4103             if (def->flags & TCG_OPF_BB_EXIT) {
4104                 la_func_end(s, nb_globals, nb_temps);
4105             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4106                 la_bb_sync(s, nb_globals, nb_temps);
4107             } else if (def->flags & TCG_OPF_BB_END) {
4108                 la_bb_end(s, nb_globals, nb_temps);
4109             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4110                 la_global_sync(s, nb_globals);
4111                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4112                     la_cross_call(s, nb_temps);
4113                 }
4114             }
4115 
4116             /* Record arguments that die in this opcode.  */
4117             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4118                 ts = arg_temp(op->args[i]);
4119                 if (ts->state & TS_DEAD) {
4120                     arg_life |= DEAD_ARG << i;
4121                 }
4122             }
4123 
4124             /* Input arguments are live for preceding opcodes.  */
4125             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4126                 ts = arg_temp(op->args[i]);
4127                 if (ts->state & TS_DEAD) {
4128                     /* For operands that were dead, initially allow
4129                        all regs for the type.  */
4130                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4131                     ts->state &= ~TS_DEAD;
4132                 }
4133             }
4134 
4135             /* Incorporate constraints for this operand.  */
4136             switch (opc) {
4137             case INDEX_op_mov:
4138                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4139                    have proper constraints.  That said, special case
4140                    moves to propagate preferences backward.  */
4141                 if (IS_DEAD_ARG(1)) {
4142                     *la_temp_pref(arg_temp(op->args[0]))
4143                         = *la_temp_pref(arg_temp(op->args[1]));
4144                 }
4145                 break;
4146 
4147             default:
4148                 args_ct = opcode_args_ct(op);
4149                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4150                     const TCGArgConstraint *ct = &args_ct[i];
4151                     TCGRegSet set, *pset;
4152 
4153                     ts = arg_temp(op->args[i]);
4154                     pset = la_temp_pref(ts);
4155                     set = *pset;
4156 
4157                     set &= ct->regs;
4158                     if (ct->ialias) {
4159                         set &= output_pref(op, ct->alias_index);
4160                     }
4161                     /* If the combination is not possible, restart.  */
4162                     if (set == 0) {
4163                         set = ct->regs;
4164                     }
4165                     *pset = set;
4166                 }
4167                 break;
4168             }
4169             break;
4170         }
4171         op->life = arg_life;
4172     }
4173 }
4174 
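/*
 * The net effect of pass 1, sketched on a hypothetical tail of a TB
 * (the scan runs in reverse, so later uses are processed first):
 *
 *     t0 = add t1, t2      <- last use of t2: life gains DEAD_ARG << 2
 *     st_i32 t0, env, 16   <- last use of t0; globals synced after it
 *
 * Ops with no side effects whose outputs were all dead have been removed
 * outright, and output_pref[] now records the registers that downstream
 * uses would prefer each output to land in.
 */
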
4175 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4176 static bool __attribute__((noinline))
4177 liveness_pass_2(TCGContext *s)
4178 {
4179     int nb_globals = s->nb_globals;
4180     int nb_temps, i;
4181     bool changes = false;
4182     TCGOp *op, *op_next;
4183 
4184     /* Create a temporary for each indirect global.  */
4185     for (i = 0; i < nb_globals; ++i) {
4186         TCGTemp *its = &s->temps[i];
4187         if (its->indirect_reg) {
4188             TCGTemp *dts = tcg_temp_alloc(s);
4189             dts->type = its->type;
4190             dts->base_type = its->base_type;
4191             dts->temp_subindex = its->temp_subindex;
4192             dts->kind = TEMP_EBB;
4193             its->state_ptr = dts;
4194         } else {
4195             its->state_ptr = NULL;
4196         }
4197         /* All globals begin dead.  */
4198         its->state = TS_DEAD;
4199     }
4200     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4201         TCGTemp *its = &s->temps[i];
4202         its->state_ptr = NULL;
4203         its->state = TS_DEAD;
4204     }
4205 
4206     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4207         TCGOpcode opc = op->opc;
4208         const TCGOpDef *def = &tcg_op_defs[opc];
4209         TCGLifeData arg_life = op->life;
4210         int nb_iargs, nb_oargs, call_flags;
4211         TCGTemp *arg_ts, *dir_ts;
4212 
4213         if (opc == INDEX_op_call) {
4214             nb_oargs = TCGOP_CALLO(op);
4215             nb_iargs = TCGOP_CALLI(op);
4216             call_flags = tcg_call_flags(op);
4217         } else {
4218             nb_iargs = def->nb_iargs;
4219             nb_oargs = def->nb_oargs;
4220 
4221             /* Set flags similar to those that calls require.  */
4222             if (def->flags & TCG_OPF_COND_BRANCH) {
4223                 /* Like reading globals: sync_globals */
4224                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4225             } else if (def->flags & TCG_OPF_BB_END) {
4226                 /* Like writing globals: save_globals */
4227                 call_flags = 0;
4228             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4229                 /* Like reading globals: sync_globals */
4230                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4231             } else {
4232                 /* No effect on globals.  */
4233                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4234                               TCG_CALL_NO_WRITE_GLOBALS);
4235             }
4236         }
4237 
4238         /* Make sure that input arguments are available.  */
4239         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4240             arg_ts = arg_temp(op->args[i]);
4241             dir_ts = arg_ts->state_ptr;
4242             if (dir_ts && arg_ts->state == TS_DEAD) {
4243                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4244                                   ? INDEX_op_ld_i32
4245                                   : INDEX_op_ld_i64);
4246                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4247                                                   arg_ts->type, 3);
4248 
4249                 lop->args[0] = temp_arg(dir_ts);
4250                 lop->args[1] = temp_arg(arg_ts->mem_base);
4251                 lop->args[2] = arg_ts->mem_offset;
4252 
4253                 /* Loaded, but synced with memory.  */
4254                 arg_ts->state = TS_MEM;
4255             }
4256         }
4257 
4258         /* Perform input replacement, and mark inputs that became dead.
4259            No action is required except keeping temp_state up to date
4260            so that we reload when needed.  */
4261         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4262             arg_ts = arg_temp(op->args[i]);
4263             dir_ts = arg_ts->state_ptr;
4264             if (dir_ts) {
4265                 op->args[i] = temp_arg(dir_ts);
4266                 changes = true;
4267                 if (IS_DEAD_ARG(i)) {
4268                     arg_ts->state = TS_DEAD;
4269                 }
4270             }
4271         }
4272 
4273         /* Liveness analysis should ensure that the following are
4274            all correct, for call sites and basic block end points.  */
4275         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4276             /* Nothing to do */
4277         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4278             for (i = 0; i < nb_globals; ++i) {
4279                 /* Liveness should see that globals are synced back,
4280                    that is, either TS_DEAD or TS_MEM.  */
4281                 arg_ts = &s->temps[i];
4282                 tcg_debug_assert(arg_ts->state_ptr == 0
4283                                  || arg_ts->state != 0);
4284             }
4285         } else {
4286             for (i = 0; i < nb_globals; ++i) {
4287                 /* Liveness should see that globals are saved back,
4288                    that is, TS_DEAD, waiting to be reloaded.  */
4289                 arg_ts = &s->temps[i];
4290                 tcg_debug_assert(arg_ts->state_ptr == 0
4291                                  || arg_ts->state == TS_DEAD);
4292             }
4293         }
4294 
4295         /* Outputs become available.  */
4296         if (opc == INDEX_op_mov) {
4297             arg_ts = arg_temp(op->args[0]);
4298             dir_ts = arg_ts->state_ptr;
4299             if (dir_ts) {
4300                 op->args[0] = temp_arg(dir_ts);
4301                 changes = true;
4302 
4303                 /* The output is now live and modified.  */
4304                 arg_ts->state = 0;
4305 
4306                 if (NEED_SYNC_ARG(0)) {
4307                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4308                                       ? INDEX_op_st_i32
4309                                       : INDEX_op_st_i64);
4310                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4311                                                      arg_ts->type, 3);
4312                     TCGTemp *out_ts = dir_ts;
4313 
4314                     if (IS_DEAD_ARG(0)) {
4315                         out_ts = arg_temp(op->args[1]);
4316                         arg_ts->state = TS_DEAD;
4317                         tcg_op_remove(s, op);
4318                     } else {
4319                         arg_ts->state = TS_MEM;
4320                     }
4321 
4322                     sop->args[0] = temp_arg(out_ts);
4323                     sop->args[1] = temp_arg(arg_ts->mem_base);
4324                     sop->args[2] = arg_ts->mem_offset;
4325                 } else {
4326                     tcg_debug_assert(!IS_DEAD_ARG(0));
4327                 }
4328             }
4329         } else {
4330             for (i = 0; i < nb_oargs; i++) {
4331                 arg_ts = arg_temp(op->args[i]);
4332                 dir_ts = arg_ts->state_ptr;
4333                 if (!dir_ts) {
4334                     continue;
4335                 }
4336                 op->args[i] = temp_arg(dir_ts);
4337                 changes = true;
4338 
4339                 /* The output is now live and modified.  */
4340                 arg_ts->state = 0;
4341 
4342                 /* Sync outputs upon their last write.  */
4343                 if (NEED_SYNC_ARG(i)) {
4344                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4345                                       ? INDEX_op_st_i32
4346                                       : INDEX_op_st_i64);
4347                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4348                                                      arg_ts->type, 3);
4349 
4350                     sop->args[0] = temp_arg(dir_ts);
4351                     sop->args[1] = temp_arg(arg_ts->mem_base);
4352                     sop->args[2] = arg_ts->mem_offset;
4353 
4354                     arg_ts->state = TS_MEM;
4355                 }
4356                 /* Drop outputs that are dead.  */
4357                 if (IS_DEAD_ARG(i)) {
4358                     arg_ts->state = TS_DEAD;
4359                 }
4360             }
4361         }
4362     }
4363 
4364     return changes;
4365 }
4366 
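/*
 * Illustration (hypothetical indirect global g): pass 2 rewrites every
 * use of g to its shadow TEMP_EBB temp d, materializing memory traffic
 * only where liveness demands it:
 *
 *     before:  t0 = add g, t1        after:  d = ld_i32 [base + off]
 *                                            t0 = add d, t1
 *
 * A later use of g in the same region reuses d without reloading (the
 * state is already TS_MEM), and d is stored back at g's last write
 * whenever NEED_SYNC_ARG is set for that output.
 */
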
4367 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4368 {
4369     intptr_t off;
4370     int size, align;
4371 
4372     /* When allocating an object, look at the full type. */
4373     size = tcg_type_size(ts->base_type);
4374     switch (ts->base_type) {
4375     case TCG_TYPE_I32:
4376         align = 4;
4377         break;
4378     case TCG_TYPE_I64:
4379     case TCG_TYPE_V64:
4380         align = 8;
4381         break;
4382     case TCG_TYPE_I128:
4383     case TCG_TYPE_V128:
4384     case TCG_TYPE_V256:
4385         /*
4386          * Note that we do not require aligned storage for V256,
4387          * and that we provide alignment for I128 to match V128,
4388          * even if that's above what the host ABI requires.
4389          */
4390         align = 16;
4391         break;
4392     default:
4393         g_assert_not_reached();
4394     }
4395 
4396     /*
4397      * Assume the stack is sufficiently aligned.
4398      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4399      * and do not require 16 byte vector alignment.  This seems slightly
4400      * easier than fully parameterizing the above switch statement.
4401      */
4402     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4403     off = ROUND_UP(s->current_frame_offset, align);
4404 
4405     /* If we've exhausted the stack frame, restart with a smaller TB. */
4406     if (off + size > s->frame_end) {
4407         tcg_raise_tb_overflow(s);
4408     }
4409     s->current_frame_offset = off + size;
4410 #if defined(__sparc__)
4411     off += TCG_TARGET_STACK_BIAS;
4412 #endif
4413 
4414     /* If the object was subdivided, assign memory to all the parts. */
4415     if (ts->base_type != ts->type) {
4416         int part_size = tcg_type_size(ts->type);
4417         int part_count = size / part_size;
4418 
4419         /*
4420          * Each part is allocated sequentially in tcg_temp_new_internal.
4421          * Jump back to the first part by subtracting the current index.
4422          */
4423         ts -= ts->temp_subindex;
4424         for (int i = 0; i < part_count; ++i) {
4425             ts[i].mem_offset = off + i * part_size;
4426             ts[i].mem_base = s->frame_temp;
4427             ts[i].mem_allocated = 1;
4428         }
4429     } else {
4430         ts->mem_offset = off;
4431         ts->mem_base = s->frame_temp;
4432         ts->mem_allocated = 1;
4433     }
4434 }
4435 
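/*
 * A worked instance of the layout arithmetic above (illustrative values,
 * assuming TCG_TARGET_STACK_ALIGN >= 8): with current_frame_offset = 20,
 * a TCG_TYPE_I64 temp gets align = 8, off = ROUND_UP(20, 8) = 24, and
 * the next free offset becomes 32.  A TCG_TYPE_I128 temp subdivided into
 * two I64 parts lands part 0 at off and part 1 at off + 8, thanks to the
 * ts -= ts->temp_subindex rewind.
 */
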
4436 /* Assign @reg to @ts, and update reg_to_temp[]. */
4437 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4438 {
4439     if (ts->val_type == TEMP_VAL_REG) {
4440         TCGReg old = ts->reg;
4441         tcg_debug_assert(s->reg_to_temp[old] == ts);
4442         if (old == reg) {
4443             return;
4444         }
4445         s->reg_to_temp[old] = NULL;
4446     }
4447     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4448     s->reg_to_temp[reg] = ts;
4449     ts->val_type = TEMP_VAL_REG;
4450     ts->reg = reg;
4451 }
4452 
4453 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4454 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4455 {
4456     tcg_debug_assert(type != TEMP_VAL_REG);
4457     if (ts->val_type == TEMP_VAL_REG) {
4458         TCGReg reg = ts->reg;
4459         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4460         s->reg_to_temp[reg] = NULL;
4461     }
4462     ts->val_type = type;
4463 }
4464 
4465 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4466 
4467 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4468    mark it free; otherwise mark it dead.  */
4469 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4470 {
4471     TCGTempVal new_type;
4472 
4473     switch (ts->kind) {
4474     case TEMP_FIXED:
4475         return;
4476     case TEMP_GLOBAL:
4477     case TEMP_TB:
4478         new_type = TEMP_VAL_MEM;
4479         break;
4480     case TEMP_EBB:
4481         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4482         break;
4483     case TEMP_CONST:
4484         new_type = TEMP_VAL_CONST;
4485         break;
4486     default:
4487         g_assert_not_reached();
4488     }
4489     set_temp_val_nonreg(s, ts, new_type);
4490 }
4491 
4492 /* Mark a temporary as dead.  */
4493 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4494 {
4495     temp_free_or_dead(s, ts, 1);
4496 }
4497 
4498 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4499    register needs to be allocated to store a constant.  If 'free_or_dead'
4500    is non-zero, subsequently release the temporary; if it is positive, the
4501    temp is dead; if it is negative, the temp is free.  */
4502 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4503                       TCGRegSet preferred_regs, int free_or_dead)
4504 {
4505     if (!temp_readonly(ts) && !ts->mem_coherent) {
4506         if (!ts->mem_allocated) {
4507             temp_allocate_frame(s, ts);
4508         }
4509         switch (ts->val_type) {
4510         case TEMP_VAL_CONST:
4511             /* If we're going to free the temp immediately, then we won't
4512                require it later in a register, so attempt to store the
4513                constant to memory directly.  */
4514             if (free_or_dead
4515                 && tcg_out_sti(s, ts->type, ts->val,
4516                                ts->mem_base->reg, ts->mem_offset)) {
4517                 break;
4518             }
4519             temp_load(s, ts, tcg_target_available_regs[ts->type],
4520                       allocated_regs, preferred_regs);
4521             /* fallthrough */
4522 
4523         case TEMP_VAL_REG:
4524             tcg_out_st(s, ts->type, ts->reg,
4525                        ts->mem_base->reg, ts->mem_offset);
4526             break;
4527 
4528         case TEMP_VAL_MEM:
4529             break;
4530 
4531         case TEMP_VAL_DEAD:
4532         default:
4533             g_assert_not_reached();
4534         }
4535         ts->mem_coherent = 1;
4536     }
4537     if (free_or_dead) {
4538         temp_free_or_dead(s, ts, free_or_dead);
4539     }
4540 }
4541 
4542 /* free register 'reg' by spilling the corresponding temporary if necessary */
4543 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4544 {
4545     TCGTemp *ts = s->reg_to_temp[reg];
4546     if (ts != NULL) {
4547         temp_sync(s, ts, allocated_regs, 0, -1);
4548     }
4549 }
4550 
4551 /**
4552  * tcg_reg_alloc:
4553  * @required_regs: Set of registers in which we must allocate.
4554  * @allocated_regs: Set of registers which must be avoided.
4555  * @preferred_regs: Set of registers we should prefer.
4556  * @rev: True if we search the registers in "indirect" order.
4557  *
4558  * The allocated register must be in @required_regs & ~@allocated_regs,
4559  * but if we can put it in @preferred_regs we may save a move later.
4560  */
4561 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4562                             TCGRegSet allocated_regs,
4563                             TCGRegSet preferred_regs, bool rev)
4564 {
4565     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4566     TCGRegSet reg_ct[2];
4567     const int *order;
4568 
4569     reg_ct[1] = required_regs & ~allocated_regs;
4570     tcg_debug_assert(reg_ct[1] != 0);
4571     reg_ct[0] = reg_ct[1] & preferred_regs;
4572 
4573     /* Skip the preferred_regs option if it cannot be satisfied,
4574        or if the preference made no difference.  */
4575     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4576 
4577     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4578 
4579     /* Try free registers, preferences first.  */
4580     for (j = f; j < 2; j++) {
4581         TCGRegSet set = reg_ct[j];
4582 
4583         if (tcg_regset_single(set)) {
4584             /* One register in the set.  */
4585             TCGReg reg = tcg_regset_first(set);
4586             if (s->reg_to_temp[reg] == NULL) {
4587                 return reg;
4588             }
4589         } else {
4590             for (i = 0; i < n; i++) {
4591                 TCGReg reg = order[i];
4592                 if (s->reg_to_temp[reg] == NULL &&
4593                     tcg_regset_test_reg(set, reg)) {
4594                     return reg;
4595                 }
4596             }
4597         }
4598     }
4599 
4600     /* We must spill something.  */
4601     for (j = f; j < 2; j++) {
4602         TCGRegSet set = reg_ct[j];
4603 
4604         if (tcg_regset_single(set)) {
4605             /* One register in the set.  */
4606             TCGReg reg = tcg_regset_first(set);
4607             tcg_reg_free(s, reg, allocated_regs);
4608             return reg;
4609         } else {
4610             for (i = 0; i < n; i++) {
4611                 TCGReg reg = order[i];
4612                 if (tcg_regset_test_reg(set, reg)) {
4613                     tcg_reg_free(s, reg, allocated_regs);
4614                     return reg;
4615                 }
4616             }
4617         }
4618     }
4619 
4620     g_assert_not_reached();
4621 }
4622 
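/*
 * Call sketch (sets illustrative; real callers pass op-specific masks):
 *
 *     reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_I32],
 *                         s->reserved_regs, op->output_pref[0], false);
 *
 * The two j-passes first honor the preference set when it can make a
 * difference, and spilling is attempted only after every candidate in
 * both sets has proved occupied.
 */
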
4623 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4624                                  TCGRegSet allocated_regs,
4625                                  TCGRegSet preferred_regs, bool rev)
4626 {
4627     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4628     TCGRegSet reg_ct[2];
4629     const int *order;
4630 
4631     /* Keep candidate I only if neither I nor I+1 is in allocated_regs. */
4632     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4633     tcg_debug_assert(reg_ct[1] != 0);
4634     reg_ct[0] = reg_ct[1] & preferred_regs;
4635 
4636     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4637 
4638     /*
4639      * Skip the preferred_regs option if it cannot be satisfied,
4640      * or if the preference made no difference.
4641      */
4642     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4643 
4644     /*
4645      * Minimize the number of flushes by looking for 2 free registers first,
4646      * then a single flush, then two flushes.
4647      */
4648     for (fmin = 2; fmin >= 0; fmin--) {
4649         for (j = k; j < 2; j++) {
4650             TCGRegSet set = reg_ct[j];
4651 
4652             for (i = 0; i < n; i++) {
4653                 TCGReg reg = order[i];
4654 
4655                 if (tcg_regset_test_reg(set, reg)) {
4656                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4657                     if (f >= fmin) {
4658                         tcg_reg_free(s, reg, allocated_regs);
4659                         tcg_reg_free(s, reg + 1, allocated_regs);
4660                         return reg;
4661                     }
4662                 }
4663             }
4664         }
4665     }
4666     g_assert_not_reached();
4667 }
4668 
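/*
 * The mask trick above in one example: with allocated_regs = 0b0100
 * (register 2 taken), allocated | (allocated >> 1) = 0b0110 clears
 * candidates 1 and 2, so neither reg 1 (whose pair would need reg 2)
 * nor reg 2 itself can be chosen as the low half of a pair.
 */
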
4669 /* Make sure the temporary is in a register.  If needed, allocate the register
4670    from DESIRED while avoiding ALLOCATED.  */
4671 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4672                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4673 {
4674     TCGReg reg;
4675 
4676     switch (ts->val_type) {
4677     case TEMP_VAL_REG:
4678         return;
4679     case TEMP_VAL_CONST:
4680         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4681                             preferred_regs, ts->indirect_base);
4682         if (ts->type <= TCG_TYPE_I64) {
4683             tcg_out_movi(s, ts->type, reg, ts->val);
4684         } else {
4685             uint64_t val = ts->val;
4686             MemOp vece = MO_64;
4687 
4688             /*
4689              * Find the minimal vector element that matches the constant.
4690              * The targets will, in general, have to do this search anyway;
4691              * do it generically here.
4692              */
4693             if (val == dup_const(MO_8, val)) {
4694                 vece = MO_8;
4695             } else if (val == dup_const(MO_16, val)) {
4696                 vece = MO_16;
4697             } else if (val == dup_const(MO_32, val)) {
4698                 vece = MO_32;
4699             }
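            /*
             * For example, val = 0x7f7f7f7f7f7f7f7f equals
             * dup_const(MO_8, 0x7f), so vece becomes MO_8 and the
             * backend may emit a single byte splat.
             */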
4700 
4701             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4702         }
4703         ts->mem_coherent = 0;
4704         break;
4705     case TEMP_VAL_MEM:
4706         if (!ts->mem_allocated) {
4707             temp_allocate_frame(s, ts);
4708         }
4709         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4710                             preferred_regs, ts->indirect_base);
4711         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4712         ts->mem_coherent = 1;
4713         break;
4714     case TEMP_VAL_DEAD:
4715     default:
4716         g_assert_not_reached();
4717     }
4718     set_temp_val_reg(s, ts, reg);
4719 }
4720 
4721 /* Save a temporary to memory. 'allocated_regs' is used in case a
4722    temporary register needs to be allocated to store a constant.  */
4723 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4724 {
4725     /* The liveness analysis already ensures that globals are back
4726        in memory. Keep a tcg_debug_assert for safety. */
4727     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4728 }
4729 
4730 /* save globals to their canonical location and assume they can be
4731    modified by the following code. 'allocated_regs' is used in case a
4732    temporary register needs to be allocated to store a constant. */
4733 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4734 {
4735     int i, n;
4736 
4737     for (i = 0, n = s->nb_globals; i < n; i++) {
4738         temp_save(s, &s->temps[i], allocated_regs);
4739     }
4740 }
4741 
4742 /* sync globals to their canonical location and assume they can be
4743    read by the following code. 'allocated_regs' is used in case a
4744    temporary register needs to be allocated to store a constant. */
4745 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4746 {
4747     int i, n;
4748 
4749     for (i = 0, n = s->nb_globals; i < n; i++) {
4750         TCGTemp *ts = &s->temps[i];
4751         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4752                          || ts->kind == TEMP_FIXED
4753                          || ts->mem_coherent);
4754     }
4755 }
4756 
4757 /* at the end of a basic block, we assume all temporaries are dead and
4758    all globals are stored at their canonical location. */
4759 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4760 {
4761     int i;
4762 
4763     for (i = s->nb_globals; i < s->nb_temps; i++) {
4764         TCGTemp *ts = &s->temps[i];
4765 
4766         switch (ts->kind) {
4767         case TEMP_TB:
4768             temp_save(s, ts, allocated_regs);
4769             break;
4770         case TEMP_EBB:
4771             /* The liveness analysis already ensures that temps are dead.
4772                Keep a tcg_debug_assert for safety. */
4773             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4774             break;
4775         case TEMP_CONST:
4776             /* Similarly, we should have freed any allocated register. */
4777             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4778             break;
4779         default:
4780             g_assert_not_reached();
4781         }
4782     }
4783 
4784     save_globals(s, allocated_regs);
4785 }
4786 
4787 /*
4788  * At a conditional branch, we assume all temporaries are dead unless
4789  * explicitly live-across-conditional-branch; all globals and local
4790  * temps are synced to their location.
4791  */
4792 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4793 {
4794     sync_globals(s, allocated_regs);
4795 
4796     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4797         TCGTemp *ts = &s->temps[i];
4798         /*
4799          * The liveness analysis already ensures that temps are dead.
4800          * Keep tcg_debug_asserts for safety.
4801          */
4802         switch (ts->kind) {
4803         case TEMP_TB:
4804             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4805             break;
4806         case TEMP_EBB:
4807         case TEMP_CONST:
4808             break;
4809         default:
4810             g_assert_not_reached();
4811         }
4812     }
4813 }
4814 
4815 /*
4816  * Specialized code generation for INDEX_op_mov_* with a constant.
4817  */
4818 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4819                                   tcg_target_ulong val, TCGLifeData arg_life,
4820                                   TCGRegSet preferred_regs)
4821 {
4822     /* ENV should not be modified.  */
4823     tcg_debug_assert(!temp_readonly(ots));
4824 
4825     /* The movi is not explicitly generated here.  */
4826     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4827     ots->val = val;
4828     ots->mem_coherent = 0;
4829     if (NEED_SYNC_ARG(0)) {
4830         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4831     } else if (IS_DEAD_ARG(0)) {
4832         temp_dead(s, ots);
4833     }
4834 }
4835 
4836 /*
4837  * Specialized code generation for INDEX_op_mov_*.
4838  */
4839 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4840 {
4841     const TCGLifeData arg_life = op->life;
4842     TCGRegSet allocated_regs, preferred_regs;
4843     TCGTemp *ts, *ots;
4844     TCGType otype, itype;
4845     TCGReg oreg, ireg;
4846 
4847     allocated_regs = s->reserved_regs;
4848     preferred_regs = output_pref(op, 0);
4849     ots = arg_temp(op->args[0]);
4850     ts = arg_temp(op->args[1]);
4851 
4852     /* ENV should not be modified.  */
4853     tcg_debug_assert(!temp_readonly(ots));
4854 
4855     /* Note that otype != itype for no-op truncation.  */
4856     otype = ots->type;
4857     itype = ts->type;
4858 
4859     if (ts->val_type == TEMP_VAL_CONST) {
4860         /* propagate constant or generate sti */
4861         tcg_target_ulong val = ts->val;
4862         if (IS_DEAD_ARG(1)) {
4863             temp_dead(s, ts);
4864         }
4865         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4866         return;
4867     }
4868 
4869     /* If the source value is in memory, we're going to be forced
4870        to have it in a register in order to perform the copy.  Copy
4871        the SOURCE value into its own register first, so that we
4872        don't have to reload SOURCE the next time it is used. */
4873     if (ts->val_type == TEMP_VAL_MEM) {
4874         temp_load(s, ts, tcg_target_available_regs[itype],
4875                   allocated_regs, preferred_regs);
4876     }
4877     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4878     ireg = ts->reg;
4879 
4880     if (IS_DEAD_ARG(0)) {
4881         /* mov to a non-saved dead register makes no sense (even with
4882            liveness analysis disabled). */
4883         tcg_debug_assert(NEED_SYNC_ARG(0));
4884         if (!ots->mem_allocated) {
4885             temp_allocate_frame(s, ots);
4886         }
4887         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4888         if (IS_DEAD_ARG(1)) {
4889             temp_dead(s, ts);
4890         }
4891         temp_dead(s, ots);
4892         return;
4893     }
4894 
4895     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4896         /*
4897          * The mov can be suppressed.  Kill input first, so that it
4898          * is unlinked from reg_to_temp, then set the output to the
4899          * reg that we saved from the input.
4900          */
4901         temp_dead(s, ts);
4902         oreg = ireg;
4903     } else {
4904         if (ots->val_type == TEMP_VAL_REG) {
4905             oreg = ots->reg;
4906         } else {
4907             /* Make sure to not spill the input register during allocation. */
4908             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4909                                  allocated_regs | ((TCGRegSet)1 << ireg),
4910                                  preferred_regs, ots->indirect_base);
4911         }
4912         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4913             /*
4914              * Cross register class move not supported.
4915              * Store the source register into the destination slot
4916              * and leave the destination temp as TEMP_VAL_MEM.
4917              */
4918             assert(!temp_readonly(ots));
4919             if (!ots->mem_allocated) {
4920                 temp_allocate_frame(s, ots);
4921             }
4922             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4923             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4924             ots->mem_coherent = 1;
4925             return;
4926         }
4927     }
4928     set_temp_val_reg(s, ots, oreg);
4929     ots->mem_coherent = 0;
4930 
4931     if (NEED_SYNC_ARG(0)) {
4932         temp_sync(s, ots, allocated_regs, 0, 0);
4933     }
4934 }
4935 
4936 /*
4937  * Specialized code generation for INDEX_op_dup_vec.
4938  */
4939 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4940 {
4941     const TCGLifeData arg_life = op->life;
4942     TCGRegSet dup_out_regs, dup_in_regs;
4943     const TCGArgConstraint *dup_args_ct;
4944     TCGTemp *its, *ots;
4945     TCGType itype, vtype;
4946     unsigned vece;
4947     int lowpart_ofs;
4948     bool ok;
4949 
4950     ots = arg_temp(op->args[0]);
4951     its = arg_temp(op->args[1]);
4952 
4953     /* ENV should not be modified.  */
4954     tcg_debug_assert(!temp_readonly(ots));
4955 
4956     itype = its->type;
4957     vece = TCGOP_VECE(op);
4958     vtype = TCGOP_TYPE(op);
4959 
4960     if (its->val_type == TEMP_VAL_CONST) {
4961         /* Propagate constant via movi -> dupi.  */
4962         tcg_target_ulong val = its->val;
4963         if (IS_DEAD_ARG(1)) {
4964             temp_dead(s, its);
4965         }
4966         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4967         return;
4968     }
4969 
4970     dup_args_ct = opcode_args_ct(op);
4971     dup_out_regs = dup_args_ct[0].regs;
4972     dup_in_regs = dup_args_ct[1].regs;
4973 
4974     /* Allocate the output register now.  */
4975     if (ots->val_type != TEMP_VAL_REG) {
4976         TCGRegSet allocated_regs = s->reserved_regs;
4977         TCGReg oreg;
4978 
4979         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4980             /* Make sure to not spill the input register. */
4981             tcg_regset_set_reg(allocated_regs, its->reg);
4982         }
4983         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4984                              output_pref(op, 0), ots->indirect_base);
4985         set_temp_val_reg(s, ots, oreg);
4986     }
4987 
4988     switch (its->val_type) {
4989     case TEMP_VAL_REG:
4990         /*
4991          * The dup constraints must be broad, covering all possible VECE.
4992          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4993          * to fail, indicating that extra moves are required for that case.
4994          */
4995         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4996             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4997                 goto done;
4998             }
4999             /* Try again from memory or a vector input register.  */
5000         }
5001         if (!its->mem_coherent) {
5002             /*
5003              * The input register is not synced, and so an extra store
5004              * would be required to use memory.  Attempt an integer-vector
5005              * register move first.  We do not have a TCGRegSet for this.
5006              */
5007             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5008                 break;
5009             }
5010             /* Sync the temp back to its slot and load from there.  */
5011             temp_sync(s, its, s->reserved_regs, 0, 0);
5012         }
5013         /* fall through */
5014 
5015     case TEMP_VAL_MEM:
5016         lowpart_ofs = 0;
5017         if (HOST_BIG_ENDIAN) {
5018             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5019         }
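        /*
         * E.g. for itype = TCG_TYPE_I64 and vece = MO_32 on a
         * big-endian host, lowpart_ofs = 8 - 4 = 4: the least
         * significant element of the value lives at the higher address.
         */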
5020         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5021                              its->mem_offset + lowpart_ofs)) {
5022             goto done;
5023         }
5024         /* Load the input into the destination vector register. */
5025         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5026         break;
5027 
5028     default:
5029         g_assert_not_reached();
5030     }
5031 
5032     /* We now have a vector input register, so dup must succeed. */
5033     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5034     tcg_debug_assert(ok);
5035 
5036  done:
5037     ots->mem_coherent = 0;
5038     if (IS_DEAD_ARG(1)) {
5039         temp_dead(s, its);
5040     }
5041     if (NEED_SYNC_ARG(0)) {
5042         temp_sync(s, ots, s->reserved_regs, 0, 0);
5043     }
5044     if (IS_DEAD_ARG(0)) {
5045         temp_dead(s, ots);
5046     }
5047 }
5048 
5049 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5050 {
5051     const TCGLifeData arg_life = op->life;
5052     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5053     TCGRegSet i_allocated_regs;
5054     TCGRegSet o_allocated_regs;
5055     int i, k, nb_iargs, nb_oargs;
5056     TCGReg reg;
5057     TCGArg arg;
5058     const TCGArgConstraint *args_ct;
5059     const TCGArgConstraint *arg_ct;
5060     TCGTemp *ts;
5061     TCGArg new_args[TCG_MAX_OP_ARGS];
5062     int const_args[TCG_MAX_OP_ARGS];
5063     TCGCond op_cond;
5064 
5065     nb_oargs = def->nb_oargs;
5066     nb_iargs = def->nb_iargs;
5067 
5068     /* copy constants */
5069     memcpy(new_args + nb_oargs + nb_iargs,
5070            op->args + nb_oargs + nb_iargs,
5071            sizeof(TCGArg) * def->nb_cargs);
5072 
5073     i_allocated_regs = s->reserved_regs;
5074     o_allocated_regs = s->reserved_regs;
5075 
5076     switch (op->opc) {
5077     case INDEX_op_brcond_i32:
5078     case INDEX_op_brcond_i64:
5079         op_cond = op->args[2];
5080         break;
5081     case INDEX_op_setcond_i32:
5082     case INDEX_op_setcond_i64:
5083     case INDEX_op_negsetcond_i32:
5084     case INDEX_op_negsetcond_i64:
5085     case INDEX_op_cmp_vec:
5086         op_cond = op->args[3];
5087         break;
5088     case INDEX_op_brcond2_i32:
5089         op_cond = op->args[4];
5090         break;
5091     case INDEX_op_movcond_i32:
5092     case INDEX_op_movcond_i64:
5093     case INDEX_op_setcond2_i32:
5094     case INDEX_op_cmpsel_vec:
5095         op_cond = op->args[5];
5096         break;
5097     default:
5098         /* No condition within opcode. */
5099         op_cond = TCG_COND_ALWAYS;
5100         break;
5101     }
5102 
5103     args_ct = opcode_args_ct(op);
5104 
5105     /* satisfy input constraints */
5106     for (k = 0; k < nb_iargs; k++) {
5107         TCGRegSet i_preferred_regs, i_required_regs;
5108         bool allocate_new_reg, copyto_new_reg;
5109         TCGTemp *ts2;
5110         int i1, i2;
5111 
5112         i = args_ct[nb_oargs + k].sort_index;
5113         arg = op->args[i];
5114         arg_ct = &args_ct[i];
5115         ts = arg_temp(arg);
5116 
5117         if (ts->val_type == TEMP_VAL_CONST) {
5118 #ifdef TCG_REG_ZERO
5119             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5120                 /* Hardware zero register: indicate register via non-const. */
5121                 const_args[i] = 0;
5122                 new_args[i] = TCG_REG_ZERO;
5123                 continue;
5124             }
5125 #endif
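            /*
             * E.g. hosts with a hardwired zero register (such as MIPS
             * $zero or RISC-V x0) satisfy a zero constant here without
             * materializing an immediate.
             */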
5126 
5127             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5128                                        op_cond, TCGOP_VECE(op))) {
5129                 /* constant is OK for instruction */
5130                 const_args[i] = 1;
5131                 new_args[i] = ts->val;
5132                 continue;
5133             }
5134         }
5135 
5136         reg = ts->reg;
5137         i_preferred_regs = 0;
5138         i_required_regs = arg_ct->regs;
5139         allocate_new_reg = false;
5140         copyto_new_reg = false;
5141 
5142         switch (arg_ct->pair) {
5143         case 0: /* not paired */
5144             if (arg_ct->ialias) {
5145                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5146 
5147                 /*
5148                  * If the input is readonly, then it cannot also be an
5149                  * output and aliased to itself.  If the input is not
5150                  * dead after the instruction, we must allocate a new
5151                  * register and move it.
5152                  */
5153                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5154                     || args_ct[arg_ct->alias_index].newreg) {
5155                     allocate_new_reg = true;
5156                 } else if (ts->val_type == TEMP_VAL_REG) {
5157                     /*
5158                      * Check if the current register has already been
5159                      * allocated for another input.
5160                      */
5161                     allocate_new_reg =
5162                         tcg_regset_test_reg(i_allocated_regs, reg);
5163                 }
5164             }
5165             if (!allocate_new_reg) {
5166                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5167                           i_preferred_regs);
5168                 reg = ts->reg;
5169                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5170             }
5171             if (allocate_new_reg) {
5172                 /*
5173                  * Allocate a new register matching the constraint
5174                  * and move the temporary register into it.
5175                  */
5176                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5177                           i_allocated_regs, 0);
5178                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5179                                     i_preferred_regs, ts->indirect_base);
5180                 copyto_new_reg = true;
5181             }
5182             break;
5183 
5184         case 1:
5185             /* First of an input pair; if i1 == i2, the second is an output. */
5186             i1 = i;
5187             i2 = arg_ct->pair_index;
5188             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5189 
5190             /*
5191              * It is easier to default to allocating a new pair
5192              * and to identify a few cases where it's not required.
5193              */
5194             if (arg_ct->ialias) {
5195                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5196                 if (IS_DEAD_ARG(i1) &&
5197                     IS_DEAD_ARG(i2) &&
5198                     !temp_readonly(ts) &&
5199                     ts->val_type == TEMP_VAL_REG &&
5200                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5201                     tcg_regset_test_reg(i_required_regs, reg) &&
5202                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5203                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5204                     (ts2
5205                      ? ts2->val_type == TEMP_VAL_REG &&
5206                        ts2->reg == reg + 1 &&
5207                        !temp_readonly(ts2)
5208                      : s->reg_to_temp[reg + 1] == NULL)) {
5209                     break;
5210                 }
5211             } else {
5212                 /* Without aliasing, the paired register must also be an input. */
5213                 tcg_debug_assert(ts2);
5214                 if (ts->val_type == TEMP_VAL_REG &&
5215                     ts2->val_type == TEMP_VAL_REG &&
5216                     ts2->reg == reg + 1 &&
5217                     tcg_regset_test_reg(i_required_regs, reg)) {
5218                     break;
5219                 }
5220             }
5221             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5222                                      0, ts->indirect_base);
5223             goto do_pair;
5224 
5225         case 2: /* pair second */
5226             reg = new_args[arg_ct->pair_index] + 1;
5227             goto do_pair;
5228 
5229         case 3: /* ialias with second output, no first input */
5230             tcg_debug_assert(arg_ct->ialias);
5231             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5232 
5233             if (IS_DEAD_ARG(i) &&
5234                 !temp_readonly(ts) &&
5235                 ts->val_type == TEMP_VAL_REG &&
5236                 reg > 0 &&
5237                 s->reg_to_temp[reg - 1] == NULL &&
5238                 tcg_regset_test_reg(i_required_regs, reg) &&
5239                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5240                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5241                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5242                 break;
5243             }
5244             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5245                                      i_allocated_regs, 0,
5246                                      ts->indirect_base);
5247             tcg_regset_set_reg(i_allocated_regs, reg);
5248             reg += 1;
5249             goto do_pair;
5250 
5251         do_pair:
5252             /*
5253              * If an aliased input is not dead after the instruction,
5254              * we must allocate a new register and move it.
5255              */
5256             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5257                 TCGRegSet t_allocated_regs = i_allocated_regs;
5258 
5259                 /*
5260                  * Because of the alias, and the continued life, make sure
5261                  * that the temp is somewhere *other* than the reg pair,
5262                  * and we get a copy in reg.
5263                  */
5264                 tcg_regset_set_reg(t_allocated_regs, reg);
5265                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5266                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5267                     /* If ts was already in reg, copy it somewhere else. */
5268                     TCGReg nr;
5269                     bool ok;
5270 
5271                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5272                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5273                                        t_allocated_regs, 0, ts->indirect_base);
5274                     ok = tcg_out_mov(s, ts->type, nr, reg);
5275                     tcg_debug_assert(ok);
5276 
5277                     set_temp_val_reg(s, ts, nr);
5278                 } else {
5279                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5280                               t_allocated_regs, 0);
5281                     copyto_new_reg = true;
5282                 }
5283             } else {
5284                 /* Preferably allocate to reg, otherwise copy. */
5285                 i_required_regs = (TCGRegSet)1 << reg;
5286                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5287                           i_preferred_regs);
5288                 copyto_new_reg = ts->reg != reg;
5289             }
5290             break;
5291 
5292         default:
5293             g_assert_not_reached();
5294         }
5295 
5296         if (copyto_new_reg) {
5297             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5298                 /*
5299                  * Cross register class move not supported.  Sync the
5300                  * temp back to its slot and load from there.
5301                  */
5302                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5303                 tcg_out_ld(s, ts->type, reg,
5304                            ts->mem_base->reg, ts->mem_offset);
5305             }
5306         }
5307         new_args[i] = reg;
5308         const_args[i] = 0;
5309         tcg_regset_set_reg(i_allocated_regs, reg);
5310     }
5311 
5312     /* mark dead temporaries and free the associated registers */
5313     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5314         if (IS_DEAD_ARG(i)) {
5315             temp_dead(s, arg_temp(op->args[i]));
5316         }
5317     }
5318 
5319     if (def->flags & TCG_OPF_COND_BRANCH) {
5320         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5321     } else if (def->flags & TCG_OPF_BB_END) {
5322         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5323     } else {
5324         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5325             /* XXX: permit generic clobber register list? */
5326             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5327                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5328                     tcg_reg_free(s, i, i_allocated_regs);
5329                 }
5330             }
5331         }
5332         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5333             /* sync globals if the op has side effects and might trigger
5334                an exception. */
5335             sync_globals(s, i_allocated_regs);
5336         }
5337 
5338         /* satisfy the output constraints */
5339         for (k = 0; k < nb_oargs; k++) {
5340             i = args_ct[k].sort_index;
5341             arg = op->args[i];
5342             arg_ct = &args_ct[i];
5343             ts = arg_temp(arg);
5344 
5345             /* ENV should not be modified.  */
5346             tcg_debug_assert(!temp_readonly(ts));
5347 
5348             switch (arg_ct->pair) {
5349             case 0: /* not paired */
5350                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5351                     reg = new_args[arg_ct->alias_index];
5352                 } else if (arg_ct->newreg) {
5353                     reg = tcg_reg_alloc(s, arg_ct->regs,
5354                                         i_allocated_regs | o_allocated_regs,
5355                                         output_pref(op, k), ts->indirect_base);
5356                 } else {
5357                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5358                                         output_pref(op, k), ts->indirect_base);
5359                 }
5360                 break;
5361 
5362             case 1: /* first of pair */
5363                 if (arg_ct->oalias) {
5364                     reg = new_args[arg_ct->alias_index];
5365                 } else if (arg_ct->newreg) {
5366                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5367                                              i_allocated_regs | o_allocated_regs,
5368                                              output_pref(op, k),
5369                                              ts->indirect_base);
5370                 } else {
5371                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5372                                              output_pref(op, k),
5373                                              ts->indirect_base);
5374                 }
5375                 break;
5376 
5377             case 2: /* second of pair */
5378                 if (arg_ct->oalias) {
5379                     reg = new_args[arg_ct->alias_index];
5380                 } else {
5381                     reg = new_args[arg_ct->pair_index] + 1;
5382                 }
5383                 break;
5384 
5385             case 3: /* first of pair, aliasing with a second input */
5386                 tcg_debug_assert(!arg_ct->newreg);
5387                 reg = new_args[arg_ct->pair_index] - 1;
5388                 break;
5389 
5390             default:
5391                 g_assert_not_reached();
5392             }
5393             tcg_regset_set_reg(o_allocated_regs, reg);
5394             set_temp_val_reg(s, ts, reg);
5395             ts->mem_coherent = 0;
5396             new_args[i] = reg;
5397         }
5398     }
5399 
5400     /* emit instruction */
5401     TCGType type = TCGOP_TYPE(op);
5402     switch (op->opc) {
5403     case INDEX_op_ext_i32_i64:
5404         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5405         break;
5406     case INDEX_op_extu_i32_i64:
5407         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5408         break;
5409     case INDEX_op_extrl_i64_i32:
5410         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5411         break;
5412 
5413     case INDEX_op_add:
5414     case INDEX_op_and:
5415     case INDEX_op_andc:
5416     case INDEX_op_divs:
5417     case INDEX_op_divu:
5418     case INDEX_op_eqv:
5419     case INDEX_op_mul:
5420     case INDEX_op_mulsh:
5421     case INDEX_op_muluh:
5422     case INDEX_op_nand:
5423     case INDEX_op_nor:
5424     case INDEX_op_or:
5425     case INDEX_op_orc:
5426     case INDEX_op_xor:
5427         {
5428             const TCGOutOpBinary *out =
5429                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5430 
5431             /* Constants should never appear in the first source operand. */
5432             tcg_debug_assert(!const_args[1]);
5433             if (const_args[2]) {
5434                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5435             } else {
5436                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5437             }
5438         }
5439         break;
5440 
5441     case INDEX_op_sub:
5442         {
5443             const TCGOutOpSubtract *out = &outop_sub;
5444 
5445             /*
5446              * Constants should never appear in the second source operand.
5447              * These are folded to add with negative constant.
5448              */
5449             tcg_debug_assert(!const_args[2]);
5450             if (const_args[1]) {
5451                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5452             } else {
5453                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5454             }
5455         }
5456         break;
5457 
5458     case INDEX_op_neg:
5459     case INDEX_op_not:
5460         {
5461             const TCGOutOpUnary *out =
5462                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5463 
5464             /* Constants should have been folded. */
5465             tcg_debug_assert(!const_args[1]);
5466             out->out_rr(s, type, new_args[0], new_args[1]);
5467         }
5468         break;
5469 
5470     default:
5471         if (def->flags & TCG_OPF_VECTOR) {
5472             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5473                            TCGOP_VECE(op), new_args, const_args);
5474         } else {
5475             tcg_out_op(s, op->opc, type, new_args, const_args);
5476         }
5477         break;
5478     }
5479 
5480     /* move the outputs in the correct register if needed */
5481     for (i = 0; i < nb_oargs; i++) {
5482         ts = arg_temp(op->args[i]);
5483 
5484         /* ENV should not be modified.  */
5485         tcg_debug_assert(!temp_readonly(ts));
5486 
5487         if (NEED_SYNC_ARG(i)) {
5488             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5489         } else if (IS_DEAD_ARG(i)) {
5490             temp_dead(s, ts);
5491         }
5492     }
5493 }
5494 
5495 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5496 {
5497     const TCGLifeData arg_life = op->life;
5498     TCGTemp *ots, *itsl, *itsh;
5499     TCGType vtype = TCGOP_TYPE(op);
5500 
5501     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5502     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5503     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5504 
5505     ots = arg_temp(op->args[0]);
5506     itsl = arg_temp(op->args[1]);
5507     itsh = arg_temp(op->args[2]);
5508 
5509     /* ENV should not be modified.  */
5510     tcg_debug_assert(!temp_readonly(ots));
5511 
5512     /* Allocate the output register now.  */
5513     if (ots->val_type != TEMP_VAL_REG) {
5514         TCGRegSet allocated_regs = s->reserved_regs;
5515         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5516         TCGReg oreg;
5517 
5518         /* Make sure to not spill the input registers. */
5519         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5520             tcg_regset_set_reg(allocated_regs, itsl->reg);
5521         }
5522         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5523             tcg_regset_set_reg(allocated_regs, itsh->reg);
5524         }
5525 
5526         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5527                              output_pref(op, 0), ots->indirect_base);
5528         set_temp_val_reg(s, ots, oreg);
5529     }
5530 
5531     /* Promote dup2 of immediates to dupi_vec. */
5532     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5533         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5534         MemOp vece = MO_64;
5535 
5536         if (val == dup_const(MO_8, val)) {
5537             vece = MO_8;
5538         } else if (val == dup_const(MO_16, val)) {
5539             vece = MO_16;
5540         } else if (val == dup_const(MO_32, val)) {
5541             vece = MO_32;
5542         }
5543 
5544         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5545         goto done;
5546     }
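    /*
     * For example, itsl->val = itsh->val = 0xdeadbeef yields
     * val = 0xdeadbeefdeadbeef, which equals dup_const(MO_32, val)
     * and is therefore emitted as a 32-bit element splat.
     */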
5547 
5548     /* If the two inputs form one 64-bit value, try dupm_vec. */
5549     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5550         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5551         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5552         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5553 
5554         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5555         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5556 
5557         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5558                              its->mem_base->reg, its->mem_offset)) {
5559             goto done;
5560         }
5561     }
5562 
5563     /* Fall back to generic expansion. */
5564     return false;
5565 
5566  done:
5567     ots->mem_coherent = 0;
5568     if (IS_DEAD_ARG(1)) {
5569         temp_dead(s, itsl);
5570     }
5571     if (IS_DEAD_ARG(2)) {
5572         temp_dead(s, itsh);
5573     }
5574     if (NEED_SYNC_ARG(0)) {
5575         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5576     } else if (IS_DEAD_ARG(0)) {
5577         temp_dead(s, ots);
5578     }
5579     return true;
5580 }
5581 
5582 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5583                          TCGRegSet allocated_regs)
5584 {
5585     if (ts->val_type == TEMP_VAL_REG) {
5586         if (ts->reg != reg) {
5587             tcg_reg_free(s, reg, allocated_regs);
5588             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5589                 /*
5590                  * Cross register class move not supported.  Sync the
5591                  * temp back to its slot and load from there.
5592                  */
5593                 temp_sync(s, ts, allocated_regs, 0, 0);
5594                 tcg_out_ld(s, ts->type, reg,
5595                            ts->mem_base->reg, ts->mem_offset);
5596             }
5597         }
5598     } else {
5599         TCGRegSet arg_set = 0;
5600 
5601         tcg_reg_free(s, reg, allocated_regs);
5602         tcg_regset_set_reg(arg_set, reg);
5603         temp_load(s, ts, arg_set, allocated_regs, 0);
5604     }
5605 }
5606 
5607 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5608                          TCGRegSet allocated_regs)
5609 {
5610     /*
5611      * When the destination is on the stack, load up the temp and store.
5612      * If there are many call-saved registers, the temp might live to
5613      * see another use; otherwise it'll be discarded.
5614      */
5615     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5616     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5617                arg_slot_stk_ofs(arg_slot));
5618 }
5619 
5620 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5621                             TCGTemp *ts, TCGRegSet *allocated_regs)
5622 {
5623     if (arg_slot_reg_p(l->arg_slot)) {
5624         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5625         load_arg_reg(s, reg, ts, *allocated_regs);
5626         tcg_regset_set_reg(*allocated_regs, reg);
5627     } else {
5628         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5629     }
5630 }
5631 
5632 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5633                          intptr_t ref_off, TCGRegSet *allocated_regs)
5634 {
5635     TCGReg reg;
5636 
5637     if (arg_slot_reg_p(arg_slot)) {
5638         reg = tcg_target_call_iarg_regs[arg_slot];
5639         tcg_reg_free(s, reg, *allocated_regs);
5640         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5641         tcg_regset_set_reg(*allocated_regs, reg);
5642     } else {
5643         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5644                             *allocated_regs, 0, false);
5645         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5646         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5647                    arg_slot_stk_ofs(arg_slot));
5648     }
5649 }
5650 
5651 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5652 {
5653     const int nb_oargs = TCGOP_CALLO(op);
5654     const int nb_iargs = TCGOP_CALLI(op);
5655     const TCGLifeData arg_life = op->life;
5656     const TCGHelperInfo *info = tcg_call_info(op);
5657     TCGRegSet allocated_regs = s->reserved_regs;
5658     int i;
5659 
5660     /*
5661      * Move inputs into place in reverse order,
5662      * so that we place stacked arguments first.
5663      */
5664     for (i = nb_iargs - 1; i >= 0; --i) {
5665         const TCGCallArgumentLoc *loc = &info->in[i];
5666         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5667 
5668         switch (loc->kind) {
5669         case TCG_CALL_ARG_NORMAL:
5670         case TCG_CALL_ARG_EXTEND_U:
5671         case TCG_CALL_ARG_EXTEND_S:
5672             load_arg_normal(s, loc, ts, &allocated_regs);
5673             break;
5674         case TCG_CALL_ARG_BY_REF:
5675             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5676             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5677                          arg_slot_stk_ofs(loc->ref_slot),
5678                          &allocated_regs);
5679             break;
5680         case TCG_CALL_ARG_BY_REF_N:
5681             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5682             break;
5683         default:
5684             g_assert_not_reached();
5685         }
5686     }
5687 
5688     /* Mark dead temporaries and free the associated registers.  */
5689     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5690         if (IS_DEAD_ARG(i)) {
5691             temp_dead(s, arg_temp(op->args[i]));
5692         }
5693     }
5694 
5695     /* Clobber call registers.  */
5696     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5697         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5698             tcg_reg_free(s, i, allocated_regs);
5699         }
5700     }
5701 
5702     /*
5703      * Save globals if they might be written by the helper,
5704      * sync them if they might be read.
5705      */
5706     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5707         /* Nothing to do */
5708     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5709         sync_globals(s, allocated_regs);
5710     } else {
5711         save_globals(s, allocated_regs);
5712     }
5713 
5714     /*
5715      * If the ABI passes a pointer to the returned struct as the first
5716      * argument, load that now.  Pass a pointer to the output home slot.
5717      */
5718     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5719         TCGTemp *ts = arg_temp(op->args[0]);
5720 
5721         if (!ts->mem_allocated) {
5722             temp_allocate_frame(s, ts);
5723         }
5724         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5725     }
5726 
5727     tcg_out_call(s, tcg_call_func(op), info);
5728 
5729     /* Assign output registers and emit moves if needed.  */
5730     switch (info->out_kind) {
5731     case TCG_CALL_RET_NORMAL:
5732         for (i = 0; i < nb_oargs; i++) {
5733             TCGTemp *ts = arg_temp(op->args[i]);
5734             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5735 
5736             /* ENV should not be modified.  */
5737             tcg_debug_assert(!temp_readonly(ts));
5738 
5739             set_temp_val_reg(s, ts, reg);
5740             ts->mem_coherent = 0;
5741         }
5742         break;
5743 
5744     case TCG_CALL_RET_BY_VEC:
5745         {
5746             TCGTemp *ts = arg_temp(op->args[0]);
5747 
5748             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5749             tcg_debug_assert(ts->temp_subindex == 0);
5750             if (!ts->mem_allocated) {
5751                 temp_allocate_frame(s, ts);
5752             }
5753             tcg_out_st(s, TCG_TYPE_V128,
5754                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5755                        ts->mem_base->reg, ts->mem_offset);
5756         }
5757         /* fall through to mark all parts in memory */
5758 
5759     case TCG_CALL_RET_BY_REF:
5760         /* The callee has performed a write through the reference. */
5761         for (i = 0; i < nb_oargs; i++) {
5762             TCGTemp *ts = arg_temp(op->args[i]);
5763             ts->val_type = TEMP_VAL_MEM;
5764         }
5765         break;
5766 
5767     default:
5768         g_assert_not_reached();
5769     }
5770 
5771     /* Flush or discard output registers as needed. */
5772     for (i = 0; i < nb_oargs; i++) {
5773         TCGTemp *ts = arg_temp(op->args[i]);
5774         if (NEED_SYNC_ARG(i)) {
5775             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5776         } else if (IS_DEAD_ARG(i)) {
5777             temp_dead(s, ts);
5778         }
5779     }
5780 }
5781 
5782 /**
5783  * atom_and_align_for_opc:
5784  * @s: tcg context
5785  * @opc: memory operation code
5786  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5787  * @allow_two_ops: true if we are prepared to issue two operations
5788  *
5789  * Return the alignment and atomicity to use for the inline fast path
5790  * for the given memory operation.  The alignment may be larger than
5791  * that specified in @opc, and the correct alignment will be diagnosed
5792  * by the slow path helper.
5793  *
5794  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5795  * and issue two loads or stores for subalignment.
5796  */
5797 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5798                                            MemOp host_atom, bool allow_two_ops)
5799 {
5800     MemOp align = memop_alignment_bits(opc);
5801     MemOp size = opc & MO_SIZE;
5802     MemOp half = size ? size - 1 : 0;
5803     MemOp atom = opc & MO_ATOM_MASK;
5804     MemOp atmax;
5805 
5806     switch (atom) {
5807     case MO_ATOM_NONE:
5808         /* The operation requires no specific atomicity. */
5809         atmax = MO_8;
5810         break;
5811 
5812     case MO_ATOM_IFALIGN:
5813         atmax = size;
5814         break;
5815 
5816     case MO_ATOM_IFALIGN_PAIR:
5817         atmax = half;
5818         break;
5819 
5820     case MO_ATOM_WITHIN16:
5821         atmax = size;
5822         if (size == MO_128) {
5823             /* Misalignment implies !within16, and therefore no atomicity. */
5824         } else if (host_atom != MO_ATOM_WITHIN16) {
5825             /* The host does not implement within16, so require alignment. */
5826             align = MAX(align, size);
5827         }
5828         break;
5829 
5830     case MO_ATOM_WITHIN16_PAIR:
5831         atmax = size;
5832         /*
5833          * Misalignment implies !within16, and therefore half atomicity.
5834          * Any host prepared for two operations can implement this with
5835          * half alignment.
5836          */
5837         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5838             align = MAX(align, half);
5839         }
5840         break;
5841 
5842     case MO_ATOM_SUBALIGN:
5843         atmax = size;
5844         if (host_atom != MO_ATOM_SUBALIGN) {
5845             /* If unaligned but not odd, there are subobjects up to half. */
5846             if (allow_two_ops) {
5847                 align = MAX(align, half);
5848             } else {
5849                 align = MAX(align, size);
5850             }
5851         }
5852         break;
5853 
5854     default:
5855         g_assert_not_reached();
5856     }
5857 
5858     return (TCGAtomAlign){ .atom = atmax, .align = align };
5859 }
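/*
 * Worked example (illustrative): opc = MO_64 | MO_ATOM_WITHIN16 on a
 * host whose model is MO_ATOM_IFALIGN yields atmax = MO_64 with align
 * raised to MO_64, i.e. an 8-byte-aligned fast path that is atomic by
 * virtue of its alignment.
 */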
5860 
5861 /*
5862  * Similarly for qemu_ld/st slow path helpers.
5863  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5864  * using only the provided backend tcg_out_* functions.
5865  */
5866 
5867 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5868 {
5869     int ofs = arg_slot_stk_ofs(slot);
5870 
5871     /*
5872      * Each stack slot is TCG_TARGET_LONG_BITS wide.  If the host does not
5873      * require extension to uint64_t, adjust the address for uint32_t.
5874      */
5875     if (HOST_BIG_ENDIAN &&
5876         TCG_TARGET_REG_BITS == 64 &&
5877         type == TCG_TYPE_I32) {
5878         ofs += 4;
5879     }
5880     return ofs;
5881 }
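/*
 * E.g. on a 64-bit big-endian host, a TCG_TYPE_I32 argument in slot N
 * is stored at arg_slot_stk_ofs(N) + 4, the low-order half of the
 * 8-byte slot, which is where the callee expects the 32-bit value.
 */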
5882 
5883 static void tcg_out_helper_load_slots(TCGContext *s,
5884                                       unsigned nmov, TCGMovExtend *mov,
5885                                       const TCGLdstHelperParam *parm)
5886 {
5887     unsigned i;
5888     TCGReg dst3;
5889 
5890     /*
5891      * Start from the end, storing to the stack first.
5892      * This frees those registers, so we need not consider overlap.
5893      */
5894     for (i = nmov; i-- > 0; ) {
5895         unsigned slot = mov[i].dst;
5896 
5897         if (arg_slot_reg_p(slot)) {
5898             goto found_reg;
5899         }
5900 
5901         TCGReg src = mov[i].src;
5902         TCGType dst_type = mov[i].dst_type;
5903         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5904 
5905         /* The argument is going onto the stack; extend into scratch. */
5906         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5907             tcg_debug_assert(parm->ntmp != 0);
5908             mov[i].dst = src = parm->tmp[0];
5909             tcg_out_movext1(s, &mov[i]);
5910         }
5911 
5912         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5913                    tcg_out_helper_stk_ofs(dst_type, slot));
5914     }
5915     return;
5916 
5917  found_reg:
5918     /*
5919      * The remaining arguments are in registers.
5920      * Convert slot numbers to argument registers.
5921      */
5922     nmov = i + 1;
5923     for (i = 0; i < nmov; ++i) {
5924         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5925     }
5926 
5927     switch (nmov) {
5928     case 4:
5929         /* The backend must have provided enough temps for the worst case. */
5930         tcg_debug_assert(parm->ntmp >= 2);
5931 
5932         dst3 = mov[3].dst;
5933         for (unsigned j = 0; j < 3; ++j) {
5934             if (dst3 == mov[j].src) {
5935                 /*
5936                  * Conflict. Copy the source to a temporary, perform the
5937                  * remaining moves, then the extension from our scratch
5938                  * on the way out.
5939                  */
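                /*
                 * E.g. if mov[3].dst is also mov[1].src, emitting
                 * mov[3] first (the no-conflict path below) would
                 * clobber mov[1]'s source.
                 */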
5940                 TCGReg scratch = parm->tmp[1];
5941 
5942                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5943                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5944                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5945                 return;
5946             }
5947         }
5948 
5949         /* No conflicts: perform this move and continue. */
5950         tcg_out_movext1(s, &mov[3]);
5951         /* fall through */
5952 
5953     case 3:
5954         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5955                         parm->ntmp ? parm->tmp[0] : -1);
5956         break;
5957     case 2:
5958         tcg_out_movext2(s, mov, mov + 1,
5959                         parm->ntmp ? parm->tmp[0] : -1);
5960         break;
5961     case 1:
5962         tcg_out_movext1(s, mov);
5963         break;
5964     default:
5965         g_assert_not_reached();
5966     }
5967 }
5968 
5969 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5970                                     TCGType type, tcg_target_long imm,
5971                                     const TCGLdstHelperParam *parm)
5972 {
5973     if (arg_slot_reg_p(slot)) {
5974         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5975     } else {
5976         int ofs = tcg_out_helper_stk_ofs(type, slot);
5977         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5978             tcg_debug_assert(parm->ntmp != 0);
5979             tcg_out_movi(s, type, parm->tmp[0], imm);
5980             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5981         }
5982     }
5983 }
5984 
5985 static void tcg_out_helper_load_common_args(TCGContext *s,
5986                                             const TCGLabelQemuLdst *ldst,
5987                                             const TCGLdstHelperParam *parm,
5988                                             const TCGHelperInfo *info,
5989                                             unsigned next_arg)
5990 {
5991     TCGMovExtend ptr_mov = {
5992         .dst_type = TCG_TYPE_PTR,
5993         .src_type = TCG_TYPE_PTR,
5994         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5995     };
5996     const TCGCallArgumentLoc *loc = &info->in[0];
5997     TCGType type;
5998     unsigned slot;
5999     tcg_target_ulong imm;
6000 
6001     /*
6002      * Handle env, which is always first.
6003      */
6004     ptr_mov.dst = loc->arg_slot;
6005     ptr_mov.src = TCG_AREG0;
6006     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6007 
6008     /*
6009      * Handle oi.
6010      */
6011     imm = ldst->oi;
6012     loc = &info->in[next_arg];
6013     type = TCG_TYPE_I32;
6014     switch (loc->kind) {
6015     case TCG_CALL_ARG_NORMAL:
6016         break;
6017     case TCG_CALL_ARG_EXTEND_U:
6018     case TCG_CALL_ARG_EXTEND_S:
6019         /* No extension required for MemOpIdx. */
6020         tcg_debug_assert(imm <= INT32_MAX);
6021         type = TCG_TYPE_REG;
6022         break;
6023     default:
6024         g_assert_not_reached();
6025     }
6026     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6027     next_arg++;
6028 
6029     /*
6030      * Handle ra.
6031      */
6032     loc = &info->in[next_arg];
6033     slot = loc->arg_slot;
6034     if (parm->ra_gen) {
6035         int arg_reg = -1;
6036         TCGReg ra_reg;
6037 
6038         if (arg_slot_reg_p(slot)) {
6039             arg_reg = tcg_target_call_iarg_regs[slot];
6040         }
6041         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6042 
6043         ptr_mov.dst = slot;
6044         ptr_mov.src = ra_reg;
6045         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6046     } else {
6047         imm = (uintptr_t)ldst->raddr;
6048         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6049     }
6050 }
6051 
6052 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6053                                        const TCGCallArgumentLoc *loc,
6054                                        TCGType dst_type, TCGType src_type,
6055                                        TCGReg lo, TCGReg hi)
6056 {
6057     MemOp reg_mo;
6058 
6059     if (dst_type <= TCG_TYPE_REG) {
6060         MemOp src_ext;
6061 
6062         switch (loc->kind) {
6063         case TCG_CALL_ARG_NORMAL:
6064             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6065             break;
6066         case TCG_CALL_ARG_EXTEND_U:
6067             dst_type = TCG_TYPE_REG;
6068             src_ext = MO_UL;
6069             break;
6070         case TCG_CALL_ARG_EXTEND_S:
6071             dst_type = TCG_TYPE_REG;
6072             src_ext = MO_SL;
6073             break;
6074         default:
6075             g_assert_not_reached();
6076         }
6077 
6078         mov[0].dst = loc->arg_slot;
6079         mov[0].dst_type = dst_type;
6080         mov[0].src = lo;
6081         mov[0].src_type = src_type;
6082         mov[0].src_ext = src_ext;
6083         return 1;
6084     }
6085 
6086     if (TCG_TARGET_REG_BITS == 32) {
6087         assert(dst_type == TCG_TYPE_I64);
6088         reg_mo = MO_32;
6089     } else {
6090         assert(dst_type == TCG_TYPE_I128);
6091         reg_mo = MO_64;
6092     }
6093 
6094     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6095     mov[0].src = lo;
6096     mov[0].dst_type = TCG_TYPE_REG;
6097     mov[0].src_type = TCG_TYPE_REG;
6098     mov[0].src_ext = reg_mo;
6099 
6100     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6101     mov[1].src = hi;
6102     mov[1].dst_type = TCG_TYPE_REG;
6103     mov[1].src_type = TCG_TYPE_REG;
6104     mov[1].src_ext = reg_mo;
6105 
6106     return 2;
6107 }
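/*
 * E.g. a 32-bit little-endian host passing a TCG_TYPE_I64 value uses
 * two moves: lo into loc[0].arg_slot and hi into loc[1].arg_slot;
 * on a big-endian host the two slots are swapped.
 */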
6108 
6109 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6110                                    const TCGLdstHelperParam *parm)
6111 {
6112     const TCGHelperInfo *info;
6113     const TCGCallArgumentLoc *loc;
6114     TCGMovExtend mov[2];
6115     unsigned next_arg, nmov;
6116     MemOp mop = get_memop(ldst->oi);
6117 
6118     switch (mop & MO_SIZE) {
6119     case MO_8:
6120     case MO_16:
6121     case MO_32:
6122         info = &info_helper_ld32_mmu;
6123         break;
6124     case MO_64:
6125         info = &info_helper_ld64_mmu;
6126         break;
6127     case MO_128:
6128         info = &info_helper_ld128_mmu;
6129         break;
6130     default:
6131         g_assert_not_reached();
6132     }
6133 
6134     /* Defer env argument. */
6135     next_arg = 1;
6136 
6137     loc = &info->in[next_arg];
6138     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6139         /*
6140          * 32-bit host with 32-bit guest: zero-extend the guest address
6141          * to 64 bits for the helper by storing the low part, then
6142          * loading a zero for the high part.
6143          */
6144         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6145                                TCG_TYPE_I32, TCG_TYPE_I32,
6146                                ldst->addr_reg, -1);
6147         tcg_out_helper_load_slots(s, 1, mov, parm);
6148 
6149         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6150                                 TCG_TYPE_I32, 0, parm);
6151         next_arg += 2;
6152     } else {
6153         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6154                                       ldst->addr_reg, -1);
6155         tcg_out_helper_load_slots(s, nmov, mov, parm);
6156         next_arg += nmov;
6157     }
6158 
6159     switch (info->out_kind) {
6160     case TCG_CALL_RET_NORMAL:
6161     case TCG_CALL_RET_BY_VEC:
6162         break;
6163     case TCG_CALL_RET_BY_REF:
6164         /*
6165          * The return reference is in the first argument slot.
6166          * We need memory in which to return: re-use the top of stack.
6167          */
6168         {
6169             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6170 
6171             if (arg_slot_reg_p(0)) {
6172                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6173                                  TCG_REG_CALL_STACK, ofs_slot0);
6174             } else {
6175                 tcg_debug_assert(parm->ntmp != 0);
6176                 tcg_out_addi_ptr(s, parm->tmp[0],
6177                                  TCG_REG_CALL_STACK, ofs_slot0);
6178                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6179                            TCG_REG_CALL_STACK, ofs_slot0);
6180             }
6181         }
6182         break;
6183     default:
6184         g_assert_not_reached();
6185     }
6186 
6187     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6188 }
6189 
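/* Move the return value of a qemu_ld slow-path helper from the ABI
   return location(s) into ldst->datalo_reg/datahi_reg, applying any
   required extension on the way.  */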
6190 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6191                                   bool load_sign,
6192                                   const TCGLdstHelperParam *parm)
6193 {
6194     MemOp mop = get_memop(ldst->oi);
6195     TCGMovExtend mov[2];
6196     int ofs_slot0;
6197 
6198     switch (ldst->type) {
6199     case TCG_TYPE_I64:
6200         if (TCG_TARGET_REG_BITS == 32) {
6201             break;
6202         }
6203         /* fall through */
6204 
6205     case TCG_TYPE_I32:
6206         mov[0].dst = ldst->datalo_reg;
6207         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6208         mov[0].dst_type = ldst->type;
6209         mov[0].src_type = TCG_TYPE_REG;
6210 
6211         /*
6212          * If load_sign, then we allowed the helper to perform the
6213          * appropriate sign extension to tcg_target_ulong, and all
6214          * we need now is a plain move.
6215          *
         * If not, then we expect the relevant extension
6217          * instruction to be no more expensive than a move, and
6218          * we thus save the icache etc by only using one of two
6219          * helper functions.
6220          */
6221         if (load_sign || !(mop & MO_SIGN)) {
6222             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6223                 mov[0].src_ext = MO_32;
6224             } else {
6225                 mov[0].src_ext = MO_64;
6226             }
6227         } else {
6228             mov[0].src_ext = mop & MO_SSIZE;
6229         }
6230         tcg_out_movext1(s, mov);
6231         return;
6232 
6233     case TCG_TYPE_I128:
6234         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6235         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6236         switch (TCG_TARGET_CALL_RET_I128) {
6237         case TCG_CALL_RET_NORMAL:
6238             break;
6239         case TCG_CALL_RET_BY_VEC:
6240             tcg_out_st(s, TCG_TYPE_V128,
6241                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6242                        TCG_REG_CALL_STACK, ofs_slot0);
6243             /* fall through */
6244         case TCG_CALL_RET_BY_REF:
6245             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6246                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6247             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6248                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6249             return;
6250         default:
6251             g_assert_not_reached();
6252         }
6253         break;
6254 
6255     default:
6256         g_assert_not_reached();
6257     }
6258 
6259     mov[0].dst = ldst->datalo_reg;
6260     mov[0].src =
6261         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6262     mov[0].dst_type = TCG_TYPE_REG;
6263     mov[0].src_type = TCG_TYPE_REG;
6264     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6265 
6266     mov[1].dst = ldst->datahi_reg;
6267     mov[1].src =
6268         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6269     mov[1].dst_type = TCG_TYPE_REG;
6270     mov[1].src_type = TCG_TYPE_REG;
6271     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6272 
6273     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6274 }
6275 
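/* Load the arguments for a qemu_st slow-path helper call: the guest
   address, the data to store (by reference for some I128 ABIs), and
   the remaining common arguments.  */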
6276 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6277                                    const TCGLdstHelperParam *parm)
6278 {
6279     const TCGHelperInfo *info;
6280     const TCGCallArgumentLoc *loc;
6281     TCGMovExtend mov[4];
6282     TCGType data_type;
6283     unsigned next_arg, nmov, n;
6284     MemOp mop = get_memop(ldst->oi);
6285 
6286     switch (mop & MO_SIZE) {
6287     case MO_8:
6288     case MO_16:
6289     case MO_32:
6290         info = &info_helper_st32_mmu;
6291         data_type = TCG_TYPE_I32;
6292         break;
6293     case MO_64:
6294         info = &info_helper_st64_mmu;
6295         data_type = TCG_TYPE_I64;
6296         break;
6297     case MO_128:
6298         info = &info_helper_st128_mmu;
6299         data_type = TCG_TYPE_I128;
6300         break;
6301     default:
6302         g_assert_not_reached();
6303     }
6304 
6305     /* Defer env argument. */
6306     next_arg = 1;
6307     nmov = 0;
6308 
6309     /* Handle addr argument. */
6310     loc = &info->in[next_arg];
6311     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6312     if (TCG_TARGET_REG_BITS == 32) {
6313         /*
6314          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6315          * to 64-bits for the helper by storing the low part.  Later,
6316          * after we have processed the register inputs, we will load a
6317          * zero for the high part.
6318          */
6319         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6320                                TCG_TYPE_I32, TCG_TYPE_I32,
6321                                ldst->addr_reg, -1);
6322         next_arg += 2;
6323         nmov += 1;
6324     } else {
6325         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6326                                    ldst->addr_reg, -1);
6327         next_arg += n;
6328         nmov += n;
6329     }
6330 
6331     /* Handle data argument. */
6332     loc = &info->in[next_arg];
6333     switch (loc->kind) {
6334     case TCG_CALL_ARG_NORMAL:
6335     case TCG_CALL_ARG_EXTEND_U:
6336     case TCG_CALL_ARG_EXTEND_S:
6337         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6338                                    ldst->datalo_reg, ldst->datahi_reg);
6339         next_arg += n;
6340         nmov += n;
6341         tcg_out_helper_load_slots(s, nmov, mov, parm);
6342         break;
6343 
6344     case TCG_CALL_ARG_BY_REF:
6345         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6346         tcg_debug_assert(data_type == TCG_TYPE_I128);
6347         tcg_out_st(s, TCG_TYPE_I64,
6348                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6349                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6350         tcg_out_st(s, TCG_TYPE_I64,
6351                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6352                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6353 
6354         tcg_out_helper_load_slots(s, nmov, mov, parm);
6355 
6356         if (arg_slot_reg_p(loc->arg_slot)) {
6357             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6358                              TCG_REG_CALL_STACK,
6359                              arg_slot_stk_ofs(loc->ref_slot));
6360         } else {
6361             tcg_debug_assert(parm->ntmp != 0);
6362             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6363                              arg_slot_stk_ofs(loc->ref_slot));
6364             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6365                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6366         }
6367         next_arg += 2;
6368         break;
6369 
6370     default:
6371         g_assert_not_reached();
6372     }
6373 
6374     if (TCG_TARGET_REG_BITS == 32) {
6375         /* Zero extend the address by loading a zero for the high part. */
6376         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6377         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6378     }
6379 
6380     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6381 }
6382 
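/* Generate host code for @tb from the ops in @s.  Returns the size in
   bytes of the generated code, or a negative value when generation
   must be restarted: -1 if the code buffer high-water mark was
   crossed, -2 if the TB exceeded internal size limits.  */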
6383 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6384 {
6385     int i, start_words, num_insns;
6386     TCGOp *op;
6387 
6388     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6389                  && qemu_log_in_addr_range(pc_start))) {
6390         FILE *logfile = qemu_log_trylock();
6391         if (logfile) {
6392             fprintf(logfile, "OP:\n");
6393             tcg_dump_ops(s, logfile, false);
6394             fprintf(logfile, "\n");
6395             qemu_log_unlock(logfile);
6396         }
6397     }
6398 
6399 #ifdef CONFIG_DEBUG_TCG
6400     /* Ensure all labels referenced have been emitted.  */
6401     {
6402         TCGLabel *l;
6403         bool error = false;
6404 
6405         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6406             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6407                 qemu_log_mask(CPU_LOG_TB_OP,
6408                               "$L%d referenced but not present.\n", l->id);
6409                 error = true;
6410             }
6411         }
6412         assert(!error);
6413     }
6414 #endif
6415 
6416     /* Do not reuse any EBB that may be allocated within the TB. */
6417     tcg_temp_ebb_reset_freed(s);
6418 
6419     tcg_optimize(s);
6420 
6421     reachable_code_pass(s);
6422     liveness_pass_0(s);
6423     liveness_pass_1(s);
6424 
6425     if (s->nb_indirects > 0) {
6426         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6427                      && qemu_log_in_addr_range(pc_start))) {
6428             FILE *logfile = qemu_log_trylock();
6429             if (logfile) {
6430                 fprintf(logfile, "OP before indirect lowering:\n");
6431                 tcg_dump_ops(s, logfile, false);
6432                 fprintf(logfile, "\n");
6433                 qemu_log_unlock(logfile);
6434             }
6435         }
6436 
6437         /* Replace indirect temps with direct temps.  */
6438         if (liveness_pass_2(s)) {
6439             /* If changes were made, re-run liveness.  */
6440             liveness_pass_1(s);
6441         }
6442     }
6443 
6444     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6445                  && qemu_log_in_addr_range(pc_start))) {
6446         FILE *logfile = qemu_log_trylock();
6447         if (logfile) {
6448             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6449             tcg_dump_ops(s, logfile, true);
6450             fprintf(logfile, "\n");
6451             qemu_log_unlock(logfile);
6452         }
6453     }
6454 
6455     /* Initialize goto_tb jump offsets. */
6456     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6457     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6458     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6459     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6460 
6461     tcg_reg_alloc_start(s);
6462 
6463     /*
6464      * Reset the buffer pointers when restarting after overflow.
6465      * TODO: Move this into translate-all.c with the rest of the
6466      * buffer management.  Having only this done here is confusing.
6467      */
6468     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6469     s->code_ptr = s->code_buf;
6470     s->data_gen_ptr = NULL;
6471 
6472     QSIMPLEQ_INIT(&s->ldst_labels);
6473     s->pool_labels = NULL;
6474 
6475     start_words = s->insn_start_words;
6476     s->gen_insn_data =
6477         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6478 
6479     tcg_out_tb_start(s);
6480 
6481     num_insns = -1;
6482     QTAILQ_FOREACH(op, &s->ops, link) {
6483         TCGOpcode opc = op->opc;
6484 
6485         switch (opc) {
6486         case INDEX_op_mov:
6487         case INDEX_op_mov_vec:
6488             tcg_reg_alloc_mov(s, op);
6489             break;
6490         case INDEX_op_dup_vec:
6491             tcg_reg_alloc_dup(s, op);
6492             break;
6493         case INDEX_op_insn_start:
6494             if (num_insns >= 0) {
6495                 size_t off = tcg_current_code_size(s);
6496                 s->gen_insn_end_off[num_insns] = off;
6497                 /* Assert that we do not overflow our stored offset.  */
6498                 assert(s->gen_insn_end_off[num_insns] == off);
6499             }
6500             num_insns++;
6501             for (i = 0; i < start_words; ++i) {
6502                 s->gen_insn_data[num_insns * start_words + i] =
6503                     tcg_get_insn_start_param(op, i);
6504             }
6505             break;
6506         case INDEX_op_discard:
6507             temp_dead(s, arg_temp(op->args[0]));
6508             break;
6509         case INDEX_op_set_label:
6510             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6511             tcg_out_label(s, arg_label(op->args[0]));
6512             break;
6513         case INDEX_op_call:
6514             tcg_reg_alloc_call(s, op);
6515             break;
6516         case INDEX_op_exit_tb:
6517             tcg_out_exit_tb(s, op->args[0]);
6518             break;
6519         case INDEX_op_goto_tb:
6520             tcg_out_goto_tb(s, op->args[0]);
6521             break;
6522         case INDEX_op_dup2_vec:
6523             if (tcg_reg_alloc_dup2(s, op)) {
6524                 break;
6525             }
6526             /* fall through */
6527         default:
6528             /* Sanity check that we've not introduced any unhandled opcodes. */
6529             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6530                                               TCGOP_FLAGS(op)));
            /* Note: it would be faster to have specialized register
               allocator functions for some common argument patterns. */
6534             tcg_reg_alloc_op(s, op);
6535             break;
6536         }
6537         /* Test for (pending) buffer overflow.  The assumption is that any
6538            one operation beginning below the high water mark cannot overrun
6539            the buffer completely.  Thus we can test for overflow after
6540            generating code without having to check during generation.  */
6541         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6542             return -1;
6543         }
6544         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6545         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6546             return -2;
6547         }
6548     }
6549     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6550     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6551 
6552     /* Generate TB finalization at the end of block */
6553     i = tcg_out_ldst_finalize(s);
6554     if (i < 0) {
6555         return i;
6556     }
6557     i = tcg_out_pool_finalize(s);
6558     if (i < 0) {
6559         return i;
6560     }
6561     if (!tcg_resolve_relocs(s)) {
6562         return -2;
6563     }
6564 
6565 #ifndef CONFIG_TCG_INTERPRETER
6566     /* flush instruction cache */
6567     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6568                         (uintptr_t)s->code_buf,
6569                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6570 #endif
6571 
6572     return tcg_current_code_size(s);
6573 }
6574 
6575 #ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, which both supplies the value to put
       into the ELF image and indicates that the feature is supported.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int with the constructed .debug_frame.
*/
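/* As a sketch only (the DebugFrame layout is host-specific, and the
   struct name here is illustrative rather than copied from any
   particular tcg-target.c.inc), a backend's half of the interface
   has this shape:

       static const DebugFrame debug_frame = {
           ... CIE/FDE data describing the prologue's frame ...
       };

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }
*/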
6587 
6588 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6589 typedef enum {
6590     JIT_NOACTION = 0,
6591     JIT_REGISTER_FN,
6592     JIT_UNREGISTER_FN
6593 } jit_actions_t;
6594 
6595 struct jit_code_entry {
6596     struct jit_code_entry *next_entry;
6597     struct jit_code_entry *prev_entry;
6598     const void *symfile_addr;
6599     uint64_t symfile_size;
6600 };
6601 
6602 struct jit_descriptor {
6603     uint32_t version;
6604     uint32_t action_flag;
6605     struct jit_code_entry *relevant_entry;
6606     struct jit_code_entry *first_entry;
6607 };
6608 
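/* GDB sets a breakpoint on this function; calling it after updating
   __jit_debug_descriptor is what notifies the debugger.  The noinline
   attribute plus the empty asm keep the call from being elided.  */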
6609 void __jit_debug_register_code(void) __attribute__((noinline));
6610 void __jit_debug_register_code(void)
6611 {
6612     asm("");
6613 }
6614 
6615 /* Must statically initialize the version, because GDB may check
6616    the version before we can set it.  */
6617 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6618 
6619 /* End GDB interface.  */
6620 
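/* Return the offset of @str within @strtab.  The caller must ensure
   that @str is present; there is no bounds check.  */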
6621 static int find_string(const char *strtab, const char *str)
6622 {
6623     const char *p = strtab + 1;
6624 
6625     while (1) {
6626         if (strcmp(p, str) == 0) {
6627             return p - strtab;
6628         }
6629         p += strlen(p) + 1;
6630     }
6631 }
6632 
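/* Build a minimal in-memory ELF image that describes the generated
   code in [buf_ptr, buf_ptr + buf_size), append the backend-supplied
   .debug_frame, and register the result with GDB as above.  */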
6633 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6634                                  const void *debug_frame,
6635                                  size_t debug_frame_size)
6636 {
6637     struct __attribute__((packed)) DebugInfo {
6638         uint32_t  len;
6639         uint16_t  version;
6640         uint32_t  abbrev;
6641         uint8_t   ptr_size;
6642         uint8_t   cu_die;
6643         uint16_t  cu_lang;
6644         uintptr_t cu_low_pc;
6645         uintptr_t cu_high_pc;
6646         uint8_t   fn_die;
6647         char      fn_name[16];
6648         uintptr_t fn_low_pc;
6649         uintptr_t fn_high_pc;
6650         uint8_t   cu_eoc;
6651     };
6652 
6653     struct ElfImage {
6654         ElfW(Ehdr) ehdr;
6655         ElfW(Phdr) phdr;
6656         ElfW(Shdr) shdr[7];
6657         ElfW(Sym)  sym[2];
6658         struct DebugInfo di;
6659         uint8_t    da[24];
6660         char       str[80];
6661     };
6662 
6663     struct ElfImage *img;
6664 
6665     static const struct ElfImage img_template = {
6666         .ehdr = {
6667             .e_ident[EI_MAG0] = ELFMAG0,
6668             .e_ident[EI_MAG1] = ELFMAG1,
6669             .e_ident[EI_MAG2] = ELFMAG2,
6670             .e_ident[EI_MAG3] = ELFMAG3,
6671             .e_ident[EI_CLASS] = ELF_CLASS,
6672             .e_ident[EI_DATA] = ELF_DATA,
6673             .e_ident[EI_VERSION] = EV_CURRENT,
6674             .e_type = ET_EXEC,
6675             .e_machine = ELF_HOST_MACHINE,
6676             .e_version = EV_CURRENT,
6677             .e_phoff = offsetof(struct ElfImage, phdr),
6678             .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
6680             .e_phentsize = sizeof(ElfW(Phdr)),
6681             .e_phnum = 1,
6682             .e_shentsize = sizeof(ElfW(Shdr)),
6683             .e_shnum = ARRAY_SIZE(img->shdr),
6684             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6685 #ifdef ELF_HOST_FLAGS
6686             .e_flags = ELF_HOST_FLAGS,
6687 #endif
6688 #ifdef ELF_OSABI
6689             .e_ident[EI_OSABI] = ELF_OSABI,
6690 #endif
6691         },
6692         .phdr = {
6693             .p_type = PT_LOAD,
6694             .p_flags = PF_X,
6695         },
6696         .shdr = {
6697             [0] = { .sh_type = SHT_NULL },
6698             /* Trick: The contents of code_gen_buffer are not present in
6699                this fake ELF file; that got allocated elsewhere.  Therefore
6700                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6701                will not look for contents.  We can record any address.  */
6702             [1] = { /* .text */
6703                 .sh_type = SHT_NOBITS,
6704                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6705             },
6706             [2] = { /* .debug_info */
6707                 .sh_type = SHT_PROGBITS,
6708                 .sh_offset = offsetof(struct ElfImage, di),
6709                 .sh_size = sizeof(struct DebugInfo),
6710             },
6711             [3] = { /* .debug_abbrev */
6712                 .sh_type = SHT_PROGBITS,
6713                 .sh_offset = offsetof(struct ElfImage, da),
6714                 .sh_size = sizeof(img->da),
6715             },
6716             [4] = { /* .debug_frame */
6717                 .sh_type = SHT_PROGBITS,
6718                 .sh_offset = sizeof(struct ElfImage),
6719             },
6720             [5] = { /* .symtab */
6721                 .sh_type = SHT_SYMTAB,
6722                 .sh_offset = offsetof(struct ElfImage, sym),
6723                 .sh_size = sizeof(img->sym),
6724                 .sh_info = 1,
6725                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6726                 .sh_entsize = sizeof(ElfW(Sym)),
6727             },
6728             [6] = { /* .strtab */
6729                 .sh_type = SHT_STRTAB,
6730                 .sh_offset = offsetof(struct ElfImage, str),
6731                 .sh_size = sizeof(img->str),
6732             }
6733         },
6734         .sym = {
6735             [1] = { /* code_gen_buffer */
6736                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6737                 .st_shndx = 1,
6738             }
6739         },
6740         .di = {
6741             .len = sizeof(struct DebugInfo) - 4,
6742             .version = 2,
6743             .ptr_size = sizeof(void *),
6744             .cu_die = 1,
6745             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6746             .fn_die = 2,
6747             .fn_name = "code_gen_buffer"
6748         },
6749         .da = {
6750             1,          /* abbrev number (the cu) */
6751             0x11, 1,    /* DW_TAG_compile_unit, has children */
6752             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6753             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6754             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6755             0, 0,       /* end of abbrev */
6756             2,          /* abbrev number (the fn) */
6757             0x2e, 0,    /* DW_TAG_subprogram, no children */
6758             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6759             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6760             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6761             0, 0,       /* end of abbrev */
6762             0           /* no more abbrev */
6763         },
6764         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6765                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6766     };
6767 
6768     /* We only need a single jit entry; statically allocate it.  */
6769     static struct jit_code_entry one_entry;
6770 
6771     uintptr_t buf = (uintptr_t)buf_ptr;
6772     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6773     DebugFrameHeader *dfh;
6774 
6775     img = g_malloc(img_size);
6776     *img = img_template;
6777 
6778     img->phdr.p_vaddr = buf;
6779     img->phdr.p_paddr = buf;
6780     img->phdr.p_memsz = buf_size;
6781 
6782     img->shdr[1].sh_name = find_string(img->str, ".text");
6783     img->shdr[1].sh_addr = buf;
6784     img->shdr[1].sh_size = buf_size;
6785 
6786     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6787     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6788 
6789     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6790     img->shdr[4].sh_size = debug_frame_size;
6791 
6792     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6793     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6794 
6795     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6796     img->sym[1].st_value = buf;
6797     img->sym[1].st_size = buf_size;
6798 
6799     img->di.cu_low_pc = buf;
6800     img->di.cu_high_pc = buf + buf_size;
6801     img->di.fn_low_pc = buf;
6802     img->di.fn_high_pc = buf + buf_size;
6803 
6804     dfh = (DebugFrameHeader *)(img + 1);
6805     memcpy(dfh, debug_frame, debug_frame_size);
6806     dfh->fde.func_start = buf;
6807     dfh->fde.func_len = buf_size;
6808 
6809 #ifdef DEBUG_JIT
    /* Enable this block to debug creation of the ELF image file.
       One can use readelf, objdump, or other inspection utilities.  */
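    /* For example (path assuming the default temporary directory):
           readelf --debug-dump=frames /tmp/qemu.jit  */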
6812     {
6813         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6814         FILE *f = fopen(jit, "w+b");
6815         if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid the unused-result warning for fwrite.  */
6818             }
6819             fclose(f);
6820         }
6821     }
6822 #endif
6823 
6824     one_entry.symfile_addr = img;
6825     one_entry.symfile_size = img_size;
6826 
6827     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6828     __jit_debug_descriptor.relevant_entry = &one_entry;
6829     __jit_debug_descriptor.first_entry = &one_entry;
6830     __jit_debug_register_code();
6831 }
6832 #else
6833 /* No support for the feature.  Provide the entry point expected by exec.c,
6834    and implement the internal function we declared earlier.  */
6835 
6836 static void tcg_register_jit_int(const void *buf, size_t size,
6837                                  const void *debug_frame,
6838                                  size_t debug_frame_size)
6839 {
6840 }
6841 
6842 void tcg_register_jit(const void *buf, size_t buf_size)
6843 {
6844 }
6845 #endif /* ELF_HOST_MACHINE */
6846 
6847 #if !TCG_TARGET_MAYBE_vec
6848 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6849 {
6850     g_assert_not_reached();
6851 }
6852 #endif
6853