xref: /openbmc/qemu/tcg/tcg.c (revision fffd3dc9022efe89b9196d738127c294cf43a4d6)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* host code addr following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
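
/*
 * For example, a 4-byte access that must be single-copy atomic and
 * 4-byte aligned is described as { .atom = MO_32, .align = MO_32 }:
 * both fields are lg2 values, so MO_32 (== 2) denotes 1 << 2 bytes.
 */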
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
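
/*
 * The helpers above adapt emission to the backend's instruction unit.
 * For example, with TCG_TARGET_INSN_UNIT_SIZE == 1 (a byte-stream target
 * such as x86), tcg_out32() takes the memcpy path and advances code_ptr
 * by four units, while with TCG_TARGET_INSN_UNIT_SIZE == 4 (a fixed-width
 * target such as aarch64) it stores a single unit directly.
 */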
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
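
/*
 * Typical lifecycle (sketch; the insn emission and the relocation type
 * R_EXAMPLE are backend-specific placeholders):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);  // note fixup site
 *     // ... emit branch with placeholder target ...
 *     tcg_out_label(s, l);                  // bind label to code_ptr
 *
 * Once all labels have values, tcg_resolve_relocs() applies every
 * recorded fixup via the backend's patch_reloc().
 */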
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
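
/*
 * Example: to load dst (64-bit) with the sign-extended 8-bit value held
 * in src, a caller would write
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_REG, MO_SB, src);
 *
 * which dispatches to tcg_out_ext8s() above.
 */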
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
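
/*
 * Overlap example: with i1 = { dst = R1, src = R0 } and
 * i2 = { dst = R0, src = R1 }, the two moves form a cycle.  The code
 * above first tries a native xchg of R0/R1; failing that, it saves R0
 * in @scratch, emits i2, and completes i1 from the saved copy.
 */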
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch register or two xchg operations.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
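
/*
 * The three-node cycle mirrors the two-node case: a chain such as
 * R0 -> R1 -> R2 -> R0 is resolved either by two xchg operations or by
 * parking one source in @scratch and emitting the other two moves first.
 */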
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
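
/*
 * Keeping the pool sorted (larger entries first, equal sizes ordered by
 * content) places duplicate constants adjacent to one another, which
 * lets tcg_out_pool_finalize() below emit each distinct value once.
 */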
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
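
/*
 * For example, a line C_O1_I2(r, r, ri) in tcg-target-con-set.h expands
 * here to the enumerator c_o1_i2_r_r_ri, and expands again below to the
 * matching TCGConstraintSet initializer { 1, 2, { "r", "r", "ri" } }.
 */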
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
890 typedef struct TCGConstraintSet {
891     uint8_t nb_oargs, nb_iargs;
892     const char *args_ct_str[TCG_MAX_OP_ARGS];
893 } TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
938 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
939 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
940 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
941 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
942 
943 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
944 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
945 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
946 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
947 
948 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
949 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
950 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
951 
952 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
953 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
954 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host ISA.
966  *   Otherwise:        The register allocation constraints for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
981 typedef struct TCGOutOpBinary {
982     TCGOutOp base;
983     void (*out_rrr)(TCGContext *s, TCGType type,
984                     TCGReg a0, TCGReg a1, TCGReg a2);
985     void (*out_rri)(TCGContext *s, TCGType type,
986                     TCGReg a0, TCGReg a1, tcg_target_long a2);
987 } TCGOutOpBinary;
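
/*
 * A backend supplies one such descriptor per opcode it implements.
 * As a sketch (tgen_add/tgen_addi stand in for the target's emitters):
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, ri),
 *         .out_rrr = tgen_add,
 *         .out_rri = tgen_addi,
 *     };
 *
 * all_outop[] below then binds INDEX_op_add to it via OUTOP().
 */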
988 
989 #include "tcg-target.c.inc"
990 
991 #ifndef CONFIG_TCG_INTERPRETER
992 /* Validate CPUTLBDescFast placement. */
993 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
994                         sizeof(CPUNegativeOffsetState))
995                   < MIN_TLB_MASK_TABLE_OFS);
996 #endif
997 
998 /*
999  * Register V as the TCGOutOp for O.
1000  * This verifies that V is of type T, otherwise giving a nice compiler error.
1001  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1002  */
1003 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1004 
1005 /* Register allocation descriptions for every TCGOpcode. */
1006 static const TCGOutOp * const all_outop[NB_OPS] = {
1007     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1008     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1009     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1010     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1011     OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1012     OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1013 };
1014 
1015 #undef OUTOP
1016 
1017 /*
1018  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1019  * and registered the target's TCG globals) must register with this function
1020  * before initiating translation.
1021  *
1022  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1023  * of tcg_region_init() for the reasoning behind this.
1024  *
1025  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1026  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1027  * is not used anymore for translation once this function is called.
1028  *
1029  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1030  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1031  * modes.
1032  */
1033 #ifdef CONFIG_USER_ONLY
1034 void tcg_register_thread(void)
1035 {
1036     tcg_ctx = &tcg_init_ctx;
1037 }
1038 #else
1039 void tcg_register_thread(void)
1040 {
1041     TCGContext *s = g_malloc(sizeof(*s));
1042     unsigned int i, n;
1043 
1044     *s = tcg_init_ctx;
1045 
1046     /* Relink mem_base.  */
1047     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1048         if (tcg_init_ctx.temps[i].mem_base) {
1049             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1050             tcg_debug_assert(b >= 0 && b < n);
1051             s->temps[i].mem_base = &s->temps[b];
1052         }
1053     }
1054 
1055     /* Claim an entry in tcg_ctxs */
1056     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1057     g_assert(n < tcg_max_ctxs);
1058     qatomic_set(&tcg_ctxs[n], s);
1059 
1060     if (n > 0) {
1061         tcg_region_initial_alloc(s);
1062     }
1063 
1064     tcg_ctx = s;
1065 }
1066 #endif /* !CONFIG_USER_ONLY */
1067 
1068 /* pool based memory allocation */
1069 void *tcg_malloc_internal(TCGContext *s, int size)
1070 {
1071     TCGPool *p;
1072     int pool_size;
1073 
1074     if (size > TCG_POOL_CHUNK_SIZE) {
1075         /* big malloc: insert a new pool (XXX: could optimize) */
1076         p = g_malloc(sizeof(TCGPool) + size);
1077         p->size = size;
1078         p->next = s->pool_first_large;
1079         s->pool_first_large = p;
1080         return p->data;
1081     } else {
1082         p = s->pool_current;
1083         if (!p) {
1084             p = s->pool_first;
1085             if (!p)
1086                 goto new_pool;
1087         } else {
1088             if (!p->next) {
1089             new_pool:
1090                 pool_size = TCG_POOL_CHUNK_SIZE;
1091                 p = g_malloc(sizeof(TCGPool) + pool_size);
1092                 p->size = pool_size;
1093                 p->next = NULL;
1094                 if (s->pool_current) {
1095                     s->pool_current->next = p;
1096                 } else {
1097                     s->pool_first = p;
1098                 }
1099             } else {
1100                 p = p->next;
1101             }
1102         }
1103     }
1104     s->pool_current = p;
1105     s->pool_cur = p->data + size;
1106     s->pool_end = p->data + p->size;
1107     return p->data;
1108 }
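
/*
 * tcg_malloc() in tcg.h bump-allocates from pool_cur/pool_end and only
 * calls into tcg_malloc_internal() when the current chunk is exhausted,
 * so e.g.
 *
 *     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 *
 * is just a pointer increment in the common case.  All pool memory is
 * reclaimed in bulk by tcg_pool_reset() above, between translations.
 */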
1109 
1110 void tcg_pool_reset(TCGContext *s)
1111 {
1112     TCGPool *p, *t;
1113     for (p = s->pool_first_large; p; p = t) {
1114         t = p->next;
1115         g_free(p);
1116     }
1117     s->pool_first_large = NULL;
1118     s->pool_cur = s->pool_end = NULL;
1119     s->pool_current = NULL;
1120 }
1121 
1122 /*
1123  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1124  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1125  * We only use these for layout in tcg_out_ld_helper_ret and
1126  * tcg_out_st_helper_args, and share them between several of
1127  * the helpers, with the end result that it's easier to build manually.
1128  */
1129 
1130 #if TCG_TARGET_REG_BITS == 32
1131 # define dh_typecode_ttl  dh_typecode_i32
1132 #else
1133 # define dh_typecode_ttl  dh_typecode_i64
1134 #endif
1135 
1136 static TCGHelperInfo info_helper_ld32_mmu = {
1137     .flags = TCG_CALL_NO_WG,
1138     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1139               | dh_typemask(env, 1)
1140               | dh_typemask(i64, 2)  /* uint64_t addr */
1141               | dh_typemask(i32, 3)  /* unsigned oi */
1142               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1143 };
1144 
1145 static TCGHelperInfo info_helper_ld64_mmu = {
1146     .flags = TCG_CALL_NO_WG,
1147     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1148               | dh_typemask(env, 1)
1149               | dh_typemask(i64, 2)  /* uint64_t addr */
1150               | dh_typemask(i32, 3)  /* unsigned oi */
1151               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1152 };
1153 
1154 static TCGHelperInfo info_helper_ld128_mmu = {
1155     .flags = TCG_CALL_NO_WG,
1156     .typemask = dh_typemask(i128, 0) /* return Int128 */
1157               | dh_typemask(env, 1)
1158               | dh_typemask(i64, 2)  /* uint64_t addr */
1159               | dh_typemask(i32, 3)  /* unsigned oi */
1160               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1161 };
1162 
1163 static TCGHelperInfo info_helper_st32_mmu = {
1164     .flags = TCG_CALL_NO_WG,
1165     .typemask = dh_typemask(void, 0)
1166               | dh_typemask(env, 1)
1167               | dh_typemask(i64, 2)  /* uint64_t addr */
1168               | dh_typemask(i32, 3)  /* uint32_t data */
1169               | dh_typemask(i32, 4)  /* unsigned oi */
1170               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1171 };
1172 
1173 static TCGHelperInfo info_helper_st64_mmu = {
1174     .flags = TCG_CALL_NO_WG,
1175     .typemask = dh_typemask(void, 0)
1176               | dh_typemask(env, 1)
1177               | dh_typemask(i64, 2)  /* uint64_t addr */
1178               | dh_typemask(i64, 3)  /* uint64_t data */
1179               | dh_typemask(i32, 4)  /* unsigned oi */
1180               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1181 };
1182 
1183 static TCGHelperInfo info_helper_st128_mmu = {
1184     .flags = TCG_CALL_NO_WG,
1185     .typemask = dh_typemask(void, 0)
1186               | dh_typemask(env, 1)
1187               | dh_typemask(i64, 2)  /* uint64_t addr */
1188               | dh_typemask(i128, 3) /* Int128 data */
1189               | dh_typemask(i32, 4)  /* unsigned oi */
1190               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1191 };
1192 
1193 #ifdef CONFIG_TCG_INTERPRETER
1194 static ffi_type *typecode_to_ffi(int argmask)
1195 {
1196     /*
1197      * libffi does not support __int128_t, so we have forced Int128
1198      * to use the structure definition instead of the builtin type.
1199      */
1200     static ffi_type *ffi_type_i128_elements[3] = {
1201         &ffi_type_uint64,
1202         &ffi_type_uint64,
1203         NULL
1204     };
1205     static ffi_type ffi_type_i128 = {
1206         .size = 16,
1207         .alignment = __alignof__(Int128),
1208         .type = FFI_TYPE_STRUCT,
1209         .elements = ffi_type_i128_elements,
1210     };
1211 
1212     switch (argmask) {
1213     case dh_typecode_void:
1214         return &ffi_type_void;
1215     case dh_typecode_i32:
1216         return &ffi_type_uint32;
1217     case dh_typecode_s32:
1218         return &ffi_type_sint32;
1219     case dh_typecode_i64:
1220         return &ffi_type_uint64;
1221     case dh_typecode_s64:
1222         return &ffi_type_sint64;
1223     case dh_typecode_ptr:
1224         return &ffi_type_pointer;
1225     case dh_typecode_i128:
1226         return &ffi_type_i128;
1227     }
1228     g_assert_not_reached();
1229 }
1230 
1231 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1232 {
1233     unsigned typemask = info->typemask;
1234     struct {
1235         ffi_cif cif;
1236         ffi_type *args[];
1237     } *ca;
1238     ffi_status status;
1239     int nargs;
1240 
1241     /* Ignoring the return type, find the last non-zero field. */
1242     nargs = 32 - clz32(typemask >> 3);
1243     nargs = DIV_ROUND_UP(nargs, 3);
1244     assert(nargs <= MAX_CALL_IARGS);
1245 
1246     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1247     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1248     ca->cif.nargs = nargs;
1249 
1250     if (nargs != 0) {
1251         ca->cif.arg_types = ca->args;
1252         for (int j = 0; j < nargs; ++j) {
1253             int typecode = extract32(typemask, (j + 1) * 3, 3);
1254             ca->args[j] = typecode_to_ffi(typecode);
1255         }
1256     }
1257 
1258     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1259                           ca->cif.rtype, ca->cif.arg_types);
1260     assert(status == FFI_OK);
1261 
1262     return &ca->cif;
1263 }
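
/*
 * Worked example: for info_helper_ld32_mmu above, the last argument
 * occupies 3-bit slot 4 of typemask; after the >> 3 its typecode sits
 * in bits 9..11, so 32 - clz32(...) lands in 10..12 and
 * DIV_ROUND_UP(..., 3) yields the expected 4 arguments.
 */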
1264 
1265 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1266 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1267 #else
1268 #define HELPER_INFO_INIT(I)      (&(I)->init)
1269 #define HELPER_INFO_INIT_VAL(I)  1
1270 #endif /* CONFIG_TCG_INTERPRETER */
1271 
1272 static inline bool arg_slot_reg_p(unsigned arg_slot)
1273 {
1274     /*
1275      * Split the sizeof away from the comparison to avoid a -Werror
1276      * "unsigned < 0 is always false" warning when iarg_regs is empty.
1277      */
1278     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1279     return arg_slot < nreg;
1280 }
1281 
1282 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1283 {
1284     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1285     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1286 
1287     tcg_debug_assert(stk_slot < max);
1288     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1289 }
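
/*
 * Illustration: if tcg_target_call_iarg_regs has six entries, arg_slots
 * 0..5 satisfy arg_slot_reg_p() and map to those registers, while
 * arg_slot 6 maps to the first stack word, i.e. an offset of
 * TCG_TARGET_CALL_STACK_OFFSET from the stack pointer.
 */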
1290 
1291 typedef struct TCGCumulativeArgs {
1292     int arg_idx;                /* tcg_gen_callN args[] */
1293     int info_in_idx;            /* TCGHelperInfo in[] */
1294     int arg_slot;               /* regs+stack slot */
1295     int ref_slot;               /* stack slots for references */
1296 } TCGCumulativeArgs;
1297 
1298 static void layout_arg_even(TCGCumulativeArgs *cum)
1299 {
1300     cum->arg_slot += cum->arg_slot & 1;
1301 }
1302 
1303 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1304                          TCGCallArgumentKind kind)
1305 {
1306     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1307 
1308     *loc = (TCGCallArgumentLoc){
1309         .kind = kind,
1310         .arg_idx = cum->arg_idx,
1311         .arg_slot = cum->arg_slot,
1312     };
1313     cum->info_in_idx++;
1314     cum->arg_slot++;
1315 }
1316 
1317 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1318                                 TCGHelperInfo *info, int n)
1319 {
1320     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1321 
1322     for (int i = 0; i < n; ++i) {
1323         /* Layout all using the same arg_idx, adjusting the subindex. */
1324         loc[i] = (TCGCallArgumentLoc){
1325             .kind = TCG_CALL_ARG_NORMAL,
1326             .arg_idx = cum->arg_idx,
1327             .tmp_subindex = i,
1328             .arg_slot = cum->arg_slot + i,
1329         };
1330     }
1331     cum->info_in_idx += n;
1332     cum->arg_slot += n;
1333 }
1334 
1335 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1336 {
1337     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1338     int n = 128 / TCG_TARGET_REG_BITS;
1339 
1340     /* The first subindex carries the pointer. */
1341     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1342 
1343     /*
1344      * The callee is allowed to clobber memory associated with
1345      * structures passed by reference.  Therefore we must make copies.
1346      * Allocate space from "ref_slot", which will be adjusted to
1347      * follow the parameters on the stack.
1348      */
1349     loc[0].ref_slot = cum->ref_slot;
1350 
1351     /*
1352      * Subsequent words also go into the reference slot, but
1353      * do not accumulate into the regular arguments.
1354      */
1355     for (int i = 1; i < n; ++i) {
1356         loc[i] = (TCGCallArgumentLoc){
1357             .kind = TCG_CALL_ARG_BY_REF_N,
1358             .arg_idx = cum->arg_idx,
1359             .tmp_subindex = i,
1360             .ref_slot = cum->ref_slot + i,
1361         };
1362     }
1363     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1364     cum->ref_slot += n;
1365 }
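
/*
 * E.g. for Int128 on a 64-bit host: the pointer occupies one normal
 * argument slot, while the two data words are assigned ref_slot copies
 * that init_call_layout() later relocates past all other parameters.
 */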
1366 
1367 static void init_call_layout(TCGHelperInfo *info)
1368 {
1369     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1370     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1371     unsigned typemask = info->typemask;
1372     unsigned typecode;
1373     TCGCumulativeArgs cum = { };
1374 
1375     /*
1376      * Parse and place any function return value.
1377      */
1378     typecode = typemask & 7;
1379     switch (typecode) {
1380     case dh_typecode_void:
1381         info->nr_out = 0;
1382         break;
1383     case dh_typecode_i32:
1384     case dh_typecode_s32:
1385     case dh_typecode_ptr:
1386         info->nr_out = 1;
1387         info->out_kind = TCG_CALL_RET_NORMAL;
1388         break;
1389     case dh_typecode_i64:
1390     case dh_typecode_s64:
1391         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1392         info->out_kind = TCG_CALL_RET_NORMAL;
1393         /* Query the last register now to trigger any assert early. */
1394         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1395         break;
1396     case dh_typecode_i128:
1397         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1398         info->out_kind = TCG_TARGET_CALL_RET_I128;
1399         switch (TCG_TARGET_CALL_RET_I128) {
1400         case TCG_CALL_RET_NORMAL:
1401             /* Query the last register now to trigger any assert early. */
1402             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1403             break;
1404         case TCG_CALL_RET_BY_VEC:
1405             /* Query the single register now to trigger any assert early. */
1406             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1407             break;
1408         case TCG_CALL_RET_BY_REF:
1409             /*
1410              * Allocate the first argument to the output.
1411              * We don't need to store this anywhere, just make it
1412              * unavailable for use in the input loop below.
1413              */
1414             cum.arg_slot = 1;
1415             break;
1416         default:
1417             qemu_build_not_reached();
1418         }
1419         break;
1420     default:
1421         g_assert_not_reached();
1422     }
1423 
1424     /*
1425      * Parse and place function arguments.
1426      */
1427     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1428         TCGCallArgumentKind kind;
1429         TCGType type;
1430 
1431         typecode = typemask & 7;
1432         switch (typecode) {
1433         case dh_typecode_i32:
1434         case dh_typecode_s32:
1435             type = TCG_TYPE_I32;
1436             break;
1437         case dh_typecode_i64:
1438         case dh_typecode_s64:
1439             type = TCG_TYPE_I64;
1440             break;
1441         case dh_typecode_ptr:
1442             type = TCG_TYPE_PTR;
1443             break;
1444         case dh_typecode_i128:
1445             type = TCG_TYPE_I128;
1446             break;
1447         default:
1448             g_assert_not_reached();
1449         }
1450 
1451         switch (type) {
1452         case TCG_TYPE_I32:
1453             switch (TCG_TARGET_CALL_ARG_I32) {
1454             case TCG_CALL_ARG_EVEN:
1455                 layout_arg_even(&cum);
1456                 /* fall through */
1457             case TCG_CALL_ARG_NORMAL:
1458                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1459                 break;
1460             case TCG_CALL_ARG_EXTEND:
1461                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1462                 layout_arg_1(&cum, info, kind);
1463                 break;
1464             default:
1465                 qemu_build_not_reached();
1466             }
1467             break;
1468 
1469         case TCG_TYPE_I64:
1470             switch (TCG_TARGET_CALL_ARG_I64) {
1471             case TCG_CALL_ARG_EVEN:
1472                 layout_arg_even(&cum);
1473                 /* fall through */
1474             case TCG_CALL_ARG_NORMAL:
1475                 if (TCG_TARGET_REG_BITS == 32) {
1476                     layout_arg_normal_n(&cum, info, 2);
1477                 } else {
1478                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1479                 }
1480                 break;
1481             default:
1482                 qemu_build_not_reached();
1483             }
1484             break;
1485 
1486         case TCG_TYPE_I128:
1487             switch (TCG_TARGET_CALL_ARG_I128) {
1488             case TCG_CALL_ARG_EVEN:
1489                 layout_arg_even(&cum);
1490                 /* fall through */
1491             case TCG_CALL_ARG_NORMAL:
1492                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1493                 break;
1494             case TCG_CALL_ARG_BY_REF:
1495                 layout_arg_by_ref(&cum, info);
1496                 break;
1497             default:
1498                 qemu_build_not_reached();
1499             }
1500             break;
1501 
1502         default:
1503             g_assert_not_reached();
1504         }
1505     }
1506     info->nr_in = cum.info_in_idx;
1507 
1508     /* Validate that we didn't overrun the input array. */
1509     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1510     /* Validate the backend has enough argument space. */
1511     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1512 
1513     /*
1514      * Relocate the "ref_slot" area to the end of the parameters.
1515      * Minimizing this stack offset helps code size for x86,
1516      * which has a signed 8-bit offset encoding.
1517      */
1518     if (cum.ref_slot != 0) {
1519         int ref_base = 0;
1520 
1521         if (cum.arg_slot > max_reg_slots) {
1522             int align = __alignof(Int128) / sizeof(tcg_target_long);
1523 
1524             ref_base = cum.arg_slot - max_reg_slots;
1525             if (align > 1) {
1526                 ref_base = ROUND_UP(ref_base, align);
1527             }
1528         }
1529         assert(ref_base + cum.ref_slot <= max_stk_slots);
1530         ref_base += max_reg_slots;
1531 
1532         if (ref_base != 0) {
1533             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1534                 TCGCallArgumentLoc *loc = &info->in[i];
1535                 switch (loc->kind) {
1536                 case TCG_CALL_ARG_BY_REF:
1537                 case TCG_CALL_ARG_BY_REF_N:
1538                     loc->ref_slot += ref_base;
1539                     break;
1540                 default:
1541                     break;
1542                 }
1543             }
1544         }
1545     }
1546 }
1547 
1548 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1549 static void process_constraint_sets(void);
1550 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1551                                             TCGReg reg, const char *name);
1552 
1553 static void tcg_context_init(unsigned max_threads)
1554 {
1555     TCGContext *s = &tcg_init_ctx;
1556     int n, i;
1557     TCGTemp *ts;
1558 
1559     memset(s, 0, sizeof(*s));
1560     s->nb_globals = 0;
1561 
1562     init_call_layout(&info_helper_ld32_mmu);
1563     init_call_layout(&info_helper_ld64_mmu);
1564     init_call_layout(&info_helper_ld128_mmu);
1565     init_call_layout(&info_helper_st32_mmu);
1566     init_call_layout(&info_helper_st64_mmu);
1567     init_call_layout(&info_helper_st128_mmu);
1568 
1569     tcg_target_init(s);
1570     process_constraint_sets();
1571 
1572     /* Reverse the order of the saved registers, assuming they're all at
1573        the start of tcg_target_reg_alloc_order.  */
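    /*
     * A worked example with hypothetical registers: if the order were
     * { s0, s1, s2, t0, t1 } and t0/t1 were call-clobbered, the loop
     * below finds n = 3 and indirect_reg_alloc_order becomes
     * { s2, s1, s0, t0, t1 }: the call-saved prefix reversed, the
     * remainder copied unchanged.
     */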
1574     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1575         int r = tcg_target_reg_alloc_order[n];
1576         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1577             break;
1578         }
1579     }
1580     for (i = 0; i < n; ++i) {
1581         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1582     }
1583     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1584         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1585     }
1586 
1587     tcg_ctx = s;
1588     /*
1589      * In user-mode we simply share the init context among threads, since we
1590      * use a single region. See the documentation of tcg_region_init() for the
1591      * reasoning behind this.
1592      * In system-mode we will have at most max_threads TCG threads.
1593      */
1594 #ifdef CONFIG_USER_ONLY
1595     tcg_ctxs = &tcg_ctx;
1596     tcg_cur_ctxs = 1;
1597     tcg_max_ctxs = 1;
1598 #else
1599     tcg_max_ctxs = max_threads;
1600     tcg_ctxs = g_new0(TCGContext *, max_threads);
1601 #endif
1602 
1603     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1604     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1605     tcg_env = temp_tcgv_ptr(ts);
1606 }
1607 
1608 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1609 {
1610     tcg_context_init(max_threads);
1611     tcg_region_init(tb_size, splitwx, max_threads);
1612 }
1613 
1614 /*
1615  * Allocate TBs right before their corresponding translated code, making
1616  * sure that TBs and code are on different cache lines.
1617  */
1618 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1619 {
1620     uintptr_t align = qemu_icache_linesize;
1621     TranslationBlock *tb;
1622     void *next;
1623 
1624  retry:
1625     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1626     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1627 
1628     if (unlikely(next > s->code_gen_highwater)) {
1629         if (tcg_region_alloc(s)) {
1630             return NULL;
1631         }
1632         goto retry;
1633     }
1634     qatomic_set(&s->code_gen_ptr, next);
1635     return tb;
1636 }
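/*
 * Example (sizes assumed): with a 64-byte icache line and code_gen_ptr at
 * base + 0x30, the TB struct is placed at base + 0x40 and code_gen_ptr
 * advances to the next line boundary past the struct, so the TB and its
 * translated code never share a cache line.
 */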
1637 
1638 void tcg_prologue_init(void)
1639 {
1640     TCGContext *s = tcg_ctx;
1641     size_t prologue_size;
1642 
1643     s->code_ptr = s->code_gen_ptr;
1644     s->code_buf = s->code_gen_ptr;
1645     s->data_gen_ptr = NULL;
1646 
1647 #ifndef CONFIG_TCG_INTERPRETER
1648     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1649 #endif
1650 
1651     s->pool_labels = NULL;
1652 
1653     qemu_thread_jit_write();
1654     /* Generate the prologue.  */
1655     tcg_target_qemu_prologue(s);
1656 
1657     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1658     {
1659         int result = tcg_out_pool_finalize(s);
1660         tcg_debug_assert(result == 0);
1661     }
1662 
1663     prologue_size = tcg_current_code_size(s);
1664     perf_report_prologue(s->code_gen_ptr, prologue_size);
1665 
1666 #ifndef CONFIG_TCG_INTERPRETER
1667     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1668                         (uintptr_t)s->code_buf, prologue_size);
1669 #endif
1670 
1671     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1672         FILE *logfile = qemu_log_trylock();
1673         if (logfile) {
1674             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1675             if (s->data_gen_ptr) {
1676                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1677                 size_t data_size = prologue_size - code_size;
1678                 size_t i;
1679 
1680                 disas(logfile, s->code_gen_ptr, code_size);
1681 
1682                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1683                     if (sizeof(tcg_target_ulong) == 8) {
1684                         fprintf(logfile,
1685                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1686                                 (uintptr_t)s->data_gen_ptr + i,
1687                                 *(uint64_t *)(s->data_gen_ptr + i));
1688                     } else {
1689                         fprintf(logfile,
1690                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1691                                 (uintptr_t)s->data_gen_ptr + i,
1692                                 *(uint32_t *)(s->data_gen_ptr + i));
1693                     }
1694                 }
1695             } else {
1696                 disas(logfile, s->code_gen_ptr, prologue_size);
1697             }
1698             fprintf(logfile, "\n");
1699             qemu_log_unlock(logfile);
1700         }
1701     }
1702 
1703 #ifndef CONFIG_TCG_INTERPRETER
1704     /*
1705      * Assert that goto_ptr is implemented completely, setting an epilogue.
1706      * For tci, we use NULL as the signal to return from the interpreter,
1707      * so skip this check.
1708      */
1709     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1710 #endif
1711 
1712     tcg_region_prologue_set(s);
1713 }
1714 
1715 void tcg_func_start(TCGContext *s)
1716 {
1717     tcg_pool_reset(s);
1718     s->nb_temps = s->nb_globals;
1719 
1720     /* No temps have been previously allocated for size or locality.  */
1721     tcg_temp_ebb_reset_freed(s);
1722 
1723     /* No constant temps have been previously allocated. */
1724     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1725         if (s->const_table[i]) {
1726             g_hash_table_remove_all(s->const_table[i]);
1727         }
1728     }
1729 
1730     s->nb_ops = 0;
1731     s->nb_labels = 0;
1732     s->current_frame_offset = s->frame_start;
1733 
1734 #ifdef CONFIG_DEBUG_TCG
1735     s->goto_tb_issue_mask = 0;
1736 #endif
1737 
1738     QTAILQ_INIT(&s->ops);
1739     QTAILQ_INIT(&s->free_ops);
1740     s->emit_before_op = NULL;
1741     QSIMPLEQ_INIT(&s->labels);
1742 
1743     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1744     tcg_debug_assert(s->insn_start_words > 0);
1745 }
1746 
1747 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1748 {
1749     int n = s->nb_temps++;
1750 
1751     if (n >= TCG_MAX_TEMPS) {
1752         tcg_raise_tb_overflow(s);
1753     }
1754     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1755 }
1756 
1757 static TCGTemp *tcg_global_alloc(TCGContext *s)
1758 {
1759     TCGTemp *ts;
1760 
1761     tcg_debug_assert(s->nb_globals == s->nb_temps);
1762     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1763     s->nb_globals++;
1764     ts = tcg_temp_alloc(s);
1765     ts->kind = TEMP_GLOBAL;
1766 
1767     return ts;
1768 }
1769 
1770 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1771                                             TCGReg reg, const char *name)
1772 {
1773     TCGTemp *ts;
1774 
1775     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1776 
1777     ts = tcg_global_alloc(s);
1778     ts->base_type = type;
1779     ts->type = type;
1780     ts->kind = TEMP_FIXED;
1781     ts->reg = reg;
1782     ts->name = name;
1783     tcg_regset_set_reg(s->reserved_regs, reg);
1784 
1785     return ts;
1786 }
1787 
1788 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1789 {
1790     s->frame_start = start;
1791     s->frame_end = start + size;
1792     s->frame_temp
1793         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1794 }
1795 
1796 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1797                                             const char *name, TCGType type)
1798 {
1799     TCGContext *s = tcg_ctx;
1800     TCGTemp *base_ts = tcgv_ptr_temp(base);
1801     TCGTemp *ts = tcg_global_alloc(s);
1802     int indirect_reg = 0;
1803 
1804     switch (base_ts->kind) {
1805     case TEMP_FIXED:
1806         break;
1807     case TEMP_GLOBAL:
1808         /* We do not support double-indirect registers.  */
1809         tcg_debug_assert(!base_ts->indirect_reg);
1810         base_ts->indirect_base = 1;
1811         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1812                             ? 2 : 1);
1813         indirect_reg = 1;
1814         break;
1815     default:
1816         g_assert_not_reached();
1817     }
1818 
1819     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1820         TCGTemp *ts2 = tcg_global_alloc(s);
1821         char buf[64];
1822 
1823         ts->base_type = TCG_TYPE_I64;
1824         ts->type = TCG_TYPE_I32;
1825         ts->indirect_reg = indirect_reg;
1826         ts->mem_allocated = 1;
1827         ts->mem_base = base_ts;
1828         ts->mem_offset = offset;
1829         pstrcpy(buf, sizeof(buf), name);
1830         pstrcat(buf, sizeof(buf), "_0");
1831         ts->name = strdup(buf);
1832 
1833         tcg_debug_assert(ts2 == ts + 1);
1834         ts2->base_type = TCG_TYPE_I64;
1835         ts2->type = TCG_TYPE_I32;
1836         ts2->indirect_reg = indirect_reg;
1837         ts2->mem_allocated = 1;
1838         ts2->mem_base = base_ts;
1839         ts2->mem_offset = offset + 4;
1840         ts2->temp_subindex = 1;
1841         pstrcpy(buf, sizeof(buf), name);
1842         pstrcat(buf, sizeof(buf), "_1");
1843         ts2->name = strdup(buf);
1844     } else {
1845         ts->base_type = type;
1846         ts->type = type;
1847         ts->indirect_reg = indirect_reg;
1848         ts->mem_allocated = 1;
1849         ts->mem_base = base_ts;
1850         ts->mem_offset = offset;
1851         ts->name = name;
1852     }
1853     return ts;
1854 }
1855 
1856 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1857 {
1858     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1859     return temp_tcgv_i32(ts);
1860 }
1861 
1862 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1863 {
1864     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1865     return temp_tcgv_i64(ts);
1866 }
1867 
1868 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1869 {
1870     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1871     return temp_tcgv_ptr(ts);
1872 }
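/*
 * Typical front-end usage, as a sketch (CPUFooState and its pc field are
 * hypothetical):
 *
 *     TCGv_i64 cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                              offsetof(CPUFooState, pc),
 *                                              "pc");
 *
 * On a 32-bit host such an i64 global is backed by two adjacent TCGTemps
 * named "pc_0" and "pc_1", at offset and offset + 4 respectively.
 */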
1873 
1874 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1875 {
1876     TCGContext *s = tcg_ctx;
1877     TCGTemp *ts;
1878     int n;
1879 
1880     if (kind == TEMP_EBB) {
1881         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1882 
1883         if (idx < TCG_MAX_TEMPS) {
1884             /* There is already an available temp with the right type.  */
1885             clear_bit(idx, s->free_temps[type].l);
1886 
1887             ts = &s->temps[idx];
1888             ts->temp_allocated = 1;
1889             tcg_debug_assert(ts->base_type == type);
1890             tcg_debug_assert(ts->kind == kind);
1891             return ts;
1892         }
1893     } else {
1894         tcg_debug_assert(kind == TEMP_TB);
1895     }
1896 
1897     switch (type) {
1898     case TCG_TYPE_I32:
1899     case TCG_TYPE_V64:
1900     case TCG_TYPE_V128:
1901     case TCG_TYPE_V256:
1902         n = 1;
1903         break;
1904     case TCG_TYPE_I64:
1905         n = 64 / TCG_TARGET_REG_BITS;
1906         break;
1907     case TCG_TYPE_I128:
1908         n = 128 / TCG_TARGET_REG_BITS;
1909         break;
1910     default:
1911         g_assert_not_reached();
1912     }
1913 
1914     ts = tcg_temp_alloc(s);
1915     ts->base_type = type;
1916     ts->temp_allocated = 1;
1917     ts->kind = kind;
1918 
1919     if (n == 1) {
1920         ts->type = type;
1921     } else {
1922         ts->type = TCG_TYPE_REG;
1923 
1924         for (int i = 1; i < n; ++i) {
1925             TCGTemp *ts2 = tcg_temp_alloc(s);
1926 
1927             tcg_debug_assert(ts2 == ts + i);
1928             ts2->base_type = type;
1929             ts2->type = TCG_TYPE_REG;
1930             ts2->temp_allocated = 1;
1931             ts2->temp_subindex = i;
1932             ts2->kind = kind;
1933         }
1934     }
1935     return ts;
1936 }
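/*
 * Example of the multi-word case above: on a 32-bit host, a TCG_TYPE_I128
 * temp occupies n = 128 / 32 = 4 consecutive TCGTemps; the first carries
 * temp_subindex 0, the rest subindexes 1..3, and each piece has type
 * TCG_TYPE_REG.
 */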
1937 
1938 TCGv_i32 tcg_temp_new_i32(void)
1939 {
1940     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1941 }
1942 
1943 TCGv_i32 tcg_temp_ebb_new_i32(void)
1944 {
1945     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1946 }
1947 
1948 TCGv_i64 tcg_temp_new_i64(void)
1949 {
1950     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1951 }
1952 
1953 TCGv_i64 tcg_temp_ebb_new_i64(void)
1954 {
1955     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1956 }
1957 
1958 TCGv_ptr tcg_temp_new_ptr(void)
1959 {
1960     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1961 }
1962 
1963 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1964 {
1965     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1966 }
1967 
1968 TCGv_i128 tcg_temp_new_i128(void)
1969 {
1970     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1971 }
1972 
1973 TCGv_i128 tcg_temp_ebb_new_i128(void)
1974 {
1975     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1976 }
1977 
1978 TCGv_vec tcg_temp_new_vec(TCGType type)
1979 {
1980     TCGTemp *t;
1981 
1982 #ifdef CONFIG_DEBUG_TCG
1983     switch (type) {
1984     case TCG_TYPE_V64:
1985         assert(TCG_TARGET_HAS_v64);
1986         break;
1987     case TCG_TYPE_V128:
1988         assert(TCG_TARGET_HAS_v128);
1989         break;
1990     case TCG_TYPE_V256:
1991         assert(TCG_TARGET_HAS_v256);
1992         break;
1993     default:
1994         g_assert_not_reached();
1995     }
1996 #endif
1997 
1998     t = tcg_temp_new_internal(type, TEMP_EBB);
1999     return temp_tcgv_vec(t);
2000 }
2001 
2002 /* Create a new temp of the same type as an existing temp.  */
2003 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2004 {
2005     TCGTemp *t = tcgv_vec_temp(match);
2006 
2007     tcg_debug_assert(t->temp_allocated != 0);
2008 
2009     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2010     return temp_tcgv_vec(t);
2011 }
2012 
2013 void tcg_temp_free_internal(TCGTemp *ts)
2014 {
2015     TCGContext *s = tcg_ctx;
2016 
2017     switch (ts->kind) {
2018     case TEMP_CONST:
2019     case TEMP_TB:
2020         /* Silently ignore free. */
2021         break;
2022     case TEMP_EBB:
2023         tcg_debug_assert(ts->temp_allocated != 0);
2024         ts->temp_allocated = 0;
2025         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2026         break;
2027     default:
2028         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2029         g_assert_not_reached();
2030     }
2031 }
2032 
2033 void tcg_temp_free_i32(TCGv_i32 arg)
2034 {
2035     tcg_temp_free_internal(tcgv_i32_temp(arg));
2036 }
2037 
2038 void tcg_temp_free_i64(TCGv_i64 arg)
2039 {
2040     tcg_temp_free_internal(tcgv_i64_temp(arg));
2041 }
2042 
2043 void tcg_temp_free_i128(TCGv_i128 arg)
2044 {
2045     tcg_temp_free_internal(tcgv_i128_temp(arg));
2046 }
2047 
2048 void tcg_temp_free_ptr(TCGv_ptr arg)
2049 {
2050     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2051 }
2052 
2053 void tcg_temp_free_vec(TCGv_vec arg)
2054 {
2055     tcg_temp_free_internal(tcgv_vec_temp(arg));
2056 }
2057 
2058 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2059 {
2060     TCGContext *s = tcg_ctx;
2061     GHashTable *h = s->const_table[type];
2062     TCGTemp *ts;
2063 
2064     if (h == NULL) {
2065         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2066         s->const_table[type] = h;
2067     }
2068 
2069     ts = g_hash_table_lookup(h, &val);
2070     if (ts == NULL) {
2071         int64_t *val_ptr;
2072 
2073         ts = tcg_temp_alloc(s);
2074 
2075         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2076             TCGTemp *ts2 = tcg_temp_alloc(s);
2077 
2078             tcg_debug_assert(ts2 == ts + 1);
2079 
2080             ts->base_type = TCG_TYPE_I64;
2081             ts->type = TCG_TYPE_I32;
2082             ts->kind = TEMP_CONST;
2083             ts->temp_allocated = 1;
2084 
2085             ts2->base_type = TCG_TYPE_I64;
2086             ts2->type = TCG_TYPE_I32;
2087             ts2->kind = TEMP_CONST;
2088             ts2->temp_allocated = 1;
2089             ts2->temp_subindex = 1;
2090 
2091             /*
2092              * Retain the full value of the 64-bit constant in the low
2093              * part, so that the hash table works.  Actual uses will
2094              * truncate the value to the low part.
2095              */
2096             ts[HOST_BIG_ENDIAN].val = val;
2097             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2098             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2099         } else {
2100             ts->base_type = type;
2101             ts->type = type;
2102             ts->kind = TEMP_CONST;
2103             ts->temp_allocated = 1;
2104             ts->val = val;
2105             val_ptr = &ts->val;
2106         }
2107         g_hash_table_insert(h, val_ptr, ts);
2108     }
2109 
2110     return ts;
2111 }
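/*
 * Constants are interned per type, so repeated requests share one temp;
 * a minimal sketch:
 *
 *     TCGv_i32 a = tcg_constant_i32(5);
 *     TCGv_i32 b = tcg_constant_i32(5);   // same TCGTemp as 'a'
 *
 * TEMP_CONST temps persist for the current translation and must not be
 * freed; tcg_temp_free_internal() below deliberately ignores them.
 */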
2112 
2113 TCGv_i32 tcg_constant_i32(int32_t val)
2114 {
2115     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2116 }
2117 
2118 TCGv_i64 tcg_constant_i64(int64_t val)
2119 {
2120     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2121 }
2122 
2123 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2124 {
2125     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2126 }
2127 
2128 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2129 {
2130     val = dup_const(vece, val);
2131     return temp_tcgv_vec(tcg_constant_internal(type, val));
2132 }
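/*
 * dup_const() replicates the low element across 64 bits, e.g. for
 * vece = MO_8 and val = 0xab the interned constant is
 * 0xabababababababab, independent of the requested vector type.
 */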
2133 
2134 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2135 {
2136     TCGTemp *t = tcgv_vec_temp(match);
2137 
2138     tcg_debug_assert(t->temp_allocated != 0);
2139     return tcg_constant_vec(t->base_type, vece, val);
2140 }
2141 
2142 #ifdef CONFIG_DEBUG_TCG
2143 size_t temp_idx(TCGTemp *ts)
2144 {
2145     ptrdiff_t n = ts - tcg_ctx->temps;
2146     assert(n >= 0 && n < tcg_ctx->nb_temps);
2147     return n;
2148 }
2149 
2150 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2151 {
2152     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2153 
2154     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2155     assert(o % sizeof(TCGTemp) == 0);
2156 
2157     return (void *)tcg_ctx + (uintptr_t)v;
2158 }
2159 #endif /* CONFIG_DEBUG_TCG */
2160 
2161 /*
2162  * Return true if OP may appear in the opcode stream with TYPE.
2163  * Test the runtime variable that controls each opcode.
2164  */
2165 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2166 {
2167     bool has_type;
2168 
2169     switch (type) {
2170     case TCG_TYPE_I32:
2171         has_type = true;
2172         break;
2173     case TCG_TYPE_I64:
2174         has_type = TCG_TARGET_REG_BITS == 64;
2175         break;
2176     case TCG_TYPE_V64:
2177         has_type = TCG_TARGET_HAS_v64;
2178         break;
2179     case TCG_TYPE_V128:
2180         has_type = TCG_TARGET_HAS_v128;
2181         break;
2182     case TCG_TYPE_V256:
2183         has_type = TCG_TARGET_HAS_v256;
2184         break;
2185     default:
2186         has_type = false;
2187         break;
2188     }
2189 
2190     switch (op) {
2191     case INDEX_op_discard:
2192     case INDEX_op_set_label:
2193     case INDEX_op_call:
2194     case INDEX_op_br:
2195     case INDEX_op_mb:
2196     case INDEX_op_insn_start:
2197     case INDEX_op_exit_tb:
2198     case INDEX_op_goto_tb:
2199     case INDEX_op_goto_ptr:
2200     case INDEX_op_qemu_ld_i32:
2201     case INDEX_op_qemu_st_i32:
2202     case INDEX_op_qemu_ld_i64:
2203     case INDEX_op_qemu_st_i64:
2204         return true;
2205 
2206     case INDEX_op_qemu_st8_i32:
2207         return TCG_TARGET_HAS_qemu_st8_i32;
2208 
2209     case INDEX_op_qemu_ld_i128:
2210     case INDEX_op_qemu_st_i128:
2211         return TCG_TARGET_HAS_qemu_ldst_i128;
2212 
2213     case INDEX_op_add:
2214     case INDEX_op_and:
2215     case INDEX_op_mov:
2216     case INDEX_op_or:
2217     case INDEX_op_xor:
2218         return has_type;
2219 
2220     case INDEX_op_setcond_i32:
2221     case INDEX_op_brcond_i32:
2222     case INDEX_op_movcond_i32:
2223     case INDEX_op_ld8u_i32:
2224     case INDEX_op_ld8s_i32:
2225     case INDEX_op_ld16u_i32:
2226     case INDEX_op_ld16s_i32:
2227     case INDEX_op_ld_i32:
2228     case INDEX_op_st8_i32:
2229     case INDEX_op_st16_i32:
2230     case INDEX_op_st_i32:
2231     case INDEX_op_sub_i32:
2232     case INDEX_op_neg_i32:
2233     case INDEX_op_mul_i32:
2234     case INDEX_op_shl_i32:
2235     case INDEX_op_shr_i32:
2236     case INDEX_op_sar_i32:
2237     case INDEX_op_extract_i32:
2238     case INDEX_op_sextract_i32:
2239     case INDEX_op_deposit_i32:
2240         return true;
2241 
2242     case INDEX_op_negsetcond_i32:
2243         return TCG_TARGET_HAS_negsetcond_i32;
2244     case INDEX_op_div_i32:
2245     case INDEX_op_divu_i32:
2246         return TCG_TARGET_HAS_div_i32;
2247     case INDEX_op_rem_i32:
2248     case INDEX_op_remu_i32:
2249         return TCG_TARGET_HAS_rem_i32;
2250     case INDEX_op_div2_i32:
2251     case INDEX_op_divu2_i32:
2252         return TCG_TARGET_HAS_div2_i32;
2253     case INDEX_op_rotl_i32:
2254     case INDEX_op_rotr_i32:
2255         return TCG_TARGET_HAS_rot_i32;
2256     case INDEX_op_extract2_i32:
2257         return TCG_TARGET_HAS_extract2_i32;
2258     case INDEX_op_add2_i32:
2259         return TCG_TARGET_HAS_add2_i32;
2260     case INDEX_op_sub2_i32:
2261         return TCG_TARGET_HAS_sub2_i32;
2262     case INDEX_op_mulu2_i32:
2263         return TCG_TARGET_HAS_mulu2_i32;
2264     case INDEX_op_muls2_i32:
2265         return TCG_TARGET_HAS_muls2_i32;
2266     case INDEX_op_muluh_i32:
2267         return TCG_TARGET_HAS_muluh_i32;
2268     case INDEX_op_mulsh_i32:
2269         return TCG_TARGET_HAS_mulsh_i32;
2270     case INDEX_op_bswap16_i32:
2271         return TCG_TARGET_HAS_bswap16_i32;
2272     case INDEX_op_bswap32_i32:
2273         return TCG_TARGET_HAS_bswap32_i32;
2274     case INDEX_op_not_i32:
2275         return TCG_TARGET_HAS_not_i32;
2276     case INDEX_op_eqv_i32:
2277         return TCG_TARGET_HAS_eqv_i32;
2278     case INDEX_op_nand_i32:
2279         return TCG_TARGET_HAS_nand_i32;
2280     case INDEX_op_nor_i32:
2281         return TCG_TARGET_HAS_nor_i32;
2282     case INDEX_op_clz_i32:
2283         return TCG_TARGET_HAS_clz_i32;
2284     case INDEX_op_ctz_i32:
2285         return TCG_TARGET_HAS_ctz_i32;
2286     case INDEX_op_ctpop_i32:
2287         return TCG_TARGET_HAS_ctpop_i32;
2288 
2289     case INDEX_op_brcond2_i32:
2290     case INDEX_op_setcond2_i32:
2291         return TCG_TARGET_REG_BITS == 32;
2292 
2293     case INDEX_op_setcond_i64:
2294     case INDEX_op_brcond_i64:
2295     case INDEX_op_movcond_i64:
2296     case INDEX_op_ld8u_i64:
2297     case INDEX_op_ld8s_i64:
2298     case INDEX_op_ld16u_i64:
2299     case INDEX_op_ld16s_i64:
2300     case INDEX_op_ld32u_i64:
2301     case INDEX_op_ld32s_i64:
2302     case INDEX_op_ld_i64:
2303     case INDEX_op_st8_i64:
2304     case INDEX_op_st16_i64:
2305     case INDEX_op_st32_i64:
2306     case INDEX_op_st_i64:
2307     case INDEX_op_sub_i64:
2308     case INDEX_op_neg_i64:
2309     case INDEX_op_mul_i64:
2310     case INDEX_op_shl_i64:
2311     case INDEX_op_shr_i64:
2312     case INDEX_op_sar_i64:
2313     case INDEX_op_ext_i32_i64:
2314     case INDEX_op_extu_i32_i64:
2315     case INDEX_op_extract_i64:
2316     case INDEX_op_sextract_i64:
2317     case INDEX_op_deposit_i64:
2318         return TCG_TARGET_REG_BITS == 64;
2319 
2320     case INDEX_op_negsetcond_i64:
2321         return TCG_TARGET_HAS_negsetcond_i64;
2322     case INDEX_op_div_i64:
2323     case INDEX_op_divu_i64:
2324         return TCG_TARGET_HAS_div_i64;
2325     case INDEX_op_rem_i64:
2326     case INDEX_op_remu_i64:
2327         return TCG_TARGET_HAS_rem_i64;
2328     case INDEX_op_div2_i64:
2329     case INDEX_op_divu2_i64:
2330         return TCG_TARGET_HAS_div2_i64;
2331     case INDEX_op_rotl_i64:
2332     case INDEX_op_rotr_i64:
2333         return TCG_TARGET_HAS_rot_i64;
2334     case INDEX_op_extract2_i64:
2335         return TCG_TARGET_HAS_extract2_i64;
2336     case INDEX_op_extrl_i64_i32:
2337     case INDEX_op_extrh_i64_i32:
2338         return TCG_TARGET_HAS_extr_i64_i32;
2339     case INDEX_op_bswap16_i64:
2340         return TCG_TARGET_HAS_bswap16_i64;
2341     case INDEX_op_bswap32_i64:
2342         return TCG_TARGET_HAS_bswap32_i64;
2343     case INDEX_op_bswap64_i64:
2344         return TCG_TARGET_HAS_bswap64_i64;
2345     case INDEX_op_not_i64:
2346         return TCG_TARGET_HAS_not_i64;
2347     case INDEX_op_eqv_i64:
2348         return TCG_TARGET_HAS_eqv_i64;
2349     case INDEX_op_nand_i64:
2350         return TCG_TARGET_HAS_nand_i64;
2351     case INDEX_op_nor_i64:
2352         return TCG_TARGET_HAS_nor_i64;
2353     case INDEX_op_clz_i64:
2354         return TCG_TARGET_HAS_clz_i64;
2355     case INDEX_op_ctz_i64:
2356         return TCG_TARGET_HAS_ctz_i64;
2357     case INDEX_op_ctpop_i64:
2358         return TCG_TARGET_HAS_ctpop_i64;
2359     case INDEX_op_add2_i64:
2360         return TCG_TARGET_HAS_add2_i64;
2361     case INDEX_op_sub2_i64:
2362         return TCG_TARGET_HAS_sub2_i64;
2363     case INDEX_op_mulu2_i64:
2364         return TCG_TARGET_HAS_mulu2_i64;
2365     case INDEX_op_muls2_i64:
2366         return TCG_TARGET_HAS_muls2_i64;
2367     case INDEX_op_muluh_i64:
2368         return TCG_TARGET_HAS_muluh_i64;
2369     case INDEX_op_mulsh_i64:
2370         return TCG_TARGET_HAS_mulsh_i64;
2371 
2372     case INDEX_op_mov_vec:
2373     case INDEX_op_dup_vec:
2374     case INDEX_op_dupm_vec:
2375     case INDEX_op_ld_vec:
2376     case INDEX_op_st_vec:
2377     case INDEX_op_add_vec:
2378     case INDEX_op_sub_vec:
2379     case INDEX_op_and_vec:
2380     case INDEX_op_or_vec:
2381     case INDEX_op_xor_vec:
2382     case INDEX_op_cmp_vec:
2383         return has_type;
2384     case INDEX_op_dup2_vec:
2385         return has_type && TCG_TARGET_REG_BITS == 32;
2386     case INDEX_op_not_vec:
2387         return has_type && TCG_TARGET_HAS_not_vec;
2388     case INDEX_op_neg_vec:
2389         return has_type && TCG_TARGET_HAS_neg_vec;
2390     case INDEX_op_abs_vec:
2391         return has_type && TCG_TARGET_HAS_abs_vec;
2392     case INDEX_op_andc_vec:
2393         return has_type && TCG_TARGET_HAS_andc_vec;
2394     case INDEX_op_orc_vec:
2395         return has_type && TCG_TARGET_HAS_orc_vec;
2396     case INDEX_op_nand_vec:
2397         return has_type && TCG_TARGET_HAS_nand_vec;
2398     case INDEX_op_nor_vec:
2399         return has_type && TCG_TARGET_HAS_nor_vec;
2400     case INDEX_op_eqv_vec:
2401         return has_type && TCG_TARGET_HAS_eqv_vec;
2402     case INDEX_op_mul_vec:
2403         return has_type && TCG_TARGET_HAS_mul_vec;
2404     case INDEX_op_shli_vec:
2405     case INDEX_op_shri_vec:
2406     case INDEX_op_sari_vec:
2407         return has_type && TCG_TARGET_HAS_shi_vec;
2408     case INDEX_op_shls_vec:
2409     case INDEX_op_shrs_vec:
2410     case INDEX_op_sars_vec:
2411         return has_type && TCG_TARGET_HAS_shs_vec;
2412     case INDEX_op_shlv_vec:
2413     case INDEX_op_shrv_vec:
2414     case INDEX_op_sarv_vec:
2415         return has_type && TCG_TARGET_HAS_shv_vec;
2416     case INDEX_op_rotli_vec:
2417         return has_type && TCG_TARGET_HAS_roti_vec;
2418     case INDEX_op_rotls_vec:
2419         return has_type && TCG_TARGET_HAS_rots_vec;
2420     case INDEX_op_rotlv_vec:
2421     case INDEX_op_rotrv_vec:
2422         return has_type && TCG_TARGET_HAS_rotv_vec;
2423     case INDEX_op_ssadd_vec:
2424     case INDEX_op_usadd_vec:
2425     case INDEX_op_sssub_vec:
2426     case INDEX_op_ussub_vec:
2427         return has_type && TCG_TARGET_HAS_sat_vec;
2428     case INDEX_op_smin_vec:
2429     case INDEX_op_umin_vec:
2430     case INDEX_op_smax_vec:
2431     case INDEX_op_umax_vec:
2432         return has_type && TCG_TARGET_HAS_minmax_vec;
2433     case INDEX_op_bitsel_vec:
2434         return has_type && TCG_TARGET_HAS_bitsel_vec;
2435     case INDEX_op_cmpsel_vec:
2436         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2437 
2438     default:
2439         if (op < INDEX_op_last_generic) {
2440             const TCGOutOp *outop;
2441             TCGConstraintSetIndex con_set;
2442 
2443             if (!has_type) {
2444                 return false;
2445             }
2446 
2447             outop = all_outop[op];
2448             tcg_debug_assert(outop != NULL);
2449 
2450             con_set = outop->static_constraint;
2451             if (con_set == C_Dynamic) {
2452                 con_set = outop->dynamic_constraint(type, flags);
2453             }
2454             if (con_set >= 0) {
2455                 return true;
2456             }
2457             tcg_debug_assert(con_set == C_NotImplemented);
2458             return false;
2459         }
2460         tcg_debug_assert(op < NB_OPS);
2461         return true;
2462 
2463     case INDEX_op_last_generic:
2464         g_assert_not_reached();
2465     }
2466 }
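/*
 * Callers typically probe before emitting and expand otherwise; a
 * minimal sketch:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
 *         // emit the ctpop op directly
 *     } else {
 *         // expand population count via shifts and masks
 *     }
 */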
2467 
2468 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2469 {
2470     unsigned width;
2471 
2472     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2473     width = (type == TCG_TYPE_I32 ? 32 : 64);
2474 
2475     tcg_debug_assert(ofs < width);
2476     tcg_debug_assert(len > 0);
2477     tcg_debug_assert(len <= width - ofs);
2478 
2479     return TCG_TARGET_deposit_valid(type, ofs, len);
2480 }
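/*
 * For TCG_TYPE_I32, e.g. ofs = 8 and len = 8 names the field at bits
 * [15:8]; the asserts above reject out-of-range fields, and the backend
 * hook decides whether this particular (ofs, len) pair is encodable.
 */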
2481 
2482 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2483 
2484 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2485                           TCGTemp *ret, TCGTemp **args)
2486 {
2487     TCGv_i64 extend_free[MAX_CALL_IARGS];
2488     int n_extend = 0;
2489     TCGOp *op;
2490     int i, n, pi = 0, total_args;
2491 
2492     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2493         init_call_layout(info);
2494         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2495     }
2496 
2497     total_args = info->nr_out + info->nr_in + 2;
2498     op = tcg_op_alloc(INDEX_op_call, total_args);
2499 
2500 #ifdef CONFIG_PLUGIN
2501     /* Flag helpers that may affect guest state */
2502     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2503         tcg_ctx->plugin_insn->calls_helpers = true;
2504     }
2505 #endif
2506 
2507     TCGOP_CALLO(op) = n = info->nr_out;
2508     switch (n) {
2509     case 0:
2510         tcg_debug_assert(ret == NULL);
2511         break;
2512     case 1:
2513         tcg_debug_assert(ret != NULL);
2514         op->args[pi++] = temp_arg(ret);
2515         break;
2516     case 2:
2517     case 4:
2518         tcg_debug_assert(ret != NULL);
2519         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2520         tcg_debug_assert(ret->temp_subindex == 0);
2521         for (i = 0; i < n; ++i) {
2522             op->args[pi++] = temp_arg(ret + i);
2523         }
2524         break;
2525     default:
2526         g_assert_not_reached();
2527     }
2528 
2529     TCGOP_CALLI(op) = n = info->nr_in;
2530     for (i = 0; i < n; i++) {
2531         const TCGCallArgumentLoc *loc = &info->in[i];
2532         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2533 
2534         switch (loc->kind) {
2535         case TCG_CALL_ARG_NORMAL:
2536         case TCG_CALL_ARG_BY_REF:
2537         case TCG_CALL_ARG_BY_REF_N:
2538             op->args[pi++] = temp_arg(ts);
2539             break;
2540 
2541         case TCG_CALL_ARG_EXTEND_U:
2542         case TCG_CALL_ARG_EXTEND_S:
2543             {
2544                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2545                 TCGv_i32 orig = temp_tcgv_i32(ts);
2546 
2547                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2548                     tcg_gen_ext_i32_i64(temp, orig);
2549                 } else {
2550                     tcg_gen_extu_i32_i64(temp, orig);
2551                 }
2552                 op->args[pi++] = tcgv_i64_arg(temp);
2553                 extend_free[n_extend++] = temp;
2554             }
2555             break;
2556 
2557         default:
2558             g_assert_not_reached();
2559         }
2560     }
2561     op->args[pi++] = (uintptr_t)func;
2562     op->args[pi++] = (uintptr_t)info;
2563     tcg_debug_assert(pi == total_args);
2564 
2565     if (tcg_ctx->emit_before_op) {
2566         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2567     } else {
2568         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2569     }
2570 
2571     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2572     for (i = 0; i < n_extend; ++i) {
2573         tcg_temp_free_i64(extend_free[i]);
2574     }
2575 }
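/*
 * The TCG_CALL_ARG_EXTEND_* cases above apply on hosts whose ABI wants
 * 32-bit arguments widened to 64 bits: the i32 value is copied into a
 * scratch EBB i64 temp, sign- or zero-extended as the layout demands,
 * and the scratch is freed once the call op has been queued.
 */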
2576 
2577 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2578 {
2579     tcg_gen_callN(func, info, ret, NULL);
2580 }
2581 
2582 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2583 {
2584     tcg_gen_callN(func, info, ret, &t1);
2585 }
2586 
2587 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2588                    TCGTemp *t1, TCGTemp *t2)
2589 {
2590     TCGTemp *args[2] = { t1, t2 };
2591     tcg_gen_callN(func, info, ret, args);
2592 }
2593 
2594 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2595                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2596 {
2597     TCGTemp *args[3] = { t1, t2, t3 };
2598     tcg_gen_callN(func, info, ret, args);
2599 }
2600 
2601 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2602                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2603 {
2604     TCGTemp *args[4] = { t1, t2, t3, t4 };
2605     tcg_gen_callN(func, info, ret, args);
2606 }
2607 
2608 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2609                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2610 {
2611     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2612     tcg_gen_callN(func, info, ret, args);
2613 }
2614 
2615 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2616                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2617                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2618 {
2619     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2620     tcg_gen_callN(func, info, ret, args);
2621 }
2622 
2623 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2624                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2625                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2626 {
2627     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2628     tcg_gen_callN(func, info, ret, args);
2629 }
2630 
2631 static void tcg_reg_alloc_start(TCGContext *s)
2632 {
2633     int i, n;
2634 
2635     for (i = 0, n = s->nb_temps; i < n; i++) {
2636         TCGTemp *ts = &s->temps[i];
2637         TCGTempVal val = TEMP_VAL_MEM;
2638 
2639         switch (ts->kind) {
2640         case TEMP_CONST:
2641             val = TEMP_VAL_CONST;
2642             break;
2643         case TEMP_FIXED:
2644             val = TEMP_VAL_REG;
2645             break;
2646         case TEMP_GLOBAL:
2647             break;
2648         case TEMP_EBB:
2649             val = TEMP_VAL_DEAD;
2650             /* fall through */
2651         case TEMP_TB:
2652             ts->mem_allocated = 0;
2653             break;
2654         default:
2655             g_assert_not_reached();
2656         }
2657         ts->val_type = val;
2658     }
2659 
2660     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2661 }
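/*
 * Initial locations chosen above: TEMP_CONST lives as a constant,
 * TEMP_FIXED in its reserved register, TEMP_GLOBAL in its canonical
 * memory slot, TEMP_EBB starts dead (no value yet), and TEMP_TB starts
 * in memory with no backing slot allocated yet.
 */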
2662 
2663 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2664                                  TCGTemp *ts)
2665 {
2666     int idx = temp_idx(ts);
2667 
2668     switch (ts->kind) {
2669     case TEMP_FIXED:
2670     case TEMP_GLOBAL:
2671         pstrcpy(buf, buf_size, ts->name);
2672         break;
2673     case TEMP_TB:
2674         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2675         break;
2676     case TEMP_EBB:
2677         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2678         break;
2679     case TEMP_CONST:
2680         switch (ts->type) {
2681         case TCG_TYPE_I32:
2682             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2683             break;
2684 #if TCG_TARGET_REG_BITS > 32
2685         case TCG_TYPE_I64:
2686             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2687             break;
2688 #endif
2689         case TCG_TYPE_V64:
2690         case TCG_TYPE_V128:
2691         case TCG_TYPE_V256:
2692             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2693                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2694             break;
2695         default:
2696             g_assert_not_reached();
2697         }
2698         break;
2699     }
2700     return buf;
2701 }
2702 
2703 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2704                              int buf_size, TCGArg arg)
2705 {
2706     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2707 }
2708 
2709 static const char * const cond_name[] =
2710 {
2711     [TCG_COND_NEVER] = "never",
2712     [TCG_COND_ALWAYS] = "always",
2713     [TCG_COND_EQ] = "eq",
2714     [TCG_COND_NE] = "ne",
2715     [TCG_COND_LT] = "lt",
2716     [TCG_COND_GE] = "ge",
2717     [TCG_COND_LE] = "le",
2718     [TCG_COND_GT] = "gt",
2719     [TCG_COND_LTU] = "ltu",
2720     [TCG_COND_GEU] = "geu",
2721     [TCG_COND_LEU] = "leu",
2722     [TCG_COND_GTU] = "gtu",
2723     [TCG_COND_TSTEQ] = "tsteq",
2724     [TCG_COND_TSTNE] = "tstne",
2725 };
2726 
2727 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2728 {
2729     [MO_UB]   = "ub",
2730     [MO_SB]   = "sb",
2731     [MO_LEUW] = "leuw",
2732     [MO_LESW] = "lesw",
2733     [MO_LEUL] = "leul",
2734     [MO_LESL] = "lesl",
2735     [MO_LEUQ] = "leq",
2736     [MO_BEUW] = "beuw",
2737     [MO_BESW] = "besw",
2738     [MO_BEUL] = "beul",
2739     [MO_BESL] = "besl",
2740     [MO_BEUQ] = "beq",
2741     [MO_128 + MO_BE] = "beo",
2742     [MO_128 + MO_LE] = "leo",
2743 };
2744 
2745 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2746     [MO_UNALN >> MO_ASHIFT]    = "un+",
2747     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2748     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2749     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2750     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2751     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2752     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2753     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2754 };
2755 
2756 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2757     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2758     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2759     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2760     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2761     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2762     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2763 };
2764 
2765 static const char bswap_flag_name[][6] = {
2766     [TCG_BSWAP_IZ] = "iz",
2767     [TCG_BSWAP_OZ] = "oz",
2768     [TCG_BSWAP_OS] = "os",
2769     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2770     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2771 };
2772 
2773 #ifdef CONFIG_PLUGIN
2774 static const char * const plugin_from_name[] = {
2775     "from-tb",
2776     "from-insn",
2777     "after-insn",
2778     "after-tb",
2779 };
2780 #endif
2781 
2782 static inline bool tcg_regset_single(TCGRegSet d)
2783 {
2784     return (d & (d - 1)) == 0;
2785 }
2786 
2787 static inline TCGReg tcg_regset_first(TCGRegSet d)
2788 {
2789     if (TCG_TARGET_NB_REGS <= 32) {
2790         return ctz32(d);
2791     } else {
2792         return ctz64(d);
2793     }
2794 }
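/*
 * tcg_regset_single() is the usual power-of-two test: clearing the
 * lowest set bit via d & (d - 1) yields zero iff at most one bit was
 * set, e.g. 0b0100 -> true, 0b0110 -> false.  tcg_regset_first() then
 * maps that bit to a register number with count-trailing-zeros.
 */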
2795 
2796 /* Return only the number of characters output -- no error return. */
2797 #define ne_fprintf(...) \
2798     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2799 
2800 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2801 {
2802     char buf[128];
2803     TCGOp *op;
2804 
2805     QTAILQ_FOREACH(op, &s->ops, link) {
2806         int i, k, nb_oargs, nb_iargs, nb_cargs;
2807         const TCGOpDef *def;
2808         TCGOpcode c;
2809         int col = 0;
2810 
2811         c = op->opc;
2812         def = &tcg_op_defs[c];
2813 
2814         if (c == INDEX_op_insn_start) {
2815             nb_oargs = 0;
2816             col += ne_fprintf(f, "\n ----");
2817 
2818             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2819                 col += ne_fprintf(f, " %016" PRIx64,
2820                                   tcg_get_insn_start_param(op, i));
2821             }
2822         } else if (c == INDEX_op_call) {
2823             const TCGHelperInfo *info = tcg_call_info(op);
2824             void *func = tcg_call_func(op);
2825 
2826             /* variable number of arguments */
2827             nb_oargs = TCGOP_CALLO(op);
2828             nb_iargs = TCGOP_CALLI(op);
2829             nb_cargs = def->nb_cargs;
2830 
2831             col += ne_fprintf(f, " %s ", def->name);
2832 
2833             /*
2834              * Print the function name from TCGHelperInfo, if available.
2835              * Note that plugins have a template function for the info,
2836              * but the actual function pointer comes from the plugin.
2837              */
2838             if (func == info->func) {
2839                 col += ne_fprintf(f, "%s", info->name);
2840             } else {
2841                 col += ne_fprintf(f, "plugin(%p)", func);
2842             }
2843 
2844             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2845             for (i = 0; i < nb_oargs; i++) {
2846                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2847                                                             op->args[i]));
2848             }
2849             for (i = 0; i < nb_iargs; i++) {
2850                 TCGArg arg = op->args[nb_oargs + i];
2851                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2852                 col += ne_fprintf(f, ",%s", t);
2853             }
2854         } else {
2855             if (def->flags & TCG_OPF_INT) {
2856                 col += ne_fprintf(f, " %s_i%d ",
2857                                   def->name,
2858                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2859             } else if (def->flags & TCG_OPF_VECTOR) {
2860                 col += ne_fprintf(f, "%s v%d,e%d,",
2861                                   def->name,
2862                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2863                                   8 << TCGOP_VECE(op));
2864             } else {
2865                 col += ne_fprintf(f, " %s ", def->name);
2866             }
2867 
2868             nb_oargs = def->nb_oargs;
2869             nb_iargs = def->nb_iargs;
2870             nb_cargs = def->nb_cargs;
2871 
2872             k = 0;
2873             for (i = 0; i < nb_oargs; i++) {
2874                 const char *sep = k ? "," : "";
2875                 col += ne_fprintf(f, "%s%s", sep,
2876                                   tcg_get_arg_str(s, buf, sizeof(buf),
2877                                                   op->args[k++]));
2878             }
2879             for (i = 0; i < nb_iargs; i++) {
2880                 const char *sep = k ? "," : "";
2881                 col += ne_fprintf(f, "%s%s", sep,
2882                                   tcg_get_arg_str(s, buf, sizeof(buf),
2883                                                   op->args[k++]));
2884             }
2885             switch (c) {
2886             case INDEX_op_brcond_i32:
2887             case INDEX_op_setcond_i32:
2888             case INDEX_op_negsetcond_i32:
2889             case INDEX_op_movcond_i32:
2890             case INDEX_op_brcond2_i32:
2891             case INDEX_op_setcond2_i32:
2892             case INDEX_op_brcond_i64:
2893             case INDEX_op_setcond_i64:
2894             case INDEX_op_negsetcond_i64:
2895             case INDEX_op_movcond_i64:
2896             case INDEX_op_cmp_vec:
2897             case INDEX_op_cmpsel_vec:
2898                 if (op->args[k] < ARRAY_SIZE(cond_name)
2899                     && cond_name[op->args[k]]) {
2900                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2901                 } else {
2902                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2903                 }
2904                 i = 1;
2905                 break;
2906             case INDEX_op_qemu_ld_i32:
2907             case INDEX_op_qemu_st_i32:
2908             case INDEX_op_qemu_st8_i32:
2909             case INDEX_op_qemu_ld_i64:
2910             case INDEX_op_qemu_st_i64:
2911             case INDEX_op_qemu_ld_i128:
2912             case INDEX_op_qemu_st_i128:
2913                 {
2914                     const char *s_al, *s_op, *s_at;
2915                     MemOpIdx oi = op->args[k++];
2916                     MemOp mop = get_memop(oi);
2917                     unsigned ix = get_mmuidx(oi);
2918 
2919                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2920                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2921                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2922                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2923 
2924                     /* If all fields are accounted for, print symbolically. */
2925                     if (!mop && s_al && s_op && s_at) {
2926                         col += ne_fprintf(f, ",%s%s%s,%u",
2927                                           s_at, s_al, s_op, ix);
2928                     } else {
2929                         mop = get_memop(oi);
2930                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2931                     }
2932                     i = 1;
2933                 }
2934                 break;
2935             case INDEX_op_bswap16_i32:
2936             case INDEX_op_bswap16_i64:
2937             case INDEX_op_bswap32_i32:
2938             case INDEX_op_bswap32_i64:
2939             case INDEX_op_bswap64_i64:
2940                 {
2941                     TCGArg flags = op->args[k];
2942                     const char *name = NULL;
2943 
2944                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2945                         name = bswap_flag_name[flags];
2946                     }
2947                     if (name) {
2948                         col += ne_fprintf(f, ",%s", name);
2949                     } else {
2950                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2951                     }
2952                     i = k = 1;
2953                 }
2954                 break;
2955 #ifdef CONFIG_PLUGIN
2956             case INDEX_op_plugin_cb:
2957                 {
2958                     TCGArg from = op->args[k++];
2959                     const char *name = NULL;
2960 
2961                     if (from < ARRAY_SIZE(plugin_from_name)) {
2962                         name = plugin_from_name[from];
2963                     }
2964                     if (name) {
2965                         col += ne_fprintf(f, "%s", name);
2966                     } else {
2967                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2968                     }
2969                     i = 1;
2970                 }
2971                 break;
2972 #endif
2973             default:
2974                 i = 0;
2975                 break;
2976             }
2977             switch (c) {
2978             case INDEX_op_set_label:
2979             case INDEX_op_br:
2980             case INDEX_op_brcond_i32:
2981             case INDEX_op_brcond_i64:
2982             case INDEX_op_brcond2_i32:
2983                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2984                                   arg_label(op->args[k])->id);
2985                 i++, k++;
2986                 break;
2987             case INDEX_op_mb:
2988                 {
2989                     TCGBar membar = op->args[k];
2990                     const char *b_op, *m_op;
2991 
2992                     switch (membar & TCG_BAR_SC) {
2993                     case 0:
2994                         b_op = "none";
2995                         break;
2996                     case TCG_BAR_LDAQ:
2997                         b_op = "acq";
2998                         break;
2999                     case TCG_BAR_STRL:
3000                         b_op = "rel";
3001                         break;
3002                     case TCG_BAR_SC:
3003                         b_op = "seq";
3004                         break;
3005                     default:
3006                         g_assert_not_reached();
3007                     }
3008 
3009                     switch (membar & TCG_MO_ALL) {
3010                     case 0:
3011                         m_op = "none";
3012                         break;
3013                     case TCG_MO_LD_LD:
3014                         m_op = "rr";
3015                         break;
3016                     case TCG_MO_LD_ST:
3017                         m_op = "rw";
3018                         break;
3019                     case TCG_MO_ST_LD:
3020                         m_op = "wr";
3021                         break;
3022                     case TCG_MO_ST_ST:
3023                         m_op = "ww";
3024                         break;
3025                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3026                         m_op = "rr+rw";
3027                         break;
3028                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3029                         m_op = "rr+wr";
3030                         break;
3031                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3032                         m_op = "rr+ww";
3033                         break;
3034                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3035                         m_op = "rw+wr";
3036                         break;
3037                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3038                         m_op = "rw+ww";
3039                         break;
3040                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3041                         m_op = "wr+ww";
3042                         break;
3043                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3044                         m_op = "rr+rw+wr";
3045                         break;
3046                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3047                         m_op = "rr+rw+ww";
3048                         break;
3049                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3050                         m_op = "rr+wr+ww";
3051                         break;
3052                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3053                         m_op = "rw+wr+ww";
3054                         break;
3055                     case TCG_MO_ALL:
3056                         m_op = "all";
3057                         break;
3058                     default:
3059                         g_assert_not_reached();
3060                     }
3061 
3062                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3063                     i++, k++;
3064                 }
3065                 break;
3066             default:
3067                 break;
3068             }
3069             for (; i < nb_cargs; i++, k++) {
3070                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3071                                   op->args[k]);
3072             }
3073         }
3074 
3075         if (have_prefs || op->life) {
3076             for (; col < 40; ++col) {
3077                 putc(' ', f);
3078             }
3079         }
3080 
3081         if (op->life) {
3082             unsigned life = op->life;
3083 
3084             if (life & (SYNC_ARG * 3)) {
3085                 ne_fprintf(f, "  sync:");
3086                 for (i = 0; i < 2; ++i) {
3087                     if (life & (SYNC_ARG << i)) {
3088                         ne_fprintf(f, " %d", i);
3089                     }
3090                 }
3091             }
3092             life /= DEAD_ARG;
3093             if (life) {
3094                 ne_fprintf(f, "  dead:");
3095                 for (i = 0; life; ++i, life >>= 1) {
3096                     if (life & 1) {
3097                         ne_fprintf(f, " %d", i);
3098                     }
3099                 }
3100             }
3101         }
3102 
3103         if (have_prefs) {
3104             for (i = 0; i < nb_oargs; ++i) {
3105                 TCGRegSet set = output_pref(op, i);
3106 
3107                 if (i == 0) {
3108                     ne_fprintf(f, "  pref=");
3109                 } else {
3110                     ne_fprintf(f, ",");
3111                 }
3112                 if (set == 0) {
3113                     ne_fprintf(f, "none");
3114                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3115                     ne_fprintf(f, "all");
3116 #ifdef CONFIG_DEBUG_TCG
3117                 } else if (tcg_regset_single(set)) {
3118                     TCGReg reg = tcg_regset_first(set);
3119                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3120 #endif
3121                 } else if (TCG_TARGET_NB_REGS <= 32) {
3122                     ne_fprintf(f, "0x%x", (uint32_t)set);
3123                 } else {
3124                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3125                 }
3126             }
3127         }
3128 
3129         putc('\n', f);
3130     }
3131 }
3132 
3133 /* We give higher priority to constraints with fewer registers. */
3134 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3135 {
3136     int n;
3137 
3138     arg_ct += k;
3139     n = ctpop64(arg_ct->regs);
3140 
3141     /*
3142      * Sort constraints of a single register first, which includes output
3143      * aliases (which must exactly match the input already allocated).
3144      */
3145     if (n == 1 || arg_ct->oalias) {
3146         return INT_MAX;
3147     }
3148 
3149     /*
3150      * Sort register pairs next, first then second immediately after.
3151      * Arbitrarily sort multiple pairs by the index of the first reg;
3152      * there shouldn't be many pairs.
3153      */
3154     switch (arg_ct->pair) {
3155     case 1:
3156     case 3:
3157         return (k + 1) * 2;
3158     case 2:
3159         return (arg_ct->pair_index + 1) * 2 - 1;
3160     }
3161 
3162     /* Finally, sort by decreasing register count. */
3163     assert(n > 1);
3164     return -n;
3165 }
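/*
 * Net ordering, highest priority first: single-register constraints and
 * output aliases (INT_MAX); then register pairs, each first member
 * sorting just ahead of its partner; then the remaining constraints by
 * decreasing register count (-n).
 */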
3166 
3167 /* sort from highest priority to lowest */
3168 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3169 {
3170     int i, j;
3171 
3172     for (i = 0; i < n; i++) {
3173         a[start + i].sort_index = start + i;
3174     }
3175     if (n <= 1) {
3176         return;
3177     }
3178     for (i = 0; i < n - 1; i++) {
3179         for (j = i + 1; j < n; j++) {
3180             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3181             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3182             if (p1 < p2) {
3183                 int tmp = a[start + i].sort_index;
3184                 a[start + i].sort_index = a[start + j].sort_index;
3185                 a[start + j].sort_index = tmp;
3186             }
3187         }
3188     }
3189 }
3190 
3191 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3192 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3193 
3194 static void process_constraint_sets(void)
3195 {
3196     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3197         const TCGConstraintSet *tdefs = &constraint_sets[c];
3198         TCGArgConstraint *args_ct = all_cts[c];
3199         int nb_oargs = tdefs->nb_oargs;
3200         int nb_iargs = tdefs->nb_iargs;
3201         int nb_args = nb_oargs + nb_iargs;
3202         bool saw_alias_pair = false;
3203 
3204         for (int i = 0; i < nb_args; i++) {
3205             const char *ct_str = tdefs->args_ct_str[i];
3206             bool input_p = i >= nb_oargs;
3207             int o;
3208 
3209             switch (*ct_str) {
3210             case '0' ... '9':
3211                 o = *ct_str - '0';
3212                 tcg_debug_assert(input_p);
3213                 tcg_debug_assert(o < nb_oargs);
3214                 tcg_debug_assert(args_ct[o].regs != 0);
3215                 tcg_debug_assert(!args_ct[o].oalias);
3216                 args_ct[i] = args_ct[o];
3217                 /* The output sets oalias.  */
3218                 args_ct[o].oalias = 1;
3219                 args_ct[o].alias_index = i;
3220                 /* The input sets ialias. */
3221                 args_ct[i].ialias = 1;
3222                 args_ct[i].alias_index = o;
3223                 if (args_ct[i].pair) {
3224                     saw_alias_pair = true;
3225                 }
3226                 tcg_debug_assert(ct_str[1] == '\0');
3227                 continue;
3228 
3229             case '&':
3230                 tcg_debug_assert(!input_p);
3231                 args_ct[i].newreg = true;
3232                 ct_str++;
3233                 break;
3234 
3235             case 'p': /* plus */
3236                 /* Allocate to the register after the previous. */
3237                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3238                 o = i - 1;
3239                 tcg_debug_assert(!args_ct[o].pair);
3240                 tcg_debug_assert(!args_ct[o].ct);
3241                 args_ct[i] = (TCGArgConstraint){
3242                     .pair = 2,
3243                     .pair_index = o,
3244                     .regs = args_ct[o].regs << 1,
3245                     .newreg = args_ct[o].newreg,
3246                 };
3247                 args_ct[o].pair = 1;
3248                 args_ct[o].pair_index = i;
3249                 tcg_debug_assert(ct_str[1] == '\0');
3250                 continue;
3251 
3252             case 'm': /* minus */
3253                 /* Allocate to the register before the previous. */
3254                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3255                 o = i - 1;
3256                 tcg_debug_assert(!args_ct[o].pair);
3257                 tcg_debug_assert(!args_ct[o].ct);
3258                 args_ct[i] = (TCGArgConstraint){
3259                     .pair = 1,
3260                     .pair_index = o,
3261                     .regs = args_ct[o].regs >> 1,
3262                     .newreg = args_ct[o].newreg,
3263                 };
3264                 args_ct[o].pair = 2;
3265                 args_ct[o].pair_index = i;
3266                 tcg_debug_assert(ct_str[1] == '\0');
3267                 continue;
3268             }
3269 
3270             do {
3271                 switch (*ct_str) {
3272                 case 'i':
3273                     args_ct[i].ct |= TCG_CT_CONST;
3274                     break;
3275 #ifdef TCG_REG_ZERO
3276                 case 'z':
3277                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3278                     break;
3279 #endif
3280 
3281                 /* Include all of the target-specific constraints. */
3282 
3283 #undef CONST
3284 #define CONST(CASE, MASK) \
3285     case CASE: args_ct[i].ct |= MASK; break;
3286 #define REGS(CASE, MASK) \
3287     case CASE: args_ct[i].regs |= MASK; break;
3288 
3289 #include "tcg-target-con-str.h"
3290 
3291 #undef REGS
3292 #undef CONST
3293                 default:
3294                 case '0' ... '9':
3295                 case '&':
3296                 case 'p':
3297                 case 'm':
3298                     /* Typo in TCGConstraintSet constraint. */
3299                     g_assert_not_reached();
3300                 }
3301             } while (*++ct_str != '\0');
3302         }
3303 
3304         /*
3305          * Fix up output pairs that are aliased with inputs.
3306          * When we created the alias, we copied pair from the output.
3307          * There are three cases:
3308          *    (1a) Pairs of inputs alias pairs of outputs.
3309          *    (1b) One input aliases the first of a pair of outputs.
3310          *    (2)  One input aliases the second of a pair of outputs.
3311          *
3312          * Case 1a is handled by making sure that the pair_index'es are
3313          * properly updated so that they appear the same as a pair of inputs.
3314          *
3315          * Case 1b is handled by setting the pair_index of the input to
3316          * itself, simply so it doesn't point to an unrelated argument.
3317          * Since we don't encounter the "second" during the input allocation
3318          * phase, nothing happens with the second half of the input pair.
3319          *
3320          * Case 2 is handled by setting the second input to pair=3, the
3321          * first output to pair=3, and the pair_index'es to match.
3322          */
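        /*
         * Schematic example (illustrative, not a real target's set):
         * with outputs { "r", "p" } forming a pair and an input "1"
         * aliasing the second output, the input copies pair == 2 from
         * that output.  The fixup below then rewrites both the input
         * and the first output to pair == 3, with their pair_index
         * fields pointing at each other (case 2).
         */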
3323         if (saw_alias_pair) {
3324             for (int i = nb_oargs; i < nb_args; i++) {
3325                 int o, o2, i2;
3326 
3327                 /*
3328                  * Since [0-9pm] must be alone in the constraint string,
3329                  * the only way they can both be set is if the pair comes
3330                  * from the output alias.
3331                  */
3332                 if (!args_ct[i].ialias) {
3333                     continue;
3334                 }
3335                 switch (args_ct[i].pair) {
3336                 case 0:
3337                     break;
3338                 case 1:
3339                     o = args_ct[i].alias_index;
3340                     o2 = args_ct[o].pair_index;
3341                     tcg_debug_assert(args_ct[o].pair == 1);
3342                     tcg_debug_assert(args_ct[o2].pair == 2);
3343                     if (args_ct[o2].oalias) {
3344                         /* Case 1a */
3345                         i2 = args_ct[o2].alias_index;
3346                         tcg_debug_assert(args_ct[i2].pair == 2);
3347                         args_ct[i2].pair_index = i;
3348                         args_ct[i].pair_index = i2;
3349                     } else {
3350                         /* Case 1b */
3351                         args_ct[i].pair_index = i;
3352                     }
3353                     break;
3354                 case 2:
3355                     o = args_ct[i].alias_index;
3356                     o2 = args_ct[o].pair_index;
3357                     tcg_debug_assert(args_ct[o].pair == 2);
3358                     tcg_debug_assert(args_ct[o2].pair == 1);
3359                     if (args_ct[o2].oalias) {
3360                         /* Case 1a */
3361                         i2 = args_ct[o2].alias_index;
3362                         tcg_debug_assert(args_ct[i2].pair == 1);
3363                         args_ct[i2].pair_index = i;
3364                         args_ct[i].pair_index = i2;
3365                     } else {
3366                         /* Case 2 */
3367                         args_ct[i].pair = 3;
3368                         args_ct[o2].pair = 3;
3369                         args_ct[i].pair_index = o2;
3370                         args_ct[o2].pair_index = i;
3371                     }
3372                     break;
3373                 default:
3374                     g_assert_not_reached();
3375                 }
3376             }
3377         }
3378 
3379         /* sort the constraints (XXX: this is just a heuristic) */
3380         sort_constraints(args_ct, 0, nb_oargs);
3381         sort_constraints(args_ct, nb_oargs, nb_iargs);
3382     }
3383 }
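
/*
 * Parsing example (illustrative): a constraint set with args_ct_str of
 * { "r", "r", "ri" } yields two register-only arguments plus one that
 * also accepts a constant (TCG_CT_CONST), while { "r", "0", "r" } makes
 * input 1 an alias of output 0: args_ct[1] copies args_ct[0] and both
 * ends record ialias/oalias and alias_index before sorting.
 */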
3384 
3385 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3386 {
3387     TCGOpcode opc = op->opc;
3388     TCGType type = TCGOP_TYPE(op);
3389     unsigned flags = TCGOP_FLAGS(op);
3390     const TCGOpDef *def = &tcg_op_defs[opc];
3391     const TCGOutOp *outop = all_outop[opc];
3392     TCGConstraintSetIndex con_set;
3393 
3394     if (def->flags & TCG_OPF_NOT_PRESENT) {
3395         return empty_cts;
3396     }
3397 
3398     if (outop) {
3399         con_set = outop->static_constraint;
3400         if (con_set == C_Dynamic) {
3401             con_set = outop->dynamic_constraint(type, flags);
3402         }
3403     } else {
3404         con_set = tcg_target_op_def(opc, type, flags);
3405     }
3406     tcg_debug_assert(con_set >= 0);
3407     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3408 
3409     /* The constraint arguments must match TCGOpcode arguments. */
3410     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3411     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3412 
3413     return all_cts[con_set];
3414 }
3415 
3416 static void remove_label_use(TCGOp *op, int idx)
3417 {
3418     TCGLabel *label = arg_label(op->args[idx]);
3419     TCGLabelUse *use;
3420 
3421     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3422         if (use->op == op) {
3423             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3424             return;
3425         }
3426     }
3427     g_assert_not_reached();
3428 }
3429 
3430 void tcg_op_remove(TCGContext *s, TCGOp *op)
3431 {
3432     switch (op->opc) {
3433     case INDEX_op_br:
3434         remove_label_use(op, 0);
3435         break;
3436     case INDEX_op_brcond_i32:
3437     case INDEX_op_brcond_i64:
3438         remove_label_use(op, 3);
3439         break;
3440     case INDEX_op_brcond2_i32:
3441         remove_label_use(op, 5);
3442         break;
3443     default:
3444         break;
3445     }
3446 
3447     QTAILQ_REMOVE(&s->ops, op, link);
3448     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3449     s->nb_ops--;
3450 }
3451 
3452 void tcg_remove_ops_after(TCGOp *op)
3453 {
3454     TCGContext *s = tcg_ctx;
3455 
3456     while (true) {
3457         TCGOp *last = tcg_last_op();
3458         if (last == op) {
3459             return;
3460         }
3461         tcg_op_remove(s, last);
3462     }
3463 }
3464 
3465 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3466 {
3467     TCGContext *s = tcg_ctx;
3468     TCGOp *op = NULL;
3469 
3470     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3471         QTAILQ_FOREACH(op, &s->free_ops, link) {
3472             if (nargs <= op->nargs) {
3473                 QTAILQ_REMOVE(&s->free_ops, op, link);
3474                 nargs = op->nargs;
3475                 goto found;
3476             }
3477         }
3478     }
3479 
3480     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3481     nargs = MAX(4, nargs);
3482     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3483 
3484  found:
3485     memset(op, 0, offsetof(TCGOp, link));
3486     op->opc = opc;
3487     op->nargs = nargs;
3488 
3489     /* Check for bitfield overflow. */
3490     tcg_debug_assert(op->nargs == nargs);
3491 
3492     s->nb_ops++;
3493     return op;
3494 }
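
/*
 * Example (illustrative): a request for 3 arguments may be satisfied by
 * recycling a 4-argument op from free_ops, in which case nargs is bumped
 * to the recycled op's capacity; a fresh allocation likewise rounds up
 * to at least 4 argument slots to reduce fragmentation.
 */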
3495 
3496 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3497 {
3498     TCGOp *op = tcg_op_alloc(opc, nargs);
3499 
3500     if (tcg_ctx->emit_before_op) {
3501         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3502     } else {
3503         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3504     }
3505     return op;
3506 }
3507 
3508 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3509                             TCGOpcode opc, TCGType type, unsigned nargs)
3510 {
3511     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3512 
3513     TCGOP_TYPE(new_op) = type;
3514     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3515     return new_op;
3516 }
3517 
3518 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3519                            TCGOpcode opc, TCGType type, unsigned nargs)
3520 {
3521     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3522 
3523     TCGOP_TYPE(new_op) = type;
3524     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3525     return new_op;
3526 }
3527 
3528 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3529 {
3530     TCGLabelUse *u;
3531 
3532     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3533         TCGOp *op = u->op;
3534         switch (op->opc) {
3535         case INDEX_op_br:
3536             op->args[0] = label_arg(to);
3537             break;
3538         case INDEX_op_brcond_i32:
3539         case INDEX_op_brcond_i64:
3540             op->args[3] = label_arg(to);
3541             break;
3542         case INDEX_op_brcond2_i32:
3543             op->args[5] = label_arg(to);
3544             break;
3545         default:
3546             g_assert_not_reached();
3547         }
3548     }
3549 
3550     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3551 }
3552 
3553 /* Reachability analysis: remove unreachable code.  */
3554 static void __attribute__((noinline))
3555 reachable_code_pass(TCGContext *s)
3556 {
3557     TCGOp *op, *op_next, *op_prev;
3558     bool dead = false;
3559 
3560     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3561         bool remove = dead;
3562         TCGLabel *label;
3563 
3564         switch (op->opc) {
3565         case INDEX_op_set_label:
3566             label = arg_label(op->args[0]);
3567 
3568             /*
3569              * Note that the first op in the TB is always a load,
3570              * so there is always something before a label.
3571              */
3572             op_prev = QTAILQ_PREV(op, link);
3573 
3574             /*
3575              * If we find two sequential labels, move all branches to
3576              * reference the second label and remove the first label.
3577              * Do this before branch to next optimization, so that the
3578              * middle label is out of the way.
3579              */
3580             if (op_prev->opc == INDEX_op_set_label) {
3581                 move_label_uses(label, arg_label(op_prev->args[0]));
3582                 tcg_op_remove(s, op_prev);
3583                 op_prev = QTAILQ_PREV(op, link);
3584             }
3585 
3586             /*
3587              * Optimization can fold conditional branches to unconditional.
3588              * If we find a label which is preceded by an unconditional
3589              * branch to next, remove the branch.  We couldn't do this when
3590              * processing the branch because any dead code between the branch
3591              * and label had not yet been removed.
3592              */
3593             if (op_prev->opc == INDEX_op_br &&
3594                 label == arg_label(op_prev->args[0])) {
3595                 tcg_op_remove(s, op_prev);
3596                 /* Fall through means insns become live again.  */
3597                 dead = false;
3598             }
3599 
3600             if (QSIMPLEQ_EMPTY(&label->branches)) {
3601                 /*
3602                  * While there is an occasional backward branch, virtually
3603                  * all branches generated by the translators are forward.
3604                  * Which means that, by the time we reach a label, we
3605                  * will generally have already seen every branch that
3606                  * will ever reference it; there is little to be gained
3607                  * by iterating.
3607                  */
3608                 remove = true;
3609             } else {
3610                 /* Once we see a label, insns become live again.  */
3611                 dead = false;
3612                 remove = false;
3613             }
3614             break;
3615 
3616         case INDEX_op_br:
3617         case INDEX_op_exit_tb:
3618         case INDEX_op_goto_ptr:
3619             /* Unconditional branches; everything following is dead.  */
3620             dead = true;
3621             break;
3622 
3623         case INDEX_op_call:
3624             /* Notice noreturn helper calls, raising exceptions.  */
3625             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3626                 dead = true;
3627             }
3628             break;
3629 
3630         case INDEX_op_insn_start:
3631             /* Never remove -- we need to keep these for unwind.  */
3632             remove = false;
3633             break;
3634 
3635         default:
3636             break;
3637         }
3638 
3639         if (remove) {
3640             tcg_op_remove(s, op);
3641         }
3642     }
3643 }
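
/*
 * Example transformations (illustrative):
 *
 *     set_label $L1 ; set_label $L2  ->  set_label $L2  (uses of $L1 moved)
 *     br $L3 ; set_label $L3         ->  set_label $L3  (branch-to-next
 *                                                        removed)
 *
 * Any op between an unconditional branch (or noreturn call) and the
 * next referenced label is deleted as dead, except insn_start, which
 * is kept for unwind info.
 */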
3644 
3645 #define TS_DEAD  1
3646 #define TS_MEM   2
3647 
3648 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3649 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
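
/*
 * Example (illustrative): ts->state is a mask of the two bits above;
 * a global that has been synced back to memory and whose register
 * value is no longer needed carries state == (TS_DEAD | TS_MEM),
 * which is exactly what la_func_end() below assigns.
 */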
3650 
3651 /* For liveness_pass_1, the register preferences for a given temp.  */
3652 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3653 {
3654     return ts->state_ptr;
3655 }
3656 
3657 /* For liveness_pass_1, reset the preferences for a given temp to the
3658  * maximal regset for its type.
3659  */
3660 static inline void la_reset_pref(TCGTemp *ts)
3661 {
3662     *la_temp_pref(ts)
3663         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3664 }
3665 
3666 /* liveness analysis: end of function: all temps are dead, and globals
3667    should be in memory. */
3668 static void la_func_end(TCGContext *s, int ng, int nt)
3669 {
3670     int i;
3671 
3672     for (i = 0; i < ng; ++i) {
3673         s->temps[i].state = TS_DEAD | TS_MEM;
3674         la_reset_pref(&s->temps[i]);
3675     }
3676     for (i = ng; i < nt; ++i) {
3677         s->temps[i].state = TS_DEAD;
3678         la_reset_pref(&s->temps[i]);
3679     }
3680 }
3681 
3682 /* liveness analysis: end of basic block: all temps are dead, globals
3683    and local temps should be in memory. */
3684 static void la_bb_end(TCGContext *s, int ng, int nt)
3685 {
3686     int i;
3687 
3688     for (i = 0; i < nt; ++i) {
3689         TCGTemp *ts = &s->temps[i];
3690         int state;
3691 
3692         switch (ts->kind) {
3693         case TEMP_FIXED:
3694         case TEMP_GLOBAL:
3695         case TEMP_TB:
3696             state = TS_DEAD | TS_MEM;
3697             break;
3698         case TEMP_EBB:
3699         case TEMP_CONST:
3700             state = TS_DEAD;
3701             break;
3702         default:
3703             g_assert_not_reached();
3704         }
3705         ts->state = state;
3706         la_reset_pref(ts);
3707     }
3708 }
3709 
3710 /* liveness analysis: sync globals back to memory.  */
3711 static void la_global_sync(TCGContext *s, int ng)
3712 {
3713     int i;
3714 
3715     for (i = 0; i < ng; ++i) {
3716         int state = s->temps[i].state;
3717         s->temps[i].state = state | TS_MEM;
3718         if (state == TS_DEAD) {
3719             /* If the global was previously dead, reset prefs.  */
3720             la_reset_pref(&s->temps[i]);
3721         }
3722     }
3723 }
3724 
3725 /*
3726  * liveness analysis: conditional branch: all temps are dead unless
3727  * explicitly live-across-conditional-branch, globals and local temps
3728  * should be synced.
3729  */
3730 static void la_bb_sync(TCGContext *s, int ng, int nt)
3731 {
3732     la_global_sync(s, ng);
3733 
3734     for (int i = ng; i < nt; ++i) {
3735         TCGTemp *ts = &s->temps[i];
3736         int state;
3737 
3738         switch (ts->kind) {
3739         case TEMP_TB:
3740             state = ts->state;
3741             ts->state = state | TS_MEM;
3742             if (state != TS_DEAD) {
3743                 continue;
3744             }
3745             break;
3746         case TEMP_EBB:
3747         case TEMP_CONST:
3748             continue;
3749         default:
3750             g_assert_not_reached();
3751         }
3752         la_reset_pref(&s->temps[i]);
3753     }
3754 }
3755 
3756 /* liveness analysis: sync globals back to memory and kill.  */
3757 static void la_global_kill(TCGContext *s, int ng)
3758 {
3759     int i;
3760 
3761     for (i = 0; i < ng; i++) {
3762         s->temps[i].state = TS_DEAD | TS_MEM;
3763         la_reset_pref(&s->temps[i]);
3764     }
3765 }
3766 
3767 /* liveness analysis: note live globals crossing calls.  */
3768 static void la_cross_call(TCGContext *s, int nt)
3769 {
3770     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3771     int i;
3772 
3773     for (i = 0; i < nt; i++) {
3774         TCGTemp *ts = &s->temps[i];
3775         if (!(ts->state & TS_DEAD)) {
3776             TCGRegSet *pset = la_temp_pref(ts);
3777             TCGRegSet set = *pset;
3778 
3779             set &= mask;
3780             /* If the combination is not possible, restart.  */
3781             if (set == 0) {
3782                 set = tcg_target_available_regs[ts->type] & mask;
3783             }
3784             *pset = set;
3785         }
3786     }
3787 }
3788 
3789 /*
3790  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3791  * to TEMP_EBB, if possible.
3792  */
3793 static void __attribute__((noinline))
3794 liveness_pass_0(TCGContext *s)
3795 {
3796     void * const multiple_ebb = (void *)(uintptr_t)-1;
3797     int nb_temps = s->nb_temps;
3798     TCGOp *op, *ebb;
3799 
3800     for (int i = s->nb_globals; i < nb_temps; ++i) {
3801         s->temps[i].state_ptr = NULL;
3802     }
3803 
3804     /*
3805      * Represent each EBB by the op at which it begins.  In the case of
3806      * the first EBB, this is the first op, otherwise it is a label.
3807      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3808      * within a single EBB, else MULTIPLE_EBB.
3809      */
3810     ebb = QTAILQ_FIRST(&s->ops);
3811     QTAILQ_FOREACH(op, &s->ops, link) {
3812         const TCGOpDef *def;
3813         int nb_oargs, nb_iargs;
3814 
3815         switch (op->opc) {
3816         case INDEX_op_set_label:
3817             ebb = op;
3818             continue;
3819         case INDEX_op_discard:
3820             continue;
3821         case INDEX_op_call:
3822             nb_oargs = TCGOP_CALLO(op);
3823             nb_iargs = TCGOP_CALLI(op);
3824             break;
3825         default:
3826             def = &tcg_op_defs[op->opc];
3827             nb_oargs = def->nb_oargs;
3828             nb_iargs = def->nb_iargs;
3829             break;
3830         }
3831 
3832         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3833             TCGTemp *ts = arg_temp(op->args[i]);
3834 
3835             if (ts->kind != TEMP_TB) {
3836                 continue;
3837             }
3838             if (ts->state_ptr == NULL) {
3839                 ts->state_ptr = ebb;
3840             } else if (ts->state_ptr != ebb) {
3841                 ts->state_ptr = multiple_ebb;
3842             }
3843         }
3844     }
3845 
3846     /*
3847      * For TEMP_TB that turned out not to be used beyond one EBB,
3848      * reduce the liveness to TEMP_EBB.
3849      */
3850     for (int i = s->nb_globals; i < nb_temps; ++i) {
3851         TCGTemp *ts = &s->temps[i];
3852         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3853             ts->kind = TEMP_EBB;
3854         }
3855     }
3856 }
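
/*
 * Example (illustrative): a TEMP_TB temp written and read only between
 * one pair of labels records a single EBB in state_ptr and is demoted
 * to TEMP_EBB above; one that is also used after a later set_label
 * hits multiple_ebb and keeps its TB-wide lifetime.
 */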
3857 
3858 /* Liveness analysis: update the opc_arg_life array to tell if a
3859    given input argument is dead.  Instructions updating dead
3860    temporaries are removed. */
3861 static void __attribute__((noinline))
3862 liveness_pass_1(TCGContext *s)
3863 {
3864     int nb_globals = s->nb_globals;
3865     int nb_temps = s->nb_temps;
3866     TCGOp *op, *op_prev;
3867     TCGRegSet *prefs;
3868     int i;
3869 
3870     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3871     for (i = 0; i < nb_temps; ++i) {
3872         s->temps[i].state_ptr = prefs + i;
3873     }
3874 
3875     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3876     la_func_end(s, nb_globals, nb_temps);
3877 
3878     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3879         int nb_iargs, nb_oargs;
3880         TCGOpcode opc_new, opc_new2;
3881         bool have_opc_new2;
3882         TCGLifeData arg_life = 0;
3883         TCGTemp *ts;
3884         TCGOpcode opc = op->opc;
3885         const TCGOpDef *def = &tcg_op_defs[opc];
3886         const TCGArgConstraint *args_ct;
3887 
3888         switch (opc) {
3889         case INDEX_op_call:
3890             {
3891                 const TCGHelperInfo *info = tcg_call_info(op);
3892                 int call_flags = tcg_call_flags(op);
3893 
3894                 nb_oargs = TCGOP_CALLO(op);
3895                 nb_iargs = TCGOP_CALLI(op);
3896 
3897                 /* pure functions can be removed if their result is unused */
3898                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3899                     for (i = 0; i < nb_oargs; i++) {
3900                         ts = arg_temp(op->args[i]);
3901                         if (ts->state != TS_DEAD) {
3902                             goto do_not_remove_call;
3903                         }
3904                     }
3905                     goto do_remove;
3906                 }
3907             do_not_remove_call:
3908 
3909                 /* Output args are dead.  */
3910                 for (i = 0; i < nb_oargs; i++) {
3911                     ts = arg_temp(op->args[i]);
3912                     if (ts->state & TS_DEAD) {
3913                         arg_life |= DEAD_ARG << i;
3914                     }
3915                     if (ts->state & TS_MEM) {
3916                         arg_life |= SYNC_ARG << i;
3917                     }
3918                     ts->state = TS_DEAD;
3919                     la_reset_pref(ts);
3920                 }
3921 
3922                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3923                 memset(op->output_pref, 0, sizeof(op->output_pref));
3924 
3925                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3926                                     TCG_CALL_NO_READ_GLOBALS))) {
3927                     la_global_kill(s, nb_globals);
3928                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3929                     la_global_sync(s, nb_globals);
3930                 }
3931 
3932                 /* Record arguments that die in this helper.  */
3933                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3934                     ts = arg_temp(op->args[i]);
3935                     if (ts->state & TS_DEAD) {
3936                         arg_life |= DEAD_ARG << i;
3937                     }
3938                 }
3939 
3940                 /* For all live registers, remove call-clobbered prefs.  */
3941                 la_cross_call(s, nb_temps);
3942 
3943                 /*
3944                  * Input arguments are live for preceding opcodes.
3945                  *
3946                  * For those arguments that die, and will be allocated in
3947                  * registers, clear the register set for that arg, to be
3948                  * filled in below.  For args that will be on the stack,
3949                  * reset to any available reg.  Process arguments in reverse
3950                  * order so that if a temp is used more than once, the stack
3951                  * reset to max happens before the register reset to 0.
3952                  */
3953                 for (i = nb_iargs - 1; i >= 0; i--) {
3954                     const TCGCallArgumentLoc *loc = &info->in[i];
3955                     ts = arg_temp(op->args[nb_oargs + i]);
3956 
3957                     if (ts->state & TS_DEAD) {
3958                         switch (loc->kind) {
3959                         case TCG_CALL_ARG_NORMAL:
3960                         case TCG_CALL_ARG_EXTEND_U:
3961                         case TCG_CALL_ARG_EXTEND_S:
3962                             if (arg_slot_reg_p(loc->arg_slot)) {
3963                                 *la_temp_pref(ts) = 0;
3964                                 break;
3965                             }
3966                             /* fall through */
3967                         default:
3968                             *la_temp_pref(ts) =
3969                                 tcg_target_available_regs[ts->type];
3970                             break;
3971                         }
3972                         ts->state &= ~TS_DEAD;
3973                     }
3974                 }
3975 
3976                 /*
3977                  * For each input argument, add its input register to prefs.
3978                  * If a temp is used once, this produces a single set bit;
3979                  * if a temp is used multiple times, this produces a set.
3980                  */
3981                 for (i = 0; i < nb_iargs; i++) {
3982                     const TCGCallArgumentLoc *loc = &info->in[i];
3983                     ts = arg_temp(op->args[nb_oargs + i]);
3984 
3985                     switch (loc->kind) {
3986                     case TCG_CALL_ARG_NORMAL:
3987                     case TCG_CALL_ARG_EXTEND_U:
3988                     case TCG_CALL_ARG_EXTEND_S:
3989                         if (arg_slot_reg_p(loc->arg_slot)) {
3990                             tcg_regset_set_reg(*la_temp_pref(ts),
3991                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3992                         }
3993                         break;
3994                     default:
3995                         break;
3996                     }
3997                 }
3998             }
3999             break;
4000         case INDEX_op_insn_start:
4001             break;
4002         case INDEX_op_discard:
4003             /* mark the temporary as dead */
4004             ts = arg_temp(op->args[0]);
4005             ts->state = TS_DEAD;
4006             la_reset_pref(ts);
4007             break;
4008 
4009         case INDEX_op_add2_i32:
4010         case INDEX_op_add2_i64:
4011             opc_new = INDEX_op_add;
4012             goto do_addsub2;
4013         case INDEX_op_sub2_i32:
4014             opc_new = INDEX_op_sub_i32;
4015             goto do_addsub2;
4016         case INDEX_op_sub2_i64:
4017             opc_new = INDEX_op_sub_i64;
4018         do_addsub2:
4019             nb_iargs = 4;
4020             nb_oargs = 2;
4021             /* Test if the high part of the operation is dead, but not
4022                the low part.  The result can be optimized to a simple
4023                add or sub.  This happens often for x86_64 guests when
4024                the CPU mode is set to 32-bit.  */
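            /*
             * Illustrative transformation (hypothetical temps):
             *
             *     add2_i32 t0, t1, a0, a1, b0, b1    (t1 dead)
             * becomes
             *     add      t0, a0, b0
             *
             * by shuffling args[1] <- args[2] and args[2] <- args[4].
             */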
4025             if (arg_temp(op->args[1])->state == TS_DEAD) {
4026                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4027                     goto do_remove;
4028                 }
4029                 /* Replace the opcode and adjust the args in place,
4030                    leaving 3 unused args at the end.  */
4031                 op->opc = opc = opc_new;
4032                 op->args[1] = op->args[2];
4033                 op->args[2] = op->args[4];
4034                 /* Fall through and mark the single-word operation live.  */
4035                 nb_iargs = 2;
4036                 nb_oargs = 1;
4037             }
4038             goto do_not_remove;
4039 
4040         case INDEX_op_mulu2_i32:
4041             opc_new = INDEX_op_mul_i32;
4042             opc_new2 = INDEX_op_muluh_i32;
4043             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4044             goto do_mul2;
4045         case INDEX_op_muls2_i32:
4046             opc_new = INDEX_op_mul_i32;
4047             opc_new2 = INDEX_op_mulsh_i32;
4048             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4049             goto do_mul2;
4050         case INDEX_op_mulu2_i64:
4051             opc_new = INDEX_op_mul_i64;
4052             opc_new2 = INDEX_op_muluh_i64;
4053             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4054             goto do_mul2;
4055         case INDEX_op_muls2_i64:
4056             opc_new = INDEX_op_mul_i64;
4057             opc_new2 = INDEX_op_mulsh_i64;
4058             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4059             goto do_mul2;
4060         do_mul2:
4061             nb_iargs = 2;
4062             nb_oargs = 2;
4063             if (arg_temp(op->args[1])->state == TS_DEAD) {
4064                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4065                     /* Both parts of the operation are dead.  */
4066                     goto do_remove;
4067                 }
4068                 /* The high part of the operation is dead; generate the low. */
4069                 op->opc = opc = opc_new;
4070                 op->args[1] = op->args[2];
4071                 op->args[2] = op->args[3];
4072             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4073                 /* The low part of the operation is dead; generate the high. */
4074                 op->opc = opc = opc_new2;
4075                 op->args[0] = op->args[1];
4076                 op->args[1] = op->args[2];
4077                 op->args[2] = op->args[3];
4078             } else {
4079                 goto do_not_remove;
4080             }
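            /*
             * Illustrative transformations (hypothetical temps):
             *
             *     mulu2_i32 t0, t1, a, b  ->  mul_i32   t0, a, b  (t1 dead)
             *     mulu2_i32 t0, t1, a, b  ->  muluh_i32 t1, a, b  (t0 dead,
             *                                 muluh available)
             */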
4081             /* Mark the single-word operation live.  */
4082             nb_oargs = 1;
4083             goto do_not_remove;
4084 
4085         default:
4086             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4087             nb_iargs = def->nb_iargs;
4088             nb_oargs = def->nb_oargs;
4089 
4090             /* Test if the operation can be removed because all
4091                its outputs are dead.  We assume that nb_oargs == 0
4092                implies side effects.  */
4093             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4094                 for (i = 0; i < nb_oargs; i++) {
4095                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4096                         goto do_not_remove;
4097                     }
4098                 }
4099                 goto do_remove;
4100             }
4101             goto do_not_remove;
4102 
4103         do_remove:
4104             tcg_op_remove(s, op);
4105             break;
4106 
4107         do_not_remove:
4108             for (i = 0; i < nb_oargs; i++) {
4109                 ts = arg_temp(op->args[i]);
4110 
4111                 /* Remember the preference of the uses that followed.  */
4112                 if (i < ARRAY_SIZE(op->output_pref)) {
4113                     op->output_pref[i] = *la_temp_pref(ts);
4114                 }
4115 
4116                 /* Output args are dead.  */
4117                 if (ts->state & TS_DEAD) {
4118                     arg_life |= DEAD_ARG << i;
4119                 }
4120                 if (ts->state & TS_MEM) {
4121                     arg_life |= SYNC_ARG << i;
4122                 }
4123                 ts->state = TS_DEAD;
4124                 la_reset_pref(ts);
4125             }
4126 
4127             /* If end of basic block, update.  */
4128             if (def->flags & TCG_OPF_BB_EXIT) {
4129                 la_func_end(s, nb_globals, nb_temps);
4130             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4131                 la_bb_sync(s, nb_globals, nb_temps);
4132             } else if (def->flags & TCG_OPF_BB_END) {
4133                 la_bb_end(s, nb_globals, nb_temps);
4134             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4135                 la_global_sync(s, nb_globals);
4136                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4137                     la_cross_call(s, nb_temps);
4138                 }
4139             }
4140 
4141             /* Record arguments that die in this opcode.  */
4142             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4143                 ts = arg_temp(op->args[i]);
4144                 if (ts->state & TS_DEAD) {
4145                     arg_life |= DEAD_ARG << i;
4146                 }
4147             }
4148 
4149             /* Input arguments are live for preceding opcodes.  */
4150             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4151                 ts = arg_temp(op->args[i]);
4152                 if (ts->state & TS_DEAD) {
4153                     /* For operands that were dead, initially allow
4154                        all regs for the type.  */
4155                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4156                     ts->state &= ~TS_DEAD;
4157                 }
4158             }
4159 
4160             /* Incorporate constraints for this operand.  */
4161             switch (opc) {
4162             case INDEX_op_mov:
4163                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4164                    have proper constraints.  That said, special case
4165                    moves to propagate preferences backward.  */
4166                 if (IS_DEAD_ARG(1)) {
4167                     *la_temp_pref(arg_temp(op->args[0]))
4168                         = *la_temp_pref(arg_temp(op->args[1]));
4169                 }
4170                 break;
4171 
4172             default:
4173                 args_ct = opcode_args_ct(op);
4174                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4175                     const TCGArgConstraint *ct = &args_ct[i];
4176                     TCGRegSet set, *pset;
4177 
4178                     ts = arg_temp(op->args[i]);
4179                     pset = la_temp_pref(ts);
4180                     set = *pset;
4181 
4182                     set &= ct->regs;
4183                     if (ct->ialias) {
4184                         set &= output_pref(op, ct->alias_index);
4185                     }
4186                     /* If the combination is not possible, restart.  */
4187                     if (set == 0) {
4188                         set = ct->regs;
4189                     }
4190                     *pset = set;
4191                 }
4192                 break;
4193             }
4194             break;
4195         }
4196         op->life = arg_life;
4197     }
4198 }
4199 
4200 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4201 static bool __attribute__((noinline))
4202 liveness_pass_2(TCGContext *s)
4203 {
4204     int nb_globals = s->nb_globals;
4205     int nb_temps, i;
4206     bool changes = false;
4207     TCGOp *op, *op_next;
4208 
4209     /* Create a temporary for each indirect global.  */
4210     for (i = 0; i < nb_globals; ++i) {
4211         TCGTemp *its = &s->temps[i];
4212         if (its->indirect_reg) {
4213             TCGTemp *dts = tcg_temp_alloc(s);
4214             dts->type = its->type;
4215             dts->base_type = its->base_type;
4216             dts->temp_subindex = its->temp_subindex;
4217             dts->kind = TEMP_EBB;
4218             its->state_ptr = dts;
4219         } else {
4220             its->state_ptr = NULL;
4221         }
4222         /* All globals begin dead.  */
4223         its->state = TS_DEAD;
4224     }
4225     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4226         TCGTemp *its = &s->temps[i];
4227         its->state_ptr = NULL;
4228         its->state = TS_DEAD;
4229     }
4230 
4231     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4232         TCGOpcode opc = op->opc;
4233         const TCGOpDef *def = &tcg_op_defs[opc];
4234         TCGLifeData arg_life = op->life;
4235         int nb_iargs, nb_oargs, call_flags;
4236         TCGTemp *arg_ts, *dir_ts;
4237 
4238         if (opc == INDEX_op_call) {
4239             nb_oargs = TCGOP_CALLO(op);
4240             nb_iargs = TCGOP_CALLI(op);
4241             call_flags = tcg_call_flags(op);
4242         } else {
4243             nb_iargs = def->nb_iargs;
4244             nb_oargs = def->nb_oargs;
4245 
4246             /* Set flags similar to how calls require.  */
4247             if (def->flags & TCG_OPF_COND_BRANCH) {
4248                 /* Like reading globals: sync_globals */
4249                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4250             } else if (def->flags & TCG_OPF_BB_END) {
4251                 /* Like writing globals: save_globals */
4252                 call_flags = 0;
4253             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4254                 /* Like reading globals: sync_globals */
4255                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4256             } else {
4257                 /* No effect on globals.  */
4258                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4259                               TCG_CALL_NO_WRITE_GLOBALS);
4260             }
4261         }
4262 
4263         /* Make sure that input arguments are available.  */
4264         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4265             arg_ts = arg_temp(op->args[i]);
4266             dir_ts = arg_ts->state_ptr;
4267             if (dir_ts && arg_ts->state == TS_DEAD) {
4268                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4269                                   ? INDEX_op_ld_i32
4270                                   : INDEX_op_ld_i64);
4271                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4272                                                   arg_ts->type, 3);
4273 
4274                 lop->args[0] = temp_arg(dir_ts);
4275                 lop->args[1] = temp_arg(arg_ts->mem_base);
4276                 lop->args[2] = arg_ts->mem_offset;
4277 
4278                 /* Loaded, but synced with memory.  */
4279                 arg_ts->state = TS_MEM;
4280             }
4281         }
4282 
4283         /* Perform input replacement, and mark inputs that became dead.
4284            No action is required except keeping temp_state up to date
4285            so that we reload when needed.  */
4286         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4287             arg_ts = arg_temp(op->args[i]);
4288             dir_ts = arg_ts->state_ptr;
4289             if (dir_ts) {
4290                 op->args[i] = temp_arg(dir_ts);
4291                 changes = true;
4292                 if (IS_DEAD_ARG(i)) {
4293                     arg_ts->state = TS_DEAD;
4294                 }
4295             }
4296         }
4297 
4298         /* Liveness analysis should ensure that the following are
4299            all correct, for call sites and basic block end points.  */
4300         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4301             /* Nothing to do */
4302         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4303             for (i = 0; i < nb_globals; ++i) {
4304                 /* Liveness should see that globals are synced back,
4305                    that is, either TS_DEAD or TS_MEM.  */
4306                 arg_ts = &s->temps[i];
4307                 tcg_debug_assert(arg_ts->state_ptr == 0
4308                                  || arg_ts->state != 0);
4309             }
4310         } else {
4311             for (i = 0; i < nb_globals; ++i) {
4312                 /* Liveness should see that globals are saved back,
4313                    that is, TS_DEAD, waiting to be reloaded.  */
4314                 arg_ts = &s->temps[i];
4315                 tcg_debug_assert(arg_ts->state_ptr == 0
4316                                  || arg_ts->state == TS_DEAD);
4317             }
4318         }
4319 
4320         /* Outputs become available.  */
4321         if (opc == INDEX_op_mov) {
4322             arg_ts = arg_temp(op->args[0]);
4323             dir_ts = arg_ts->state_ptr;
4324             if (dir_ts) {
4325                 op->args[0] = temp_arg(dir_ts);
4326                 changes = true;
4327 
4328                 /* The output is now live and modified.  */
4329                 arg_ts->state = 0;
4330 
4331                 if (NEED_SYNC_ARG(0)) {
4332                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4333                                       ? INDEX_op_st_i32
4334                                       : INDEX_op_st_i64);
4335                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4336                                                      arg_ts->type, 3);
4337                     TCGTemp *out_ts = dir_ts;
4338 
4339                     if (IS_DEAD_ARG(0)) {
4340                         out_ts = arg_temp(op->args[1]);
4341                         arg_ts->state = TS_DEAD;
4342                         tcg_op_remove(s, op);
4343                     } else {
4344                         arg_ts->state = TS_MEM;
4345                     }
4346 
4347                     sop->args[0] = temp_arg(out_ts);
4348                     sop->args[1] = temp_arg(arg_ts->mem_base);
4349                     sop->args[2] = arg_ts->mem_offset;
4350                 } else {
4351                     tcg_debug_assert(!IS_DEAD_ARG(0));
4352                 }
4353             }
4354         } else {
4355             for (i = 0; i < nb_oargs; i++) {
4356                 arg_ts = arg_temp(op->args[i]);
4357                 dir_ts = arg_ts->state_ptr;
4358                 if (!dir_ts) {
4359                     continue;
4360                 }
4361                 op->args[i] = temp_arg(dir_ts);
4362                 changes = true;
4363 
4364                 /* The output is now live and modified.  */
4365                 arg_ts->state = 0;
4366 
4367                 /* Sync outputs upon their last write.  */
4368                 if (NEED_SYNC_ARG(i)) {
4369                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4370                                       ? INDEX_op_st_i32
4371                                       : INDEX_op_st_i64);
4372                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4373                                                      arg_ts->type, 3);
4374 
4375                     sop->args[0] = temp_arg(dir_ts);
4376                     sop->args[1] = temp_arg(arg_ts->mem_base);
4377                     sop->args[2] = arg_ts->mem_offset;
4378 
4379                     arg_ts->state = TS_MEM;
4380                 }
4381                 /* Drop outputs that are dead.  */
4382                 if (IS_DEAD_ARG(i)) {
4383                     arg_ts->state = TS_DEAD;
4384                 }
4385             }
4386         }
4387     }
4388 
4389     return changes;
4390 }
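
/*
 * Illustrative result (assuming a 32-bit indirect global "x" with
 * direct shadow temp "x'"):
 *
 *     add x, x, t0
 * becomes
 *     ld_i32 x', base, offset        (inserted before the use)
 *     add    x', x', t0
 *     st_i32 x', base, offset        (inserted after a write that
 *                                     must be synced)
 */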
4391 
4392 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4393 {
4394     intptr_t off;
4395     int size, align;
4396 
4397     /* When allocating an object, look at the full type. */
4398     size = tcg_type_size(ts->base_type);
4399     switch (ts->base_type) {
4400     case TCG_TYPE_I32:
4401         align = 4;
4402         break;
4403     case TCG_TYPE_I64:
4404     case TCG_TYPE_V64:
4405         align = 8;
4406         break;
4407     case TCG_TYPE_I128:
4408     case TCG_TYPE_V128:
4409     case TCG_TYPE_V256:
4410         /*
4411          * Note that we do not require aligned storage for V256,
4412          * and that we provide alignment for I128 to match V128,
4413          * even if that's above what the host ABI requires.
4414          */
4415         align = 16;
4416         break;
4417     default:
4418         g_assert_not_reached();
4419     }
4420 
4421     /*
4422      * Assume the stack is sufficiently aligned.
4423      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4424      * and do not require 16 byte vector alignment.  This seems slightly
4425      * easier than fully parameterizing the above switch statement.
4426      */
4427     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4428     off = ROUND_UP(s->current_frame_offset, align);
4429 
4430     /* If we've exhausted the stack frame, restart with a smaller TB. */
4431     if (off + size > s->frame_end) {
4432         tcg_raise_tb_overflow(s);
4433     }
4434     s->current_frame_offset = off + size;
4435 #if defined(__sparc__)
4436     off += TCG_TARGET_STACK_BIAS;
4437 #endif
4438 
4439     /* If the object was subdivided, assign memory to all the parts. */
4440     if (ts->base_type != ts->type) {
4441         int part_size = tcg_type_size(ts->type);
4442         int part_count = size / part_size;
4443 
4444         /*
4445          * Each part is allocated sequentially in tcg_temp_new_internal.
4446          * Jump back to the first part by subtracting the current index.
4447          */
4448         ts -= ts->temp_subindex;
4449         for (int i = 0; i < part_count; ++i) {
4450             ts[i].mem_offset = off + i * part_size;
4451             ts[i].mem_base = s->frame_temp;
4452             ts[i].mem_allocated = 1;
4453         }
4454     } else {
4455         ts->mem_offset = off;
4456         ts->mem_base = s->frame_temp;
4457         ts->mem_allocated = 1;
4458     }
4459 }
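
/*
 * Worked example (illustrative): with current_frame_offset == 4, an
 * I64 temp needs align == 8, so off = ROUND_UP(4, 8) == 8 and the next
 * free slot becomes 16.  An I128 temp subdivided into two I64 parts
 * gets mem_offset off and off + 8 via the loop above.
 */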
4460 
4461 /* Assign @reg to @ts, and update reg_to_temp[]. */
4462 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4463 {
4464     if (ts->val_type == TEMP_VAL_REG) {
4465         TCGReg old = ts->reg;
4466         tcg_debug_assert(s->reg_to_temp[old] == ts);
4467         if (old == reg) {
4468             return;
4469         }
4470         s->reg_to_temp[old] = NULL;
4471     }
4472     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4473     s->reg_to_temp[reg] = ts;
4474     ts->val_type = TEMP_VAL_REG;
4475     ts->reg = reg;
4476 }
4477 
4478 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4479 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4480 {
4481     tcg_debug_assert(type != TEMP_VAL_REG);
4482     if (ts->val_type == TEMP_VAL_REG) {
4483         TCGReg reg = ts->reg;
4484         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4485         s->reg_to_temp[reg] = NULL;
4486     }
4487     ts->val_type = type;
4488 }
4489 
4490 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4491 
4492 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4493    mark it free; otherwise mark it dead.  */
4494 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4495 {
4496     TCGTempVal new_type;
4497 
4498     switch (ts->kind) {
4499     case TEMP_FIXED:
4500         return;
4501     case TEMP_GLOBAL:
4502     case TEMP_TB:
4503         new_type = TEMP_VAL_MEM;
4504         break;
4505     case TEMP_EBB:
4506         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4507         break;
4508     case TEMP_CONST:
4509         new_type = TEMP_VAL_CONST;
4510         break;
4511     default:
4512         g_assert_not_reached();
4513     }
4514     set_temp_val_nonreg(s, ts, new_type);
4515 }
4516 
4517 /* Mark a temporary as dead.  */
4518 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4519 {
4520     temp_free_or_dead(s, ts, 1);
4521 }
4522 
4523 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4524    register needs to be allocated to store a constant.  If 'free_or_dead'
4525    is non-zero, subsequently release the temporary; if it is positive, the
4526    temp is dead; if it is negative, the temp is free.  */
4527 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4528                       TCGRegSet preferred_regs, int free_or_dead)
4529 {
4530     if (!temp_readonly(ts) && !ts->mem_coherent) {
4531         if (!ts->mem_allocated) {
4532             temp_allocate_frame(s, ts);
4533         }
4534         switch (ts->val_type) {
4535         case TEMP_VAL_CONST:
4536             /* If we're going to free the temp immediately, then we won't
4537                require it later in a register, so attempt to store the
4538                constant to memory directly.  */
4539             if (free_or_dead
4540                 && tcg_out_sti(s, ts->type, ts->val,
4541                                ts->mem_base->reg, ts->mem_offset)) {
4542                 break;
4543             }
4544             temp_load(s, ts, tcg_target_available_regs[ts->type],
4545                       allocated_regs, preferred_regs);
4546             /* fallthrough */
4547 
4548         case TEMP_VAL_REG:
4549             tcg_out_st(s, ts->type, ts->reg,
4550                        ts->mem_base->reg, ts->mem_offset);
4551             break;
4552 
4553         case TEMP_VAL_MEM:
4554             break;
4555 
4556         case TEMP_VAL_DEAD:
4557         default:
4558             g_assert_not_reached();
4559         }
4560         ts->mem_coherent = 1;
4561     }
4562     if (free_or_dead) {
4563         temp_free_or_dead(s, ts, free_or_dead);
4564     }
4565 }
4566 
4567 /* free register 'reg' by spilling the corresponding temporary if necessary */
4568 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4569 {
4570     TCGTemp *ts = s->reg_to_temp[reg];
4571     if (ts != NULL) {
4572         temp_sync(s, ts, allocated_regs, 0, -1);
4573     }
4574 }
4575 
4576 /**
4577  * tcg_reg_alloc:
4578  * @required_regs: Set of registers in which we must allocate.
4579  * @allocated_regs: Set of registers which must be avoided.
4580  * @preferred_regs: Set of registers we should prefer.
4581  * @rev: True if we search the registers in "indirect" order.
4582  *
4583  * The allocated register must be in @required_regs & ~@allocated_regs,
4584  * but if we can put it in @preferred_regs we may save a move later.
4585  */
4586 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4587                             TCGRegSet allocated_regs,
4588                             TCGRegSet preferred_regs, bool rev)
4589 {
4590     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4591     TCGRegSet reg_ct[2];
4592     const int *order;
4593 
4594     reg_ct[1] = required_regs & ~allocated_regs;
4595     tcg_debug_assert(reg_ct[1] != 0);
4596     reg_ct[0] = reg_ct[1] & preferred_regs;
4597 
4598     /* Skip the preferred_regs option if it cannot be satisfied,
4599        or if the preference made no difference.  */
4600     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4601 
4602     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4603 
4604     /* Try free registers, preferences first.  */
4605     for (j = f; j < 2; j++) {
4606         TCGRegSet set = reg_ct[j];
4607 
4608         if (tcg_regset_single(set)) {
4609             /* One register in the set.  */
4610             TCGReg reg = tcg_regset_first(set);
4611             if (s->reg_to_temp[reg] == NULL) {
4612                 return reg;
4613             }
4614         } else {
4615             for (i = 0; i < n; i++) {
4616                 TCGReg reg = order[i];
4617                 if (s->reg_to_temp[reg] == NULL &&
4618                     tcg_regset_test_reg(set, reg)) {
4619                     return reg;
4620                 }
4621             }
4622         }
4623     }
4624 
4625     /* We must spill something.  */
4626     for (j = f; j < 2; j++) {
4627         TCGRegSet set = reg_ct[j];
4628 
4629         if (tcg_regset_single(set)) {
4630             /* One register in the set.  */
4631             TCGReg reg = tcg_regset_first(set);
4632             tcg_reg_free(s, reg, allocated_regs);
4633             return reg;
4634         } else {
4635             for (i = 0; i < n; i++) {
4636                 TCGReg reg = order[i];
4637                 if (tcg_regset_test_reg(set, reg)) {
4638                     tcg_reg_free(s, reg, allocated_regs);
4639                     return reg;
4640                 }
4641             }
4642         }
4643     }
4644 
4645     g_assert_not_reached();
4646 }
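
/*
 * Example (illustrative): with required_regs == { R0, R1 },
 * preferred_regs == { R1 } and both registers free, the first loop
 * returns R1; if R1 is occupied but R0 is free, R0 is returned without
 * spilling; only when every candidate is occupied does the second loop
 * spill one.
 */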
4647 
4648 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4649                                  TCGRegSet allocated_regs,
4650                                  TCGRegSet preferred_regs, bool rev)
4651 {
4652     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4653     TCGRegSet reg_ct[2];
4654     const int *order;
4655 
4656     /* Only accept a register I if neither I nor I+1 is in allocated_regs. */
4657     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4658     tcg_debug_assert(reg_ct[1] != 0);
4659     reg_ct[0] = reg_ct[1] & preferred_regs;
4660 
4661     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4662 
4663     /*
4664      * Skip the preferred_regs option if it cannot be satisfied,
4665      * or if the preference made no difference.
4666      */
4667     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4668 
4669     /*
4670      * Minimize the number of flushes by looking for 2 free registers first,
4671      * then a single flush, then two flushes.
4672      */
4673     for (fmin = 2; fmin >= 0; fmin--) {
4674         for (j = k; j < 2; j++) {
4675             TCGRegSet set = reg_ct[j];
4676 
4677             for (i = 0; i < n; i++) {
4678                 TCGReg reg = order[i];
4679 
4680                 if (tcg_regset_test_reg(set, reg)) {
4681                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4682                     if (f >= fmin) {
4683                         tcg_reg_free(s, reg, allocated_regs);
4684                         tcg_reg_free(s, reg + 1, allocated_regs);
4685                         return reg;
4686                     }
4687                 }
4688             }
4689         }
4690     }
4691     g_assert_not_reached();
4692 }
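/*
 * Illustrative note on the fmin ladder above (a sketch, not part of
 * the build): the pass with fmin = 2 accepts a pair only when both
 * reg and reg+1 are unbound, costing no flush; fmin = 1 tolerates
 * one live register, costing a single spill; fmin = 0 accepts any
 * constrained pair at the cost of two spills.  The tcg_reg_free
 * calls are no-ops for registers that were already free.
 */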
4693 
4694 /* Make sure the temporary is in a register.  If needed, allocate the register
4695    from DESIRED while avoiding ALLOCATED.  */
4696 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4697                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4698 {
4699     TCGReg reg;
4700 
4701     switch (ts->val_type) {
4702     case TEMP_VAL_REG:
4703         return;
4704     case TEMP_VAL_CONST:
4705         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4706                             preferred_regs, ts->indirect_base);
4707         if (ts->type <= TCG_TYPE_I64) {
4708             tcg_out_movi(s, ts->type, reg, ts->val);
4709         } else {
4710             uint64_t val = ts->val;
4711             MemOp vece = MO_64;
4712 
4713             /*
4714              * Find the minimal vector element that matches the constant.
4715              * The targets will, in general, have to do this search
4716              * anyway, so do it generically here.
4717              */
4718             if (val == dup_const(MO_8, val)) {
4719                 vece = MO_8;
4720             } else if (val == dup_const(MO_16, val)) {
4721                 vece = MO_16;
4722             } else if (val == dup_const(MO_32, val)) {
4723                 vece = MO_32;
4724             }
4725 
4726             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4727         }
4728         ts->mem_coherent = 0;
4729         break;
4730     case TEMP_VAL_MEM:
4731         if (!ts->mem_allocated) {
4732             temp_allocate_frame(s, ts);
4733         }
4734         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4735                             preferred_regs, ts->indirect_base);
4736         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4737         ts->mem_coherent = 1;
4738         break;
4739     case TEMP_VAL_DEAD:
4740     default:
4741         g_assert_not_reached();
4742     }
4743     set_temp_val_reg(s, ts, reg);
4744 }
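/*
 * Illustrative examples for the minimal-vece search above (not part
 * of the build): 0x1212121212121212 equals dup_const(MO_8, 0x12)
 * and is loaded as a byte dup; 0x0001000100010001 first matches at
 * dup_const(MO_16, 0x0001); an arbitrary value such as
 * 0x0123456789abcdef matches none and keeps vece = MO_64.
 */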
4745 
4746 /* Save a temporary to memory. 'allocated_regs' is used in case a
4747    temporary register needs to be allocated to store a constant.  */
4748 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4749 {
4750     /* The liveness analysis already ensures that globals are back
4751        in memory. Keep a tcg_debug_assert for safety. */
4752     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4753 }
4754 
4755 /* save globals to their canonical location and assume they can be
4756    modified by the following code. 'allocated_regs' is used in case a
4757    temporary register needs to be allocated to store a constant. */
4758 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4759 {
4760     int i, n;
4761 
4762     for (i = 0, n = s->nb_globals; i < n; i++) {
4763         temp_save(s, &s->temps[i], allocated_regs);
4764     }
4765 }
4766 
4767 /* sync globals to their canonical location and assume they can be
4768    read by the following code. 'allocated_regs' is used in case a
4769    temporary register needs to be allocated to store a constant. */
4770 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4771 {
4772     int i, n;
4773 
4774     for (i = 0, n = s->nb_globals; i < n; i++) {
4775         TCGTemp *ts = &s->temps[i];
4776         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4777                          || ts->kind == TEMP_FIXED
4778                          || ts->mem_coherent);
4779     }
4780 }
4781 
4782 /* at the end of a basic block, we assume all temporaries are dead and
4783    all globals are stored at their canonical location. */
4784 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4785 {
4786     int i;
4787 
4788     for (i = s->nb_globals; i < s->nb_temps; i++) {
4789         TCGTemp *ts = &s->temps[i];
4790 
4791         switch (ts->kind) {
4792         case TEMP_TB:
4793             temp_save(s, ts, allocated_regs);
4794             break;
4795         case TEMP_EBB:
4796             /* The liveness analysis already ensures that temps are dead.
4797                Keep a tcg_debug_assert for safety. */
4798             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4799             break;
4800         case TEMP_CONST:
4801             /* Similarly, we should have freed any allocated register. */
4802             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4803             break;
4804         default:
4805             g_assert_not_reached();
4806         }
4807     }
4808 
4809     save_globals(s, allocated_regs);
4810 }
4811 
4812 /*
4813  * At a conditional branch, we assume all temporaries are dead unless
4814  * explicitly live-across-conditional-branch; all globals and local
4815  * temps are synced to their location.
4816  */
4817 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4818 {
4819     sync_globals(s, allocated_regs);
4820 
4821     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4822         TCGTemp *ts = &s->temps[i];
4823         /*
4824          * The liveness analysis already ensures that temps are dead.
4825          * Keep tcg_debug_asserts for safety.
4826          */
4827         switch (ts->kind) {
4828         case TEMP_TB:
4829             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4830             break;
4831         case TEMP_EBB:
4832         case TEMP_CONST:
4833             break;
4834         default:
4835             g_assert_not_reached();
4836         }
4837     }
4838 }
4839 
4840 /*
4841  * Specialized code generation for INDEX_op_mov_* with a constant.
4842  */
4843 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4844                                   tcg_target_ulong val, TCGLifeData arg_life,
4845                                   TCGRegSet preferred_regs)
4846 {
4847     /* ENV should not be modified.  */
4848     tcg_debug_assert(!temp_readonly(ots));
4849 
4850     /* The movi is not explicitly generated here.  */
4851     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4852     ots->val = val;
4853     ots->mem_coherent = 0;
4854     if (NEED_SYNC_ARG(0)) {
4855         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4856     } else if (IS_DEAD_ARG(0)) {
4857         temp_dead(s, ots);
4858     }
4859 }
4860 
4861 /*
4862  * Specialized code generation for INDEX_op_mov_*.
4863  */
4864 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4865 {
4866     const TCGLifeData arg_life = op->life;
4867     TCGRegSet allocated_regs, preferred_regs;
4868     TCGTemp *ts, *ots;
4869     TCGType otype, itype;
4870     TCGReg oreg, ireg;
4871 
4872     allocated_regs = s->reserved_regs;
4873     preferred_regs = output_pref(op, 0);
4874     ots = arg_temp(op->args[0]);
4875     ts = arg_temp(op->args[1]);
4876 
4877     /* ENV should not be modified.  */
4878     tcg_debug_assert(!temp_readonly(ots));
4879 
4880     /* Note that otype != itype for no-op truncation.  */
4881     otype = ots->type;
4882     itype = ts->type;
4883 
4884     if (ts->val_type == TEMP_VAL_CONST) {
4885         /* propagate constant or generate sti */
4886         tcg_target_ulong val = ts->val;
4887         if (IS_DEAD_ARG(1)) {
4888             temp_dead(s, ts);
4889         }
4890         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4891         return;
4892     }
4893 
4894     /* If the source value is in memory we're going to be forced
4895        to have it in a register in order to perform the copy.  Copy
4896        the SOURCE value into its own register first, that way we
4897        don't have to reload SOURCE the next time it is used. */
4898     if (ts->val_type == TEMP_VAL_MEM) {
4899         temp_load(s, ts, tcg_target_available_regs[itype],
4900                   allocated_regs, preferred_regs);
4901     }
4902     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4903     ireg = ts->reg;
4904 
4905     if (IS_DEAD_ARG(0)) {
4906         /* mov to a non-saved dead register makes no sense (even with
4907            liveness analysis disabled). */
4908         tcg_debug_assert(NEED_SYNC_ARG(0));
4909         if (!ots->mem_allocated) {
4910             temp_allocate_frame(s, ots);
4911         }
4912         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4913         if (IS_DEAD_ARG(1)) {
4914             temp_dead(s, ts);
4915         }
4916         temp_dead(s, ots);
4917         return;
4918     }
4919 
4920     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4921         /*
4922          * The mov can be suppressed.  Kill input first, so that it
4923          * is unlinked from reg_to_temp, then set the output to the
4924          * reg that we saved from the input.
4925          */
4926         temp_dead(s, ts);
4927         oreg = ireg;
4928     } else {
4929         if (ots->val_type == TEMP_VAL_REG) {
4930             oreg = ots->reg;
4931         } else {
4932             /* Make sure to not spill the input register during allocation. */
4933             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4934                                  allocated_regs | ((TCGRegSet)1 << ireg),
4935                                  preferred_regs, ots->indirect_base);
4936         }
4937         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4938             /*
4939              * Cross register class move not supported.
4940              * Store the source register into the destination slot
4941              * and leave the destination temp as TEMP_VAL_MEM.
4942              */
4943             assert(!temp_readonly(ots));
4944             if (!ots->mem_allocated) {
4945                 temp_allocate_frame(s, ots);
4946             }
4947             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4948             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4949             ots->mem_coherent = 1;
4950             return;
4951         }
4952     }
4953     set_temp_val_reg(s, ots, oreg);
4954     ots->mem_coherent = 0;
4955 
4956     if (NEED_SYNC_ARG(0)) {
4957         temp_sync(s, ots, allocated_regs, 0, 0);
4958     }
4959 }
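/*
 * Illustrative example of mov suppression above (a sketch, not part
 * of the build): for "mov t1, t0" where t0 sits in r3 and dies at
 * this opcode, no host instruction is emitted; t0 is unlinked from
 * r3 and t1 is simply bound to r3.  A real host mov (or the store
 * fallback) is emitted only when the input stays live or is a
 * TEMP_FIXED register.
 */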
4960 
4961 /*
4962  * Specialized code generation for INDEX_op_dup_vec.
4963  */
4964 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4965 {
4966     const TCGLifeData arg_life = op->life;
4967     TCGRegSet dup_out_regs, dup_in_regs;
4968     const TCGArgConstraint *dup_args_ct;
4969     TCGTemp *its, *ots;
4970     TCGType itype, vtype;
4971     unsigned vece;
4972     int lowpart_ofs;
4973     bool ok;
4974 
4975     ots = arg_temp(op->args[0]);
4976     its = arg_temp(op->args[1]);
4977 
4978     /* ENV should not be modified.  */
4979     tcg_debug_assert(!temp_readonly(ots));
4980 
4981     itype = its->type;
4982     vece = TCGOP_VECE(op);
4983     vtype = TCGOP_TYPE(op);
4984 
4985     if (its->val_type == TEMP_VAL_CONST) {
4986         /* Propagate constant via movi -> dupi.  */
4987         tcg_target_ulong val = its->val;
4988         if (IS_DEAD_ARG(1)) {
4989             temp_dead(s, its);
4990         }
4991         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4992         return;
4993     }
4994 
4995     dup_args_ct = opcode_args_ct(op);
4996     dup_out_regs = dup_args_ct[0].regs;
4997     dup_in_regs = dup_args_ct[1].regs;
4998 
4999     /* Allocate the output register now.  */
5000     if (ots->val_type != TEMP_VAL_REG) {
5001         TCGRegSet allocated_regs = s->reserved_regs;
5002         TCGReg oreg;
5003 
5004         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5005             /* Make sure to not spill the input register. */
5006             tcg_regset_set_reg(allocated_regs, its->reg);
5007         }
5008         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5009                              output_pref(op, 0), ots->indirect_base);
5010         set_temp_val_reg(s, ots, oreg);
5011     }
5012 
5013     switch (its->val_type) {
5014     case TEMP_VAL_REG:
5015         /*
5016          * The dup constraints must be broad, covering all possible VECE.
5017          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
5018          * to fail, indicating that extra moves are required for that case.
5019          */
5020         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5021             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5022                 goto done;
5023             }
5024             /* Try again from memory or a vector input register.  */
5025         }
5026         if (!its->mem_coherent) {
5027             /*
5028              * The input register is not synced, and so an extra store
5029              * would be required to use memory.  Attempt an integer-vector
5030              * register move first.  We do not have a TCGRegSet for this.
5031              */
5032             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5033                 break;
5034             }
5035             /* Sync the temp back to its slot and load from there.  */
5036             temp_sync(s, its, s->reserved_regs, 0, 0);
5037         }
5038         /* fall through */
5039 
5040     case TEMP_VAL_MEM:
5041         lowpart_ofs = 0;
5042         if (HOST_BIG_ENDIAN) {
5043             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5044         }
5045         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5046                              its->mem_offset + lowpart_ofs)) {
5047             goto done;
5048         }
5049         /* Load the input into the destination vector register. */
5050         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5051         break;
5052 
5053     default:
5054         g_assert_not_reached();
5055     }
5056 
5057     /* We now have a vector input register, so dup must succeed. */
5058     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5059     tcg_debug_assert(ok);
5060 
5061  done:
5062     ots->mem_coherent = 0;
5063     if (IS_DEAD_ARG(1)) {
5064         temp_dead(s, its);
5065     }
5066     if (NEED_SYNC_ARG(0)) {
5067         temp_sync(s, ots, s->reserved_regs, 0, 0);
5068     }
5069     if (IS_DEAD_ARG(0)) {
5070         temp_dead(s, ots);
5071     }
5072 }
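/*
 * Summary of the dup lowering ladder above (illustrative, not part
 * of the build): 1) tcg_out_dup_vec straight from the input
 * register when the constraint allows; 2) an integer-to-vector
 * tcg_out_mov followed by an in-place dup; 3) tcg_out_dupm_vec from
 * the low element of the memory slot; 4) a plain tcg_out_ld into
 * the vector register, after which the final dup must succeed.
 */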
5073 
5074 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5075 {
5076     const TCGLifeData arg_life = op->life;
5077     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5078     TCGRegSet i_allocated_regs;
5079     TCGRegSet o_allocated_regs;
5080     int i, k, nb_iargs, nb_oargs;
5081     TCGReg reg;
5082     TCGArg arg;
5083     const TCGArgConstraint *args_ct;
5084     const TCGArgConstraint *arg_ct;
5085     TCGTemp *ts;
5086     TCGArg new_args[TCG_MAX_OP_ARGS];
5087     int const_args[TCG_MAX_OP_ARGS];
5088     TCGCond op_cond;
5089 
5090     nb_oargs = def->nb_oargs;
5091     nb_iargs = def->nb_iargs;
5092 
5093     /* copy constants */
5094     memcpy(new_args + nb_oargs + nb_iargs,
5095            op->args + nb_oargs + nb_iargs,
5096            sizeof(TCGArg) * def->nb_cargs);
5097 
5098     i_allocated_regs = s->reserved_regs;
5099     o_allocated_regs = s->reserved_regs;
5100 
5101     switch (op->opc) {
5102     case INDEX_op_brcond_i32:
5103     case INDEX_op_brcond_i64:
5104         op_cond = op->args[2];
5105         break;
5106     case INDEX_op_setcond_i32:
5107     case INDEX_op_setcond_i64:
5108     case INDEX_op_negsetcond_i32:
5109     case INDEX_op_negsetcond_i64:
5110     case INDEX_op_cmp_vec:
5111         op_cond = op->args[3];
5112         break;
5113     case INDEX_op_brcond2_i32:
5114         op_cond = op->args[4];
5115         break;
5116     case INDEX_op_movcond_i32:
5117     case INDEX_op_movcond_i64:
5118     case INDEX_op_setcond2_i32:
5119     case INDEX_op_cmpsel_vec:
5120         op_cond = op->args[5];
5121         break;
5122     default:
5123         /* No condition within opcode. */
5124         op_cond = TCG_COND_ALWAYS;
5125         break;
5126     }
5127 
5128     args_ct = opcode_args_ct(op);
5129 
5130     /* satisfy input constraints */
5131     for (k = 0; k < nb_iargs; k++) {
5132         TCGRegSet i_preferred_regs, i_required_regs;
5133         bool allocate_new_reg, copyto_new_reg;
5134         TCGTemp *ts2;
5135         int i1, i2;
5136 
5137         i = args_ct[nb_oargs + k].sort_index;
5138         arg = op->args[i];
5139         arg_ct = &args_ct[i];
5140         ts = arg_temp(arg);
5141 
5142         if (ts->val_type == TEMP_VAL_CONST) {
5143 #ifdef TCG_REG_ZERO
5144             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5145                 /* Hardware zero register: indicate register via non-const. */
5146                 const_args[i] = 0;
5147                 new_args[i] = TCG_REG_ZERO;
5148                 continue;
5149             }
5150 #endif
5151 
5152             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5153                                        op_cond, TCGOP_VECE(op))) {
5154                 /* constant is OK for instruction */
5155                 const_args[i] = 1;
5156                 new_args[i] = ts->val;
5157                 continue;
5158             }
5159         }
5160 
5161         reg = ts->reg;
5162         i_preferred_regs = 0;
5163         i_required_regs = arg_ct->regs;
5164         allocate_new_reg = false;
5165         copyto_new_reg = false;
5166 
5167         switch (arg_ct->pair) {
5168         case 0: /* not paired */
5169             if (arg_ct->ialias) {
5170                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5171 
5172                 /*
5173                  * If the input is readonly, then it cannot also be an
5174                  * output and aliased to itself.  If the input is not
5175                  * dead after the instruction, we must allocate a new
5176                  * register and move it.
5177                  */
5178                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5179                     || args_ct[arg_ct->alias_index].newreg) {
5180                     allocate_new_reg = true;
5181                 } else if (ts->val_type == TEMP_VAL_REG) {
5182                     /*
5183                      * Check if the current register has already been
5184                      * allocated for another input.
5185                      */
5186                     allocate_new_reg =
5187                         tcg_regset_test_reg(i_allocated_regs, reg);
5188                 }
5189             }
5190             if (!allocate_new_reg) {
5191                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5192                           i_preferred_regs);
5193                 reg = ts->reg;
5194                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5195             }
5196             if (allocate_new_reg) {
5197                 /*
5198                  * Allocate a new register matching the constraint
5199                  * and move the temporary register into it.
5200                  */
5201                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5202                           i_allocated_regs, 0);
5203                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5204                                     i_preferred_regs, ts->indirect_base);
5205                 copyto_new_reg = true;
5206             }
5207             break;
5208 
5209         case 1:
5210             /* First of an input pair; if i1 == i2, the second is an output. */
5211             i1 = i;
5212             i2 = arg_ct->pair_index;
5213             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5214 
5215             /*
5216              * It is easier to default to allocating a new pair
5217              * and to identify a few cases where it's not required.
5218              */
5219             if (arg_ct->ialias) {
5220                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5221                 if (IS_DEAD_ARG(i1) &&
5222                     IS_DEAD_ARG(i2) &&
5223                     !temp_readonly(ts) &&
5224                     ts->val_type == TEMP_VAL_REG &&
5225                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5226                     tcg_regset_test_reg(i_required_regs, reg) &&
5227                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5228                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5229                     (ts2
5230                      ? ts2->val_type == TEMP_VAL_REG &&
5231                        ts2->reg == reg + 1 &&
5232                        !temp_readonly(ts2)
5233                      : s->reg_to_temp[reg + 1] == NULL)) {
5234                     break;
5235                 }
5236             } else {
5237                 /* Without aliasing, the pair must also be an input. */
5238                 tcg_debug_assert(ts2);
5239                 if (ts->val_type == TEMP_VAL_REG &&
5240                     ts2->val_type == TEMP_VAL_REG &&
5241                     ts2->reg == reg + 1 &&
5242                     tcg_regset_test_reg(i_required_regs, reg)) {
5243                     break;
5244                 }
5245             }
5246             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5247                                      0, ts->indirect_base);
5248             goto do_pair;
5249 
5250         case 2: /* pair second */
5251             reg = new_args[arg_ct->pair_index] + 1;
5252             goto do_pair;
5253 
5254         case 3: /* ialias with second output, no first input */
5255             tcg_debug_assert(arg_ct->ialias);
5256             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5257 
5258             if (IS_DEAD_ARG(i) &&
5259                 !temp_readonly(ts) &&
5260                 ts->val_type == TEMP_VAL_REG &&
5261                 reg > 0 &&
5262                 s->reg_to_temp[reg - 1] == NULL &&
5263                 tcg_regset_test_reg(i_required_regs, reg) &&
5264                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5265                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5266                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5267                 break;
5268             }
5269             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5270                                      i_allocated_regs, 0,
5271                                      ts->indirect_base);
5272             tcg_regset_set_reg(i_allocated_regs, reg);
5273             reg += 1;
5274             goto do_pair;
5275 
5276         do_pair:
5277             /*
5278              * If an aliased input is not dead after the instruction,
5279              * we must allocate a new register and move it.
5280              */
5281             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5282                 TCGRegSet t_allocated_regs = i_allocated_regs;
5283 
5284                 /*
5285                  * Because of the alias, and the continued life, make sure
5286                  * that the temp is somewhere *other* than the reg pair,
5287                  * and we get a copy in reg.
5288                  */
5289                 tcg_regset_set_reg(t_allocated_regs, reg);
5290                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5291                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5292                     /* If ts was already in reg, copy it somewhere else. */
5293                     TCGReg nr;
5294                     bool ok;
5295 
5296                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5297                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5298                                        t_allocated_regs, 0, ts->indirect_base);
5299                     ok = tcg_out_mov(s, ts->type, nr, reg);
5300                     tcg_debug_assert(ok);
5301 
5302                     set_temp_val_reg(s, ts, nr);
5303                 } else {
5304                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5305                               t_allocated_regs, 0);
5306                     copyto_new_reg = true;
5307                 }
5308             } else {
5309                 /* Preferably allocate to reg, otherwise copy. */
5310                 i_required_regs = (TCGRegSet)1 << reg;
5311                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5312                           i_preferred_regs);
5313                 copyto_new_reg = ts->reg != reg;
5314             }
5315             break;
5316 
5317         default:
5318             g_assert_not_reached();
5319         }
5320 
5321         if (copyto_new_reg) {
5322             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5323                 /*
5324                  * Cross register class move not supported.  Sync the
5325                  * temp back to its slot and load from there.
5326                  */
5327                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5328                 tcg_out_ld(s, ts->type, reg,
5329                            ts->mem_base->reg, ts->mem_offset);
5330             }
5331         }
5332         new_args[i] = reg;
5333         const_args[i] = 0;
5334         tcg_regset_set_reg(i_allocated_regs, reg);
5335     }
5336 
5337     /* mark dead temporaries and free the associated registers */
5338     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5339         if (IS_DEAD_ARG(i)) {
5340             temp_dead(s, arg_temp(op->args[i]));
5341         }
5342     }
5343 
5344     if (def->flags & TCG_OPF_COND_BRANCH) {
5345         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5346     } else if (def->flags & TCG_OPF_BB_END) {
5347         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5348     } else {
5349         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5350             /* XXX: permit a generic clobber register list? */
5351             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5352                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5353                     tcg_reg_free(s, i, i_allocated_regs);
5354                 }
5355             }
5356         }
5357         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5358             /* sync globals if the op has side effects and might trigger
5359                an exception. */
5360             sync_globals(s, i_allocated_regs);
5361         }
5362 
5363         /* satisfy the output constraints */
5364         for (k = 0; k < nb_oargs; k++) {
5365             i = args_ct[k].sort_index;
5366             arg = op->args[i];
5367             arg_ct = &args_ct[i];
5368             ts = arg_temp(arg);
5369 
5370             /* ENV should not be modified.  */
5371             tcg_debug_assert(!temp_readonly(ts));
5372 
5373             switch (arg_ct->pair) {
5374             case 0: /* not paired */
5375                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5376                     reg = new_args[arg_ct->alias_index];
5377                 } else if (arg_ct->newreg) {
5378                     reg = tcg_reg_alloc(s, arg_ct->regs,
5379                                         i_allocated_regs | o_allocated_regs,
5380                                         output_pref(op, k), ts->indirect_base);
5381                 } else {
5382                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5383                                         output_pref(op, k), ts->indirect_base);
5384                 }
5385                 break;
5386 
5387             case 1: /* first of pair */
5388                 if (arg_ct->oalias) {
5389                     reg = new_args[arg_ct->alias_index];
5390                 } else if (arg_ct->newreg) {
5391                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5392                                              i_allocated_regs | o_allocated_regs,
5393                                              output_pref(op, k),
5394                                              ts->indirect_base);
5395                 } else {
5396                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5397                                              output_pref(op, k),
5398                                              ts->indirect_base);
5399                 }
5400                 break;
5401 
5402             case 2: /* second of pair */
5403                 if (arg_ct->oalias) {
5404                     reg = new_args[arg_ct->alias_index];
5405                 } else {
5406                     reg = new_args[arg_ct->pair_index] + 1;
5407                 }
5408                 break;
5409 
5410             case 3: /* first of pair, aliasing with a second input */
5411                 tcg_debug_assert(!arg_ct->newreg);
5412                 reg = new_args[arg_ct->pair_index] - 1;
5413                 break;
5414 
5415             default:
5416                 g_assert_not_reached();
5417             }
5418             tcg_regset_set_reg(o_allocated_regs, reg);
5419             set_temp_val_reg(s, ts, reg);
5420             ts->mem_coherent = 0;
5421             new_args[i] = reg;
5422         }
5423     }
5424 
5425     /* emit instruction */
5426     TCGType type = TCGOP_TYPE(op);
5427     switch (op->opc) {
5428     case INDEX_op_ext_i32_i64:
5429         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5430         break;
5431     case INDEX_op_extu_i32_i64:
5432         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5433         break;
5434     case INDEX_op_extrl_i64_i32:
5435         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5436         break;
5437 
5438     case INDEX_op_add:
5439     case INDEX_op_and:
5440     case INDEX_op_andc:
5441     case INDEX_op_or:
5442     case INDEX_op_orc:
5443     case INDEX_op_xor:
5444         {
5445             const TCGOutOpBinary *out =
5446                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5447 
5448             /* Constants should never appear in the first source operand. */
5449             tcg_debug_assert(!const_args[1]);
5450             if (const_args[2]) {
5451                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5452             } else {
5453                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5454             }
5455         }
5456         break;
5457 
5458     default:
5459         if (def->flags & TCG_OPF_VECTOR) {
5460             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5461                            TCGOP_VECE(op), new_args, const_args);
5462         } else {
5463             tcg_out_op(s, op->opc, type, new_args, const_args);
5464         }
5465         break;
5466     }
5467 
5468     /* move the outputs in the correct register if needed */
5469     for (i = 0; i < nb_oargs; i++) {
5470         ts = arg_temp(op->args[i]);
5471 
5472         /* ENV should not be modified.  */
5473         tcg_debug_assert(!temp_readonly(ts));
5474 
5475         if (NEED_SYNC_ARG(i)) {
5476             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5477         } else if (IS_DEAD_ARG(i)) {
5478             temp_dead(s, ts);
5479         }
5480     }
5481 }
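/*
 * Note on the arg_ct->pair encoding used above (illustrative): 0 is
 * an unpaired operand; 1 is the first register of a consecutive
 * pair, allocated with tcg_reg_alloc_pair; 2 is the second, always
 * derived as new_args[pair_index] + 1; 3 covers the asymmetric
 * case in which only one half of the pair aliases another operand,
 * so the remaining register is derived as its neighbour (reg + 1
 * for inputs, new_args[pair_index] - 1 for outputs).
 */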
5482 
5483 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5484 {
5485     const TCGLifeData arg_life = op->life;
5486     TCGTemp *ots, *itsl, *itsh;
5487     TCGType vtype = TCGOP_TYPE(op);
5488 
5489     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5490     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5491     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5492 
5493     ots = arg_temp(op->args[0]);
5494     itsl = arg_temp(op->args[1]);
5495     itsh = arg_temp(op->args[2]);
5496 
5497     /* ENV should not be modified.  */
5498     tcg_debug_assert(!temp_readonly(ots));
5499 
5500     /* Allocate the output register now.  */
5501     if (ots->val_type != TEMP_VAL_REG) {
5502         TCGRegSet allocated_regs = s->reserved_regs;
5503         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5504         TCGReg oreg;
5505 
5506         /* Make sure to not spill the input registers. */
5507         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5508             tcg_regset_set_reg(allocated_regs, itsl->reg);
5509         }
5510         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5511             tcg_regset_set_reg(allocated_regs, itsh->reg);
5512         }
5513 
5514         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5515                              output_pref(op, 0), ots->indirect_base);
5516         set_temp_val_reg(s, ots, oreg);
5517     }
5518 
5519     /* Promote dup2 of immediates to dupi_vec. */
5520     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5521         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5522         MemOp vece = MO_64;
5523 
5524         if (val == dup_const(MO_8, val)) {
5525             vece = MO_8;
5526         } else if (val == dup_const(MO_16, val)) {
5527             vece = MO_16;
5528         } else if (val == dup_const(MO_32, val)) {
5529             vece = MO_32;
5530         }
5531 
5532         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5533         goto done;
5534     }
5535 
5536     /* If the two inputs form one 64-bit value, try dupm_vec. */
5537     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5538         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5539         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5540         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5541 
5542         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5543         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5544 
5545         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5546                              its->mem_base->reg, its->mem_offset)) {
5547             goto done;
5548         }
5549     }
5550 
5551     /* Fall back to generic expansion. */
5552     return false;
5553 
5554  done:
5555     ots->mem_coherent = 0;
5556     if (IS_DEAD_ARG(1)) {
5557         temp_dead(s, itsl);
5558     }
5559     if (IS_DEAD_ARG(2)) {
5560         temp_dead(s, itsh);
5561     }
5562     if (NEED_SYNC_ARG(0)) {
5563         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5564     } else if (IS_DEAD_ARG(0)) {
5565         temp_dead(s, ots);
5566     }
5567     return true;
5568 }
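/*
 * Illustrative example of the dup2 promotion above (not part of the
 * build): itsl = 0x00000001 and itsh = 0x00000001 combine into
 * val = 0x0000000100000001, which matches dup_const(MO_32, 1), so a
 * single dupi_vec with vece = MO_32 replaces the generic two-part
 * expansion.
 */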
5569 
5570 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5571                          TCGRegSet allocated_regs)
5572 {
5573     if (ts->val_type == TEMP_VAL_REG) {
5574         if (ts->reg != reg) {
5575             tcg_reg_free(s, reg, allocated_regs);
5576             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5577                 /*
5578                  * Cross register class move not supported.  Sync the
5579                  * temp back to its slot and load from there.
5580                  */
5581                 temp_sync(s, ts, allocated_regs, 0, 0);
5582                 tcg_out_ld(s, ts->type, reg,
5583                            ts->mem_base->reg, ts->mem_offset);
5584             }
5585         }
5586     } else {
5587         TCGRegSet arg_set = 0;
5588 
5589         tcg_reg_free(s, reg, allocated_regs);
5590         tcg_regset_set_reg(arg_set, reg);
5591         temp_load(s, ts, arg_set, allocated_regs, 0);
5592     }
5593 }
5594 
5595 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5596                          TCGRegSet allocated_regs)
5597 {
5598     /*
5599      * When the destination is on the stack, load up the temp and store.
5600      * If there are many call-saved registers, the temp might live to
5601      * see another use; otherwise it'll be discarded.
5602      */
5603     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5604     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5605                arg_slot_stk_ofs(arg_slot));
5606 }
5607 
5608 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5609                             TCGTemp *ts, TCGRegSet *allocated_regs)
5610 {
5611     if (arg_slot_reg_p(l->arg_slot)) {
5612         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5613         load_arg_reg(s, reg, ts, *allocated_regs);
5614         tcg_regset_set_reg(*allocated_regs, reg);
5615     } else {
5616         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5617     }
5618 }
5619 
5620 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5621                          intptr_t ref_off, TCGRegSet *allocated_regs)
5622 {
5623     TCGReg reg;
5624 
5625     if (arg_slot_reg_p(arg_slot)) {
5626         reg = tcg_target_call_iarg_regs[arg_slot];
5627         tcg_reg_free(s, reg, *allocated_regs);
5628         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5629         tcg_regset_set_reg(*allocated_regs, reg);
5630     } else {
5631         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5632                             *allocated_regs, 0, false);
5633         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5634         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5635                    arg_slot_stk_ofs(arg_slot));
5636     }
5637 }
5638 
5639 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5640 {
5641     const int nb_oargs = TCGOP_CALLO(op);
5642     const int nb_iargs = TCGOP_CALLI(op);
5643     const TCGLifeData arg_life = op->life;
5644     const TCGHelperInfo *info = tcg_call_info(op);
5645     TCGRegSet allocated_regs = s->reserved_regs;
5646     int i;
5647 
5648     /*
5649      * Move inputs into place in reverse order,
5650      * so that we place stacked arguments first.
5651      */
5652     for (i = nb_iargs - 1; i >= 0; --i) {
5653         const TCGCallArgumentLoc *loc = &info->in[i];
5654         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5655 
5656         switch (loc->kind) {
5657         case TCG_CALL_ARG_NORMAL:
5658         case TCG_CALL_ARG_EXTEND_U:
5659         case TCG_CALL_ARG_EXTEND_S:
5660             load_arg_normal(s, loc, ts, &allocated_regs);
5661             break;
5662         case TCG_CALL_ARG_BY_REF:
5663             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5664             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5665                          arg_slot_stk_ofs(loc->ref_slot),
5666                          &allocated_regs);
5667             break;
5668         case TCG_CALL_ARG_BY_REF_N:
5669             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5670             break;
5671         default:
5672             g_assert_not_reached();
5673         }
5674     }
5675 
5676     /* Mark dead temporaries and free the associated registers.  */
5677     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5678         if (IS_DEAD_ARG(i)) {
5679             temp_dead(s, arg_temp(op->args[i]));
5680         }
5681     }
5682 
5683     /* Clobber call registers.  */
5684     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5685         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5686             tcg_reg_free(s, i, allocated_regs);
5687         }
5688     }
5689 
5690     /*
5691      * Save globals if they might be written by the helper,
5692      * sync them if they might be read.
5693      */
5694     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5695         /* Nothing to do */
5696     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5697         sync_globals(s, allocated_regs);
5698     } else {
5699         save_globals(s, allocated_regs);
5700     }
5701 
5702     /*
5703      * If the ABI passes a pointer to the returned struct as the first
5704      * argument, load that now.  Pass a pointer to the output home slot.
5705      */
5706     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5707         TCGTemp *ts = arg_temp(op->args[0]);
5708 
5709         if (!ts->mem_allocated) {
5710             temp_allocate_frame(s, ts);
5711         }
5712         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5713     }
5714 
5715     tcg_out_call(s, tcg_call_func(op), info);
5716 
5717     /* Assign output registers and emit moves if needed.  */
5718     switch (info->out_kind) {
5719     case TCG_CALL_RET_NORMAL:
5720         for (i = 0; i < nb_oargs; i++) {
5721             TCGTemp *ts = arg_temp(op->args[i]);
5722             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5723 
5724             /* ENV should not be modified.  */
5725             tcg_debug_assert(!temp_readonly(ts));
5726 
5727             set_temp_val_reg(s, ts, reg);
5728             ts->mem_coherent = 0;
5729         }
5730         break;
5731 
5732     case TCG_CALL_RET_BY_VEC:
5733         {
5734             TCGTemp *ts = arg_temp(op->args[0]);
5735 
5736             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5737             tcg_debug_assert(ts->temp_subindex == 0);
5738             if (!ts->mem_allocated) {
5739                 temp_allocate_frame(s, ts);
5740             }
5741             tcg_out_st(s, TCG_TYPE_V128,
5742                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5743                        ts->mem_base->reg, ts->mem_offset);
5744         }
5745         /* fall through to mark all parts in memory */
5746 
5747     case TCG_CALL_RET_BY_REF:
5748         /* The callee has performed a write through the reference. */
5749         for (i = 0; i < nb_oargs; i++) {
5750             TCGTemp *ts = arg_temp(op->args[i]);
5751             ts->val_type = TEMP_VAL_MEM;
5752         }
5753         break;
5754 
5755     default:
5756         g_assert_not_reached();
5757     }
5758 
5759     /* Flush or discard output registers as needed. */
5760     for (i = 0; i < nb_oargs; i++) {
5761         TCGTemp *ts = arg_temp(op->args[i]);
5762         if (NEED_SYNC_ARG(i)) {
5763             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5764         } else if (IS_DEAD_ARG(i)) {
5765             temp_dead(s, ts);
5766         }
5767     }
5768 }
5769 
5770 /**
5771  * atom_and_align_for_opc:
5772  * @s: tcg context
5773  * @opc: memory operation code
5774  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5775  * @allow_two_ops: true if we are prepared to issue two operations
5776  *
5777  * Return the alignment and atomicity to use for the inline fast path
5778  * for the given memory operation.  The alignment may be larger than
5779  * that specified in @opc, and the correct alignment will be diagnosed
5780  * by the slow path helper.
5781  *
5782  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5783  * and issue two loads or stores for subalignment.
5784  */
5785 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5786                                            MemOp host_atom, bool allow_two_ops)
5787 {
5788     MemOp align = memop_alignment_bits(opc);
5789     MemOp size = opc & MO_SIZE;
5790     MemOp half = size ? size - 1 : 0;
5791     MemOp atom = opc & MO_ATOM_MASK;
5792     MemOp atmax;
5793 
5794     switch (atom) {
5795     case MO_ATOM_NONE:
5796         /* The operation requires no specific atomicity. */
5797         atmax = MO_8;
5798         break;
5799 
5800     case MO_ATOM_IFALIGN:
5801         atmax = size;
5802         break;
5803 
5804     case MO_ATOM_IFALIGN_PAIR:
5805         atmax = half;
5806         break;
5807 
5808     case MO_ATOM_WITHIN16:
5809         atmax = size;
5810         if (size == MO_128) {
5811             /* Misalignment implies !within16, and therefore no atomicity. */
5812         } else if (host_atom != MO_ATOM_WITHIN16) {
5813             /* The host does not implement within16, so require alignment. */
5814             align = MAX(align, size);
5815         }
5816         break;
5817 
5818     case MO_ATOM_WITHIN16_PAIR:
5819         atmax = size;
5820         /*
5821          * Misalignment implies !within16, and therefore half atomicity.
5822          * Any host prepared for two operations can implement this with
5823          * half alignment.
5824          */
5825         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5826             align = MAX(align, half);
5827         }
5828         break;
5829 
5830     case MO_ATOM_SUBALIGN:
5831         atmax = size;
5832         if (host_atom != MO_ATOM_SUBALIGN) {
5833             /* If unaligned but not odd, there are subobjects up to half. */
5834             if (allow_two_ops) {
5835                 align = MAX(align, half);
5836             } else {
5837                 align = MAX(align, size);
5838             }
5839         }
5840         break;
5841 
5842     default:
5843         g_assert_not_reached();
5844     }
5845 
5846     return (TCGAtomAlign){ .atom = atmax, .align = align };
5847 }
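/*
 * Illustrative example (not part of the build): a MO_64 access with
 * MO_ATOM_WITHIN16 on a host that only provides MO_ATOM_IFALIGN
 * returns atom = MO_64 with align raised to 8 bytes, since the host
 * cannot supply within-16 atomicity for a misaligned access; the
 * slow path then deals with anything less aligned.
 */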
5848 
5849 /*
5850  * Similarly for qemu_ld/st slow path helpers.
5851  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5852  * using only the provided backend tcg_out_* functions.
5853  */
5854 
5855 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5856 {
5857     int ofs = arg_slot_stk_ofs(slot);
5858 
5859     /*
5860      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5861      * require extension to uint64_t, adjust the address for uint32_t.
5862      */
5863     if (HOST_BIG_ENDIAN &&
5864         TCG_TARGET_REG_BITS == 64 &&
5865         type == TCG_TYPE_I32) {
5866         ofs += 4;
5867     }
5868     return ofs;
5869 }
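/*
 * Illustrative example (not part of the build): on a big-endian
 * 64-bit host, a TCG_TYPE_I32 value stored with a 32-bit store must
 * land in the low half of its 8-byte slot, which big-endian places
 * at the higher address; hence ofs += 4.
 */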
5870 
5871 static void tcg_out_helper_load_slots(TCGContext *s,
5872                                       unsigned nmov, TCGMovExtend *mov,
5873                                       const TCGLdstHelperParam *parm)
5874 {
5875     unsigned i;
5876     TCGReg dst3;
5877 
5878     /*
5879      * Start from the end, storing to the stack first.
5880      * This frees those registers, so we need not consider overlap.
5881      */
5882     for (i = nmov; i-- > 0; ) {
5883         unsigned slot = mov[i].dst;
5884 
5885         if (arg_slot_reg_p(slot)) {
5886             goto found_reg;
5887         }
5888 
5889         TCGReg src = mov[i].src;
5890         TCGType dst_type = mov[i].dst_type;
5891         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5892 
5893         /* The argument is going onto the stack; extend into scratch. */
5894         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5895             tcg_debug_assert(parm->ntmp != 0);
5896             mov[i].dst = src = parm->tmp[0];
5897             tcg_out_movext1(s, &mov[i]);
5898         }
5899 
5900         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5901                    tcg_out_helper_stk_ofs(dst_type, slot));
5902     }
5903     return;
5904 
5905  found_reg:
5906     /*
5907      * The remaining arguments are in registers.
5908      * Convert slot numbers to argument registers.
5909      */
5910     nmov = i + 1;
5911     for (i = 0; i < nmov; ++i) {
5912         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5913     }
5914 
5915     switch (nmov) {
5916     case 4:
5917         /* The backend must have provided enough temps for the worst case. */
5918         tcg_debug_assert(parm->ntmp >= 2);
5919 
5920         dst3 = mov[3].dst;
5921         for (unsigned j = 0; j < 3; ++j) {
5922             if (dst3 == mov[j].src) {
5923                 /*
5924                  * Conflict. Copy the source to a temporary, perform the
5925                  * remaining moves, then the extension from our scratch
5926                  * on the way out.
5927                  */
5928                 TCGReg scratch = parm->tmp[1];
5929 
5930                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5931                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5932                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5933                 return;
5934             }
5935         }
5936 
5937         /* No conflicts: perform this move and continue. */
5938         tcg_out_movext1(s, &mov[3]);
5939         /* fall through */
5940 
5941     case 3:
5942         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5943                         parm->ntmp ? parm->tmp[0] : -1);
5944         break;
5945     case 2:
5946         tcg_out_movext2(s, mov, mov + 1,
5947                         parm->ntmp ? parm->tmp[0] : -1);
5948         break;
5949     case 1:
5950         tcg_out_movext1(s, mov);
5951         break;
5952     default:
5953         g_assert_not_reached();
5954     }
5955 }
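/*
 * Illustrative conflict case for nmov == 4 above (not part of the
 * build): if mov[3].dst equals, say, mov[1].src, then mov[3] can
 * run neither first (its write would clobber a shuffle source) nor
 * last (the shuffle may have clobbered mov[3].src).  Parking
 * mov[3].src in parm->tmp[1], shuffling, then extending from the
 * scratch register resolves both hazards.
 */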
5956 
5957 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5958                                     TCGType type, tcg_target_long imm,
5959                                     const TCGLdstHelperParam *parm)
5960 {
5961     if (arg_slot_reg_p(slot)) {
5962         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5963     } else {
5964         int ofs = tcg_out_helper_stk_ofs(type, slot);
5965         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5966             tcg_debug_assert(parm->ntmp != 0);
5967             tcg_out_movi(s, type, parm->tmp[0], imm);
5968             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5969         }
5970     }
5971 }
5972 
5973 static void tcg_out_helper_load_common_args(TCGContext *s,
5974                                             const TCGLabelQemuLdst *ldst,
5975                                             const TCGLdstHelperParam *parm,
5976                                             const TCGHelperInfo *info,
5977                                             unsigned next_arg)
5978 {
5979     TCGMovExtend ptr_mov = {
5980         .dst_type = TCG_TYPE_PTR,
5981         .src_type = TCG_TYPE_PTR,
5982         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5983     };
5984     const TCGCallArgumentLoc *loc = &info->in[0];
5985     TCGType type;
5986     unsigned slot;
5987     tcg_target_ulong imm;
5988 
5989     /*
5990      * Handle env, which is always first.
5991      */
5992     ptr_mov.dst = loc->arg_slot;
5993     ptr_mov.src = TCG_AREG0;
5994     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5995 
5996     /*
5997      * Handle oi.
5998      */
5999     imm = ldst->oi;
6000     loc = &info->in[next_arg];
6001     type = TCG_TYPE_I32;
6002     switch (loc->kind) {
6003     case TCG_CALL_ARG_NORMAL:
6004         break;
6005     case TCG_CALL_ARG_EXTEND_U:
6006     case TCG_CALL_ARG_EXTEND_S:
6007         /* No extension required for MemOpIdx. */
6008         tcg_debug_assert(imm <= INT32_MAX);
6009         type = TCG_TYPE_REG;
6010         break;
6011     default:
6012         g_assert_not_reached();
6013     }
6014     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6015     next_arg++;
6016 
6017     /*
6018      * Handle ra.
6019      */
6020     loc = &info->in[next_arg];
6021     slot = loc->arg_slot;
6022     if (parm->ra_gen) {
6023         int arg_reg = -1;
6024         TCGReg ra_reg;
6025 
6026         if (arg_slot_reg_p(slot)) {
6027             arg_reg = tcg_target_call_iarg_regs[slot];
6028         }
6029         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6030 
6031         ptr_mov.dst = slot;
6032         ptr_mov.src = ra_reg;
6033         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6034     } else {
6035         imm = (uintptr_t)ldst->raddr;
6036         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6037     }
6038 }
6039 
6040 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6041                                        const TCGCallArgumentLoc *loc,
6042                                        TCGType dst_type, TCGType src_type,
6043                                        TCGReg lo, TCGReg hi)
6044 {
6045     MemOp reg_mo;
6046 
6047     if (dst_type <= TCG_TYPE_REG) {
6048         MemOp src_ext;
6049 
6050         switch (loc->kind) {
6051         case TCG_CALL_ARG_NORMAL:
6052             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6053             break;
6054         case TCG_CALL_ARG_EXTEND_U:
6055             dst_type = TCG_TYPE_REG;
6056             src_ext = MO_UL;
6057             break;
6058         case TCG_CALL_ARG_EXTEND_S:
6059             dst_type = TCG_TYPE_REG;
6060             src_ext = MO_SL;
6061             break;
6062         default:
6063             g_assert_not_reached();
6064         }
6065 
6066         mov[0].dst = loc->arg_slot;
6067         mov[0].dst_type = dst_type;
6068         mov[0].src = lo;
6069         mov[0].src_type = src_type;
6070         mov[0].src_ext = src_ext;
6071         return 1;
6072     }
6073 
6074     if (TCG_TARGET_REG_BITS == 32) {
6075         assert(dst_type == TCG_TYPE_I64);
6076         reg_mo = MO_32;
6077     } else {
6078         assert(dst_type == TCG_TYPE_I128);
6079         reg_mo = MO_64;
6080     }
6081 
6082     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6083     mov[0].src = lo;
6084     mov[0].dst_type = TCG_TYPE_REG;
6085     mov[0].src_type = TCG_TYPE_REG;
6086     mov[0].src_ext = reg_mo;
6087 
6088     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6089     mov[1].src = hi;
6090     mov[1].dst_type = TCG_TYPE_REG;
6091     mov[1].src_type = TCG_TYPE_REG;
6092     mov[1].src_ext = reg_mo;
6093 
6094     return 2;
6095 }
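/*
 * Illustrative example (not part of the build): on a 32-bit host a
 * TCG_TYPE_I64 argument expands into two TCG_TYPE_REG moves, the lo
 * register targeting loc[HOST_BIG_ENDIAN].arg_slot and the hi
 * register the other slot, so the pair reaches its argument slots
 * in the host's natural word order.
 */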
6096 
6097 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6098                                    const TCGLdstHelperParam *parm)
6099 {
6100     const TCGHelperInfo *info;
6101     const TCGCallArgumentLoc *loc;
6102     TCGMovExtend mov[2];
6103     unsigned next_arg, nmov;
6104     MemOp mop = get_memop(ldst->oi);
6105 
6106     switch (mop & MO_SIZE) {
6107     case MO_8:
6108     case MO_16:
6109     case MO_32:
6110         info = &info_helper_ld32_mmu;
6111         break;
6112     case MO_64:
6113         info = &info_helper_ld64_mmu;
6114         break;
6115     case MO_128:
6116         info = &info_helper_ld128_mmu;
6117         break;
6118     default:
6119         g_assert_not_reached();
6120     }
6121 
6122     /* Defer env argument. */
6123     next_arg = 1;
6124 
6125     loc = &info->in[next_arg];
6126     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6127         /*
6128          * 32-bit host with 32-bit guest: zero-extend the guest address
6129          * to 64 bits for the helper by storing the low part, then
6130          * loading a zero for the high part.
6131          */
6132         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6133                                TCG_TYPE_I32, TCG_TYPE_I32,
6134                                ldst->addr_reg, -1);
6135         tcg_out_helper_load_slots(s, 1, mov, parm);
6136 
6137         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6138                                 TCG_TYPE_I32, 0, parm);
6139         next_arg += 2;
6140     } else {
6141         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6142                                       ldst->addr_reg, -1);
6143         tcg_out_helper_load_slots(s, nmov, mov, parm);
6144         next_arg += nmov;
6145     }
6146 
6147     switch (info->out_kind) {
6148     case TCG_CALL_RET_NORMAL:
6149     case TCG_CALL_RET_BY_VEC:
6150         break;
6151     case TCG_CALL_RET_BY_REF:
6152         /*
6153          * The return reference is in the first argument slot.
6154          * We need memory in which to return: re-use the top of stack.
6155          */
6156         {
6157             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6158 
6159             if (arg_slot_reg_p(0)) {
6160                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6161                                  TCG_REG_CALL_STACK, ofs_slot0);
6162             } else {
6163                 tcg_debug_assert(parm->ntmp != 0);
6164                 tcg_out_addi_ptr(s, parm->tmp[0],
6165                                  TCG_REG_CALL_STACK, ofs_slot0);
6166                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6167                            TCG_REG_CALL_STACK, ofs_slot0);
6168             }
6169         }
6170         break;
6171     default:
6172         g_assert_not_reached();
6173     }
6174 
6175     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6176 }
6177 
6178 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6179                                   bool load_sign,
6180                                   const TCGLdstHelperParam *parm)
6181 {
6182     MemOp mop = get_memop(ldst->oi);
6183     TCGMovExtend mov[2];
6184     int ofs_slot0;
6185 
6186     switch (ldst->type) {
6187     case TCG_TYPE_I64:
6188         if (TCG_TARGET_REG_BITS == 32) {
6189             break;
6190         }
6191         /* fall through */
6192 
6193     case TCG_TYPE_I32:
6194         mov[0].dst = ldst->datalo_reg;
6195         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6196         mov[0].dst_type = ldst->type;
6197         mov[0].src_type = TCG_TYPE_REG;
6198 
6199         /*
6200          * If load_sign, then we allowed the helper to perform the
6201          * appropriate sign extension to tcg_target_ulong, and all
6202          * we need now is a plain move.
6203          *
6204          * If not, then we expect the relevant extension
6205          * instruction to be no more expensive than a move, and
6206          * we thus save icache space by using only one of the
6207          * two helper functions.
6208          */
6209         if (load_sign || !(mop & MO_SIGN)) {
6210             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6211                 mov[0].src_ext = MO_32;
6212             } else {
6213                 mov[0].src_ext = MO_64;
6214             }
6215         } else {
6216             mov[0].src_ext = mop & MO_SSIZE;
6217         }
6218         tcg_out_movext1(s, mov);
6219         return;
6220 
6221     case TCG_TYPE_I128:
6222         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6223         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6224         switch (TCG_TARGET_CALL_RET_I128) {
6225         case TCG_CALL_RET_NORMAL:
6226             break;
6227         case TCG_CALL_RET_BY_VEC:
6228             tcg_out_st(s, TCG_TYPE_V128,
6229                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6230                        TCG_REG_CALL_STACK, ofs_slot0);
6231             /* fall through */
6232         case TCG_CALL_RET_BY_REF:
6233             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6234                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6235             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6236                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6237             return;
6238         default:
6239             g_assert_not_reached();
6240         }
6241         break;
6242 
6243     default:
6244         g_assert_not_reached();
6245     }
6246 
6247     mov[0].dst = ldst->datalo_reg;
6248     mov[0].src =
6249         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6250     mov[0].dst_type = TCG_TYPE_REG;
6251     mov[0].src_type = TCG_TYPE_REG;
6252     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6253 
6254     mov[1].dst = ldst->datahi_reg;
6255     mov[1].src =
6256         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6257     mov[1].dst_type = TCG_TYPE_REG;
6258     mov[1].src_type = TCG_TYPE_REG;
6259     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6260 
6261     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6262 }
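
/*
 * Note: tcg_out_movext2 above performs the two moves while resolving any
 * overlap between the destination pair and the call's return registers,
 * using parm->tmp[0] as scratch; -1 signals that no scratch is available,
 * in which case any overlap must be resolvable by ordering alone.
 */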
6263 
6264 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6265                                    const TCGLdstHelperParam *parm)
6266 {
6267     const TCGHelperInfo *info;
6268     const TCGCallArgumentLoc *loc;
6269     TCGMovExtend mov[4];
6270     TCGType data_type;
6271     unsigned next_arg, nmov, n;
6272     MemOp mop = get_memop(ldst->oi);
6273 
6274     switch (mop & MO_SIZE) {
6275     case MO_8:
6276     case MO_16:
6277     case MO_32:
6278         info = &info_helper_st32_mmu;
6279         data_type = TCG_TYPE_I32;
6280         break;
6281     case MO_64:
6282         info = &info_helper_st64_mmu;
6283         data_type = TCG_TYPE_I64;
6284         break;
6285     case MO_128:
6286         info = &info_helper_st128_mmu;
6287         data_type = TCG_TYPE_I128;
6288         break;
6289     default:
6290         g_assert_not_reached();
6291     }
6292 
6293     /* Defer env argument. */
6294     next_arg = 1;
6295     nmov = 0;
6296 
6297     /* Handle addr argument. */
6298     loc = &info->in[next_arg];
6299     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6300     if (TCG_TARGET_REG_BITS == 32) {
6301         /*
6302          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6303          * to 64 bits for the helper by storing the low part.  Later,
6304          * after we have processed the register inputs, we will load a
6305          * zero for the high part.
6306          */
6307         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6308                                TCG_TYPE_I32, TCG_TYPE_I32,
6309                                ldst->addr_reg, -1);
6310         next_arg += 2;
6311         nmov += 1;
6312     } else {
6313         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6314                                    ldst->addr_reg, -1);
6315         next_arg += n;
6316         nmov += n;
6317     }
6318 
6319     /* Handle data argument. */
6320     loc = &info->in[next_arg];
6321     switch (loc->kind) {
6322     case TCG_CALL_ARG_NORMAL:
6323     case TCG_CALL_ARG_EXTEND_U:
6324     case TCG_CALL_ARG_EXTEND_S:
6325         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6326                                    ldst->datalo_reg, ldst->datahi_reg);
6327         next_arg += n;
6328         nmov += n;
6329         tcg_out_helper_load_slots(s, nmov, mov, parm);
6330         break;
6331 
6332     case TCG_CALL_ARG_BY_REF:
6333         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6334         tcg_debug_assert(data_type == TCG_TYPE_I128);
6335         tcg_out_st(s, TCG_TYPE_I64,
6336                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6337                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6338         tcg_out_st(s, TCG_TYPE_I64,
6339                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6340                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6341 
6342         tcg_out_helper_load_slots(s, nmov, mov, parm);
6343 
6344         if (arg_slot_reg_p(loc->arg_slot)) {
6345             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6346                              TCG_REG_CALL_STACK,
6347                              arg_slot_stk_ofs(loc->ref_slot));
6348         } else {
6349             tcg_debug_assert(parm->ntmp != 0);
6350             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6351                              arg_slot_stk_ofs(loc->ref_slot));
6352             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6353                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6354         }
6355         next_arg += 2;
6356         break;
6357 
6358     default:
6359         g_assert_not_reached();
6360     }
6361 
6362     if (TCG_TARGET_REG_BITS == 32) {
6363         /* Zero-extend the address by loading a zero for the high part. */
6364         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6365         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6366     }
6367 
6368     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6369 }
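
/*
 * For the TCG_CALL_ARG_BY_REF case above, the resulting frame is roughly
 * (little-endian host):
 *
 *   stk[ref_slot]     = datalo;
 *   stk[ref_slot + 1] = datahi;
 *   arg_slot          = &stk[ref_slot];
 *
 * i.e. the 128-bit value is spilled to reserved stack slots and then
 * passed by address.
 */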
6370 
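/*
 * Returns the size in bytes of the generated code on success, -1 when the
 * code buffer high-water mark is crossed (the caller restarts generation
 * in a new region), or -2 when the TB exceeds other generation limits
 * (e.g. the range of gen_insn_end_off, or unresolvable relocations).
 */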
6371 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6372 {
6373     int i, start_words, num_insns;
6374     TCGOp *op;
6375 
6376     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6377                  && qemu_log_in_addr_range(pc_start))) {
6378         FILE *logfile = qemu_log_trylock();
6379         if (logfile) {
6380             fprintf(logfile, "OP:\n");
6381             tcg_dump_ops(s, logfile, false);
6382             fprintf(logfile, "\n");
6383             qemu_log_unlock(logfile);
6384         }
6385     }
6386 
6387 #ifdef CONFIG_DEBUG_TCG
6388     /* Ensure all labels referenced have been emitted.  */
6389     {
6390         TCGLabel *l;
6391         bool error = false;
6392 
6393         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6394             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6395                 qemu_log_mask(CPU_LOG_TB_OP,
6396                               "$L%d referenced but not present.\n", l->id);
6397                 error = true;
6398             }
6399         }
6400         assert(!error);
6401     }
6402 #endif
6403 
6404     /* Do not reuse any EBB temps allocated within the TB. */
6405     tcg_temp_ebb_reset_freed(s);
6406 
6407     tcg_optimize(s);
6408 
6409     reachable_code_pass(s);
6410     liveness_pass_0(s);
6411     liveness_pass_1(s);
6412 
6413     if (s->nb_indirects > 0) {
6414         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6415                      && qemu_log_in_addr_range(pc_start))) {
6416             FILE *logfile = qemu_log_trylock();
6417             if (logfile) {
6418                 fprintf(logfile, "OP before indirect lowering:\n");
6419                 tcg_dump_ops(s, logfile, false);
6420                 fprintf(logfile, "\n");
6421                 qemu_log_unlock(logfile);
6422             }
6423         }
6424 
6425         /* Replace indirect temps with direct temps.  */
6426         if (liveness_pass_2(s)) {
6427             /* If changes were made, re-run liveness.  */
6428             liveness_pass_1(s);
6429         }
6430     }
6431 
6432     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6433                  && qemu_log_in_addr_range(pc_start))) {
6434         FILE *logfile = qemu_log_trylock();
6435         if (logfile) {
6436             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6437             tcg_dump_ops(s, logfile, true);
6438             fprintf(logfile, "\n");
6439             qemu_log_unlock(logfile);
6440         }
6441     }
6442 
6443     /* Initialize goto_tb jump offsets. */
6444     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6445     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6446     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6447     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6448 
6449     tcg_reg_alloc_start(s);
6450 
6451     /*
6452      * Reset the buffer pointers when restarting after overflow.
6453      * TODO: Move this into translate-all.c with the rest of the
6454      * buffer management.  Having only this done here is confusing.
6455      */
6456     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6457     s->code_ptr = s->code_buf;
6458     s->data_gen_ptr = NULL;
6459 
6460     QSIMPLEQ_INIT(&s->ldst_labels);
6461     s->pool_labels = NULL;
6462 
6463     start_words = s->insn_start_words;
6464     s->gen_insn_data =
6465         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6466 
6467     tcg_out_tb_start(s);
6468 
6469     num_insns = -1;
6470     QTAILQ_FOREACH(op, &s->ops, link) {
6471         TCGOpcode opc = op->opc;
6472 
6473         switch (opc) {
6474         case INDEX_op_mov:
6475         case INDEX_op_mov_vec:
6476             tcg_reg_alloc_mov(s, op);
6477             break;
6478         case INDEX_op_dup_vec:
6479             tcg_reg_alloc_dup(s, op);
6480             break;
6481         case INDEX_op_insn_start:
6482             if (num_insns >= 0) {
6483                 size_t off = tcg_current_code_size(s);
6484                 s->gen_insn_end_off[num_insns] = off;
6485                 /* Assert that we do not overflow our stored offset.  */
6486                 assert(s->gen_insn_end_off[num_insns] == off);
6487             }
6488             num_insns++;
6489             for (i = 0; i < start_words; ++i) {
6490                 s->gen_insn_data[num_insns * start_words + i] =
6491                     tcg_get_insn_start_param(op, i);
6492             }
6493             break;
6494         case INDEX_op_discard:
6495             temp_dead(s, arg_temp(op->args[0]));
6496             break;
6497         case INDEX_op_set_label:
6498             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6499             tcg_out_label(s, arg_label(op->args[0]));
6500             break;
6501         case INDEX_op_call:
6502             tcg_reg_alloc_call(s, op);
6503             break;
6504         case INDEX_op_exit_tb:
6505             tcg_out_exit_tb(s, op->args[0]);
6506             break;
6507         case INDEX_op_goto_tb:
6508             tcg_out_goto_tb(s, op->args[0]);
6509             break;
6510         case INDEX_op_dup2_vec:
6511             if (tcg_reg_alloc_dup2(s, op)) {
6512                 break;
6513             }
6514             /* fall through */
6515         default:
6516             /* Sanity check that we've not introduced any unhandled opcodes. */
6517             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6518                                               TCGOP_FLAGS(op)));
6519             /* Note: it would be faster to have specialized register
6520                allocator functions for some common argument
6521                patterns. */
6522             tcg_reg_alloc_op(s, op);
6523             break;
6524         }
6525         /* Test for (pending) buffer overflow.  The assumption is that any
6526            one operation beginning below the high water mark cannot overrun
6527            the buffer completely.  Thus we can test for overflow after
6528            generating code without having to check during generation.  */
6529         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6530             return -1;
6531         }
6532         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6533         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6534             return -2;
6535         }
6536     }
6537     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6538     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6539 
6540     /* Generate TB finalization at the end of block */
6541     i = tcg_out_ldst_finalize(s);
6542     if (i < 0) {
6543         return i;
6544     }
6545     i = tcg_out_pool_finalize(s);
6546     if (i < 0) {
6547         return i;
6548     }
6549     if (!tcg_resolve_relocs(s)) {
6550         return -2;
6551     }
6552 
6553 #ifndef CONFIG_TCG_INTERPRETER
6554     /* flush instruction cache */
6555     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6556                         (uintptr_t)s->code_buf,
6557                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6558 #endif
6559 
6560     return tcg_current_code_size(s);
6561 }
6562 
6563 #ifdef ELF_HOST_MACHINE
6564 /* In order to use this feature, the backend needs to do three things:
6565 
6566    (1) Define ELF_HOST_MACHINE to indicate both what value to
6567        put into the ELF image and to indicate support for the feature.
6568 
6569    (2) Define tcg_register_jit.  This should create a buffer containing
6570        the contents of a .debug_frame section that describes the post-
6571        prologue unwind info for the tcg machine.
6572 
6573    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6574 */
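
/*
 * For illustration, a minimal tcg_register_jit for step (2), assuming a
 * backend-specific DebugFrame built around the common header above (the
 * field values here are placeholders):
 *
 *   static const DebugFrame debug_frame = {
 *       .h.cie.len = sizeof(DebugFrameCIE) - 4,
 *       .h.cie.id = -1,
 *       .h.cie.version = 1,
 *       ...
 *   };
 *
 *   void tcg_register_jit(const void *buf, size_t buf_size)
 *   {
 *       tcg_register_jit_int(buf, buf_size, &debug_frame,
 *                            sizeof(debug_frame));
 *   }
 */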
6575 
6576 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6577 typedef enum {
6578     JIT_NOACTION = 0,
6579     JIT_REGISTER_FN,
6580     JIT_UNREGISTER_FN
6581 } jit_actions_t;
6582 
6583 struct jit_code_entry {
6584     struct jit_code_entry *next_entry;
6585     struct jit_code_entry *prev_entry;
6586     const void *symfile_addr;
6587     uint64_t symfile_size;
6588 };
6589 
6590 struct jit_descriptor {
6591     uint32_t version;
6592     uint32_t action_flag;
6593     struct jit_code_entry *relevant_entry;
6594     struct jit_code_entry *first_entry;
6595 };
6596 
6597 void __jit_debug_register_code(void) __attribute__((noinline));
6598 void __jit_debug_register_code(void)
6599 {
6600     asm("");
6601 }
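
/*
 * GDB sets a breakpoint in this function; the noinline attribute and the
 * empty asm statement keep the compiler from discarding or folding it, so
 * the breakpoint reliably fires after the descriptor below is updated.
 */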
6602 
6603 /* Must statically initialize the version, because GDB may check
6604    the version before we can set it.  */
6605 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6606 
6607 /* End GDB interface.  */
6608 
6609 static int find_string(const char *strtab, const char *str)
6610 {
6611     const char *p = strtab + 1;
6612 
6613     while (1) {
6614         if (strcmp(p, str) == 0) {
6615             return p - strtab;
6616         }
6617         p += strlen(p) + 1;
6618     }
6619 }
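
/*
 * find_string assumes the name is present: there is no end-of-table
 * check, so looking up a missing string would run past the table.
 * With the .str table below, e.g. find_string(img->str, ".text")
 * yields 1, the offset just past the leading NUL.
 */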
6620 
6621 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6622                                  const void *debug_frame,
6623                                  size_t debug_frame_size)
6624 {
6625     struct __attribute__((packed)) DebugInfo {
6626         uint32_t  len;
6627         uint16_t  version;
6628         uint32_t  abbrev;
6629         uint8_t   ptr_size;
6630         uint8_t   cu_die;
6631         uint16_t  cu_lang;
6632         uintptr_t cu_low_pc;
6633         uintptr_t cu_high_pc;
6634         uint8_t   fn_die;
6635         char      fn_name[16];
6636         uintptr_t fn_low_pc;
6637         uintptr_t fn_high_pc;
6638         uint8_t   cu_eoc;
6639     };
6640 
6641     struct ElfImage {
6642         ElfW(Ehdr) ehdr;
6643         ElfW(Phdr) phdr;
6644         ElfW(Shdr) shdr[7];
6645         ElfW(Sym)  sym[2];
6646         struct DebugInfo di;
6647         uint8_t    da[24];
6648         char       str[80];
6649     };
6650 
6651     struct ElfImage *img;
6652 
6653     static const struct ElfImage img_template = {
6654         .ehdr = {
6655             .e_ident[EI_MAG0] = ELFMAG0,
6656             .e_ident[EI_MAG1] = ELFMAG1,
6657             .e_ident[EI_MAG2] = ELFMAG2,
6658             .e_ident[EI_MAG3] = ELFMAG3,
6659             .e_ident[EI_CLASS] = ELF_CLASS,
6660             .e_ident[EI_DATA] = ELF_DATA,
6661             .e_ident[EI_VERSION] = EV_CURRENT,
6662             .e_type = ET_EXEC,
6663             .e_machine = ELF_HOST_MACHINE,
6664             .e_version = EV_CURRENT,
6665             .e_phoff = offsetof(struct ElfImage, phdr),
6666             .e_shoff = offsetof(struct ElfImage, shdr),
6667             .e_ehsize = sizeof(ElfW(Ehdr)),
6668             .e_phentsize = sizeof(ElfW(Phdr)),
6669             .e_phnum = 1,
6670             .e_shentsize = sizeof(ElfW(Shdr)),
6671             .e_shnum = ARRAY_SIZE(img->shdr),
6672             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6673 #ifdef ELF_HOST_FLAGS
6674             .e_flags = ELF_HOST_FLAGS,
6675 #endif
6676 #ifdef ELF_OSABI
6677             .e_ident[EI_OSABI] = ELF_OSABI,
6678 #endif
6679         },
6680         .phdr = {
6681             .p_type = PT_LOAD,
6682             .p_flags = PF_X,
6683         },
6684         .shdr = {
6685             [0] = { .sh_type = SHT_NULL },
6686             /* Trick: The contents of code_gen_buffer are not present in
6687                this fake ELF file; that got allocated elsewhere.  Therefore
6688                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6689                will not look for contents.  We can record any address.  */
6690             [1] = { /* .text */
6691                 .sh_type = SHT_NOBITS,
6692                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6693             },
6694             [2] = { /* .debug_info */
6695                 .sh_type = SHT_PROGBITS,
6696                 .sh_offset = offsetof(struct ElfImage, di),
6697                 .sh_size = sizeof(struct DebugInfo),
6698             },
6699             [3] = { /* .debug_abbrev */
6700                 .sh_type = SHT_PROGBITS,
6701                 .sh_offset = offsetof(struct ElfImage, da),
6702                 .sh_size = sizeof(img->da),
6703             },
6704             [4] = { /* .debug_frame */
6705                 .sh_type = SHT_PROGBITS,
6706                 .sh_offset = sizeof(struct ElfImage),
6707             },
6708             [5] = { /* .symtab */
6709                 .sh_type = SHT_SYMTAB,
6710                 .sh_offset = offsetof(struct ElfImage, sym),
6711                 .sh_size = sizeof(img->sym),
6712                 .sh_info = 1,
6713                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6714                 .sh_entsize = sizeof(ElfW(Sym)),
6715             },
6716             [6] = { /* .strtab */
6717                 .sh_type = SHT_STRTAB,
6718                 .sh_offset = offsetof(struct ElfImage, str),
6719                 .sh_size = sizeof(img->str),
6720             }
6721         },
6722         .sym = {
6723             [1] = { /* code_gen_buffer */
6724                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6725                 .st_shndx = 1,
6726             }
6727         },
6728         .di = {
6729             .len = sizeof(struct DebugInfo) - 4,
6730             .version = 2,
6731             .ptr_size = sizeof(void *),
6732             .cu_die = 1,
6733             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6734             .fn_die = 2,
6735             .fn_name = "code_gen_buffer"
6736         },
6737         .da = {
6738             1,          /* abbrev number (the cu) */
6739             0x11, 1,    /* DW_TAG_compile_unit, has children */
6740             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6741             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6742             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6743             0, 0,       /* end of abbrev */
6744             2,          /* abbrev number (the fn) */
6745             0x2e, 0,    /* DW_TAG_subprogram, no children */
6746             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6747             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6748             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6749             0, 0,       /* end of abbrev */
6750             0           /* no more abbrev */
6751         },
6752         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6753                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6754     };
6755 
6756     /* We only need a single jit entry; statically allocate it.  */
6757     static struct jit_code_entry one_entry;
6758 
6759     uintptr_t buf = (uintptr_t)buf_ptr;
6760     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6761     DebugFrameHeader *dfh;
6762 
6763     img = g_malloc(img_size);
6764     *img = img_template;
6765 
6766     img->phdr.p_vaddr = buf;
6767     img->phdr.p_paddr = buf;
6768     img->phdr.p_memsz = buf_size;
6769 
6770     img->shdr[1].sh_name = find_string(img->str, ".text");
6771     img->shdr[1].sh_addr = buf;
6772     img->shdr[1].sh_size = buf_size;
6773 
6774     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6775     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6776 
6777     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6778     img->shdr[4].sh_size = debug_frame_size;
6779 
6780     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6781     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6782 
6783     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6784     img->sym[1].st_value = buf;
6785     img->sym[1].st_size = buf_size;
6786 
6787     img->di.cu_low_pc = buf;
6788     img->di.cu_high_pc = buf + buf_size;
6789     img->di.fn_low_pc = buf;
6790     img->di.fn_high_pc = buf + buf_size;
6791 
6792     dfh = (DebugFrameHeader *)(img + 1);
6793     memcpy(dfh, debug_frame, debug_frame_size);
6794     dfh->fde.func_start = buf;
6795     dfh->fde.func_len = buf_size;
6796 
6797 #ifdef DEBUG_JIT
6798     /* Enable this block to debug creation of the ELF image file.
6799        The image can be inspected with readelf, objdump, etc.  */
6800     {
6801         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6802         FILE *f = fopen(jit, "w+b");
6803         if (f) {
6804             if (fwrite(img, img_size, 1, f) != 1) {
6805                 /* Avoid the unused-result warning for fwrite.  */
6806             }
6807             fclose(f);
6808         }
6809     }
6810 #endif
6811 
6812     one_entry.symfile_addr = img;
6813     one_entry.symfile_size = img_size;
6814 
6815     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6816     __jit_debug_descriptor.relevant_entry = &one_entry;
6817     __jit_debug_descriptor.first_entry = &one_entry;
6818     __jit_debug_register_code();
6819 }
6820 #else
6821 /* No support for the feature.  Provide the entry point expected by exec.c,
6822    and implement the internal function we declared earlier.  */
6823 
6824 static void tcg_register_jit_int(const void *buf, size_t size,
6825                                  const void *debug_frame,
6826                                  size_t debug_frame_size)
6827 {
6828 }
6829 
6830 void tcg_register_jit(const void *buf, size_t buf_size)
6831 {
6832 }
6833 #endif /* ELF_HOST_MACHINE */
6834 
6835 #if !TCG_TARGET_MAYBE_vec
6836 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6837 {
6838     g_assert_not_reached();
6839 }
6840 #endif
6841