xref: /openbmc/qemu/tcg/tcg.c (revision b2c514f9d5cab89814dc8a6b7c98c653ca8523d3)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /*
     * Length of the CIE, not counting this field itself.  Aligned to the
     * pointer size so the containing DebugFrameHeader image stays aligned.
     */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;            /* CIE id; distinguishes CIE from FDE records */
    uint8_t version;        /* .debug_frame format version */
    char augmentation[1];   /* NUL-terminated augmentation string */
    uint8_t code_align;     /* code alignment factor */
    uint8_t data_align;     /* data alignment factor */
    uint8_t return_column;  /* column holding the return address */
} DebugFrameCIE;

/* One FDE header, describing a single contiguous function range. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;    /* offset back to the governing CIE */
    uintptr_t func_start;   /* first address covered */
    uintptr_t func_len;     /* number of bytes covered */
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE, as emitted for the JIT image. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Description of one deferred qemu_ld/qemu_st slow path, queued on
 * TCGContext.ldst_labels and emitted by tcg_out_ldst_finalize().
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;            /* combined memop + mmu index of the access */
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;  /* link in s->ldst_labels */
};
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
/* Backend hooks for vector operations, provided by tcg-target.c.inc. */
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
/*
 * The host has no vector support: these stubs must never be reached,
 * because tcg_can_emit_vec_op() below reports every vector op as
 * unsupported.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* No host vector support: no vector opcode can be emitted directly. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
/*
 * Parameters controlling how the qemu_ld/st helper arguments are
 * marshalled by tcg_out_ld_helper_args() and friends.
 */
typedef struct TCGLdstHelperParam {
    /* Optional hook generating the return-address argument into arg_reg. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers available for marshalling */
} TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
/*
 * Slow-path load helpers, indexed by MemOp size|sign.  Entries not
 * listed are NULL.  The 32-bit-host cases (MO_SL, MO_128) are handled
 * differently and only present for 64-bit hosts.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
220 
/* Slow-path store helpers, indexed by MemOp size; unlisted entries NULL. */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
230 
/* Atomicity and alignment requirements resolved for one memory access. */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
#ifdef CONFIG_USER_ONLY
/* User-only: whether the softmmu path is in use (system builds imply it). */
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;          /* the bootstrap TCG context */
__thread TCGContext *tcg_ctx;     /* this thread's active context */

TCGContext **tcg_ctxs;            /* all registered contexts */
unsigned int tcg_cur_ctxs;        /* number of contexts in tcg_ctxs */
unsigned int tcg_max_ctxs;        /* allocated size of tcg_ctxs */
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
/* NOTE(review): presumably the RW->RX delta used by tcg_splitwx_to_rx. */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;   /* entry point into generated code */
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
260 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Append one byte to the output stream. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *p = s->code_ptr;

    *p = v;
    s->code_ptr = p + 1;
}

/* Rewrite one previously emitted byte in place. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
428 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /*
     * Unwind to the sigsetjmp that guards translation; -2 requests a
     * retry of this TB with fewer guest instructions.
     * NOTE(review): the meaning of -2 is inferred from this comment and
     * the function name — confirm against the sigsetjmp site.
     */
    siglongjmp(s->jmp_trans, -2);
}
435 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;           /* destination register, or argument slot (above) */
    TCGReg src;             /* source register */
    TCGType dst_type;       /* integral type of the destination */
    TCGType src_type;       /* integral type of the source */
    MemOp src_ext;          /* extension to apply to the source */
} TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
/**
 * tcg_out_movext2 -- move and extend two pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    /* If i1 does not clobber i2's source, the moves can be done in order. */
    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    /*
     * Here i1->dst == src2.  If i2->dst == src1 as well, the registers
     * form a 2-cycle: swap them, or park src1 in the scratch register.
     */
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: copy src1 aside so i2's write cannot lose it. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* i2 first (its destination is no longer a live source), then i1. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
566 
/**
 * tcg_out_movext3 -- move and extend three pair
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /*
     * If any one move clobbers neither of the other two sources, do it
     * first; the remaining pair reduces to tcg_out_movext2.
     */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* Break the cycle: park src1, then do i3, i2, and i1 last. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* Break the cycle: park src1, then do i2, i3, and i1 last. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
/*
 * Emit the constant pool after the generated code, deduplicating
 * identical entries and patching each recorded relocation.  Returns 0
 * on success, -1 on buffer overflow, -2 if a relocation failed.
 */
static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;   /* last entry actually emitted, for dedup */
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        /*
         * The list is sorted (new_pool_insert), so duplicates are
         * adjacent: only emit when this entry differs from the last.
         */
        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        /*
         * Whether emitted or deduplicated, the entry's data now ends at
         * @a; patch the relocation to its read-execute address.
         */
        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}
833 
/*
 * Token-pasting helpers: glue prefix P to 1..6 constraint names,
 * separated by underscores, forming a unique identifier.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */
/*
 * First expansion of tcg-target-con-set.h: each C_Ox_Iy(...) line in
 * that header becomes one enumerator.  "O" counts outputs, "I" counts
 * inputs; the "N" variants correspond to the '&'-prefixed (new register)
 * outputs in the string expansion below.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,         /* sentinel: not a constraint_sets[] index */
    C_NotImplemented = -1,  /* sentinel: not a constraint_sets[] index */
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
868 
/* Backend hook: select the constraint set for an opcode/type/flags triple. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

/* Retract the first expansion so the header can be expanded again below. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
887 
/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;               /* output/input operand counts */
    const char *args_ct_str[TCG_MAX_OP_ARGS]; /* constraint string per operand */
} TCGConstraintSet;

/*
 * Second expansion: each C_* macro now produces a TCGConstraintSet
 * initializer, stringizing the constraint letters.  A "&" prefix marks
 * an output register that must not overlap the inputs (the N in the
 * macro name).
 */
#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

/* Same include, same order as the enum: index matches TCGConstraintSetIndex. */
static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};
918 
/* Undefine again before the third and final redefinition. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

/* Same as the first expansion but without the trailing comma, so the
   macros form usable expressions inside the backend's switch. */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host isa.
966  *   Otherwise:        The register allocation constrains for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
/* Binary op: out_rrr for two register inputs; out_rri when the second
   input is an immediate. */
typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

/* Unary op: one register output, one register input. */
typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

/* Subtraction is not commutative, so unlike TCGOutOpBinary the
   immediate form (out_rir) places the constant in the first operand. */
typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
1001 
1002 #include "tcg-target.c.inc"
1003 
#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T, otherwise give a nice compiler error.
 * This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)

/* Register allocation descriptions for every TCGOpcode. */
/* Designated initializers: unlisted opcodes remain NULL. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP
1039 
1040 /*
1041  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1042  * and registered the target's TCG globals) must register with this function
1043  * before initiating translation.
1044  *
1045  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1046  * of tcg_region_init() for the reasoning behind this.
1047  *
1048  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1049  * system-mode tcg_ctxs[] does not track tcg_ctx_init, since the initial context
1050  * is not used anymore for translation once this function is called.
1051  *
1052  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1053  * iterates over the array (e.g. tcg_code_size() the same for both system/user
1054  * modes.
1055  */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-mode: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    /* Start from a copy of the fully-initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    /* The copied mem_base pointers still reference the parent's temps
       array; redirect each to the corresponding temp in this copy. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    /* NOTE(review): slot 0 appears to receive its initial region
       elsewhere, hence the n > 0 guard — confirm against region code. */
    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
1090 
1091 /* pool based memory allocation */
1092 void *tcg_malloc_internal(TCGContext *s, int size)
1093 {
1094     TCGPool *p;
1095     int pool_size;
1096 
1097     if (size > TCG_POOL_CHUNK_SIZE) {
1098         /* big malloc: insert a new pool (XXX: could optimize) */
1099         p = g_malloc(sizeof(TCGPool) + size);
1100         p->size = size;
1101         p->next = s->pool_first_large;
1102         s->pool_first_large = p;
1103         return p->data;
1104     } else {
1105         p = s->pool_current;
1106         if (!p) {
1107             p = s->pool_first;
1108             if (!p)
1109                 goto new_pool;
1110         } else {
1111             if (!p->next) {
1112             new_pool:
1113                 pool_size = TCG_POOL_CHUNK_SIZE;
1114                 p = g_malloc(sizeof(TCGPool) + pool_size);
1115                 p->size = pool_size;
1116                 p->next = NULL;
1117                 if (s->pool_current) {
1118                     s->pool_current->next = p;
1119                 } else {
1120                     s->pool_first = p;
1121                 }
1122             } else {
1123                 p = p->next;
1124             }
1125         }
1126     }
1127     s->pool_current = p;
1128     s->pool_cur = p->data + size;
1129     s->pool_end = p->data + p->size;
1130     return p->data;
1131 }
1132 
1133 void tcg_pool_reset(TCGContext *s)
1134 {
1135     TCGPool *p, *t;
1136     for (p = s->pool_first_large; p; p = t) {
1137         t = p->next;
1138         g_free(p);
1139     }
1140     s->pool_first_large = NULL;
1141     s->pool_cur = s->pool_end = NULL;
1142     s->pool_current = NULL;
1143 }
1144 
1145 /*
1146  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1147  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1148  * We only use these for layout in tcg_out_ld_helper_ret and
1149  * tcg_out_st_helper_args, and share them between several of
1150  * the helpers, with the end result that it's easier to build manually.
1151  */
1152 
/* "ttl" = tcg_target_ulong: 32-bit on 32-bit hosts, else 64-bit. */
#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

/* Hand-built call layouts for the load/store slow-path helpers. */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
1215 
1216 #ifdef CONFIG_TCG_INTERPRETER
/* Map a dh_typecode_* value to the matching libffi type descriptor.
   Aborts on any typecode not produced by the helper machinery. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL                    /* element list is NULL-terminated */
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}
1253 
/*
 * Build the libffi call interface (cif) for one helper.
 * The typemask packs one 3-bit typecode per position: the return type
 * in bits [2:0], argument i in bits [3i+5 : 3i+3].
 * The allocation is never freed; the cif lives as long as the process.
 */
static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];       /* flexible array: one slot per argument */
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    /* Highest set bit among argument fields, rounded up to whole
       3-bit codes, gives the argument count. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
1287 
1288 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1289 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1290 #else
1291 #define HELPER_INFO_INIT(I)      (&(I)->init)
1292 #define HELPER_INFO_INIT_VAL(I)  1
1293 #endif /* CONFIG_TCG_INTERPRETER */
1294 
1295 static inline bool arg_slot_reg_p(unsigned arg_slot)
1296 {
1297     /*
1298      * Split the sizeof away from the comparison to avoid Werror from
1299      * "unsigned < 0 is always false", when iarg_regs is empty.
1300      */
1301     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1302     return arg_slot < nreg;
1303 }
1304 
/* Byte offset within the stack argument area for slot @arg_slot.
   Caller must have checked !arg_slot_reg_p(arg_slot). */
static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
1313 
/* Running state while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1320 
1321 static void layout_arg_even(TCGCumulativeArgs *cum)
1322 {
1323     cum->arg_slot += cum->arg_slot & 1;
1324 }
1325 
/* Lay out one single-slot argument of kind @kind, consuming one entry
   of info->in[] and one argument slot.  The compound literal zeroes
   the remaining fields (tmp_subindex, ref_slot). */
static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}
1339 
/* Lay out one argument that spans @n consecutive slots (e.g. a 64-bit
   value on a 32-bit host), as @n TCG_CALL_ARG_NORMAL pieces. */
static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}
1357 
/* Lay out a 128-bit argument passed by reference: one pointer slot in
   the normal argument sequence, plus a stack copy in the ref_slot area
   (relocated later by init_call_layout). */
static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with
     * structure pass by-reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}
1389 
/*
 * Decode info->typemask (3 bits per position; return type in the low
 * 3 bits) and compute the register/stack placement of the helper's
 * return value and arguments, filling info->nr_out, info->out_kind,
 * info->nr_in and info->in[].
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        /* Two pieces on a 32-bit host, one otherwise. */
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low bit of the typecode distinguishes signed codes. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            /* Stack parameters exist: place copies after them, aligned
               for Int128. */
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1570 
1571 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1572 static void process_constraint_sets(void);
1573 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1574                                             TCGReg reg, const char *name);
1575 
/*
 * One-time initialization of the parent TCG context: compute helper
 * call layouts, run backend init, process constraint sets, build the
 * indirect register allocation order, set up context tracking, and
 * register the "env" global.
 */
static void tcg_context_init(unsigned max_threads)
{
    TCGContext *s = &tcg_init_ctx;
    int n, i;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_constraint_sets();

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    /* n = index of the first call-clobbered register in alloc order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In system-mode we will have at most max_threads TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_threads;
    tcg_ctxs = g_new0(TCGContext *, max_threads);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}
1630 
/* Public entry point: initialize the TCG subsystem, then carve the
   code buffer into regions. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    tcg_context_init(max_threads);
    tcg_region_init(tb_size, splitwx, max_threads);
}
1636 
1637 /*
1638  * Allocate TBs right before their corresponding translated code, making
1639  * sure that TBs and code are on different cache lines.
1640  */
1641 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1642 {
1643     uintptr_t align = qemu_icache_linesize;
1644     TranslationBlock *tb;
1645     void *next;
1646 
1647  retry:
1648     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1649     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1650 
1651     if (unlikely(next > s->code_gen_highwater)) {
1652         if (tcg_region_alloc(s)) {
1653             return NULL;
1654         }
1655         goto retry;
1656     }
1657     qatomic_set(&s->code_gen_ptr, next);
1658     return tb;
1659 }
1660 
/*
 * Emit the host prologue/epilogue at the start of the code buffer,
 * flush caches over it, and optionally log its disassembly.
 * Must run once before any TB is translated.
 */
void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point into generated code is the prologue itself. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

    s->pool_labels = NULL;

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to the execution view. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Constant pool present: disassemble only the code part,
                   then dump the pool words as raw data. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1737 
/* Reset per-translation state, preparing @s to translate a new TB. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Drop all non-global temps from the previous translation. */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    tcg_temp_ebb_reset_freed(s);

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    tcg_debug_assert(s->insn_start_words > 0);
}
1769 
1770 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1771 {
1772     int n = s->nb_temps++;
1773 
1774     if (n >= TCG_MAX_TEMPS) {
1775         tcg_raise_tb_overflow(s);
1776     }
1777     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1778 }
1779 
1780 static TCGTemp *tcg_global_alloc(TCGContext *s)
1781 {
1782     TCGTemp *ts;
1783 
1784     tcg_debug_assert(s->nb_globals == s->nb_temps);
1785     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1786     s->nb_globals++;
1787     ts = tcg_temp_alloc(s);
1788     ts->kind = TEMP_GLOBAL;
1789 
1790     return ts;
1791 }
1792 
/*
 * Create a TEMP_FIXED global that lives permanently in host register REG,
 * and remove that register from the allocatable set.
 */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    /* A 32-bit host cannot hold a 64-bit value in a single register. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    /* The register is dedicated to this global; hide it from the allocator. */
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}
1810 
/*
 * Record the spill frame [start, start + size), addressed relative to
 * REG, which is exposed as the fixed global "_frame".
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1818 
/*
 * Create a global temp that lives in memory at BASE + OFFSET.
 * On a 32-bit host, a TCG_TYPE_I64 global is represented as a pair of
 * adjacent 32-bit temps named "<name>_0" and "<name>_1", the second
 * addressing OFFSET + 4.
 */
static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
                                            const char *name, TCGType type)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit value on a 32-bit host counts as two indirects. */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The pair must be adjacent in the temps array. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1878 
1879 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1880 {
1881     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1882     return temp_tcgv_i32(ts);
1883 }
1884 
1885 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1886 {
1887     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1888     return temp_tcgv_i64(ts);
1889 }
1890 
1891 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1892 {
1893     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1894     return temp_tcgv_ptr(ts);
1895 }
1896 
/*
 * Allocate a new temp of TYPE and KIND (TEMP_EBB or TEMP_TB).
 * TEMP_EBB temps are recycled via the per-type free bitmap; types wider
 * than the host register are built from consecutive TCGTemp entries.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* n = number of host-register-sized pieces needed for TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Each piece is one host register; subindex orders the pieces. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1960 
1961 TCGv_i32 tcg_temp_new_i32(void)
1962 {
1963     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1964 }
1965 
1966 TCGv_i32 tcg_temp_ebb_new_i32(void)
1967 {
1968     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1969 }
1970 
1971 TCGv_i64 tcg_temp_new_i64(void)
1972 {
1973     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1974 }
1975 
1976 TCGv_i64 tcg_temp_ebb_new_i64(void)
1977 {
1978     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1979 }
1980 
1981 TCGv_ptr tcg_temp_new_ptr(void)
1982 {
1983     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1984 }
1985 
1986 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1987 {
1988     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1989 }
1990 
1991 TCGv_i128 tcg_temp_new_i128(void)
1992 {
1993     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1994 }
1995 
1996 TCGv_i128 tcg_temp_ebb_new_i128(void)
1997 {
1998     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1999 }
2000 
/*
 * Create a new EBB vector temp of TYPE.  Under CONFIG_DEBUG_TCG,
 * verify that the host actually supports that vector size.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, TEMP_EBB);
    return temp_tcgv_vec(t);
}
2024 
2025 /* Create a new temp of the same type as an existing temp.  */
2026 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2027 {
2028     TCGTemp *t = tcgv_vec_temp(match);
2029 
2030     tcg_debug_assert(t->temp_allocated != 0);
2031 
2032     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2033     return temp_tcgv_vec(t);
2034 }
2035 
/*
 * Release a temp.  Only TEMP_EBB temps are actually returned to the
 * free bitmap; TEMP_CONST and TEMP_TB frees are silently ignored, and
 * freeing TEMP_FIXED or TEMP_GLOBAL is a programming error.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;

    switch (ts->kind) {
    case TEMP_CONST:
    case TEMP_TB:
        /* Silently ignore free. */
        break;
    case TEMP_EBB:
        tcg_debug_assert(ts->temp_allocated != 0);
        ts->temp_allocated = 0;
        /* Make the slot reusable by tcg_temp_new_internal. */
        set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
        break;
    default:
        /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
        g_assert_not_reached();
    }
}
2055 
2056 void tcg_temp_free_i32(TCGv_i32 arg)
2057 {
2058     tcg_temp_free_internal(tcgv_i32_temp(arg));
2059 }
2060 
2061 void tcg_temp_free_i64(TCGv_i64 arg)
2062 {
2063     tcg_temp_free_internal(tcgv_i64_temp(arg));
2064 }
2065 
2066 void tcg_temp_free_i128(TCGv_i128 arg)
2067 {
2068     tcg_temp_free_internal(tcgv_i128_temp(arg));
2069 }
2070 
2071 void tcg_temp_free_ptr(TCGv_ptr arg)
2072 {
2073     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2074 }
2075 
2076 void tcg_temp_free_vec(TCGv_vec arg)
2077 {
2078     tcg_temp_free_internal(tcgv_vec_temp(arg));
2079 }
2080 
/*
 * Return the interned TEMP_CONST temp for (TYPE, VAL), creating it on
 * first use.  Constants are cached in a per-type hash table so repeated
 * uses of the same value share one temp; the hash key points at the
 * temp's own val field.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type table on first constant. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* A 64-bit constant occupies two adjacent 32-bit temps. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
2135 
2136 TCGv_i32 tcg_constant_i32(int32_t val)
2137 {
2138     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2139 }
2140 
2141 TCGv_i64 tcg_constant_i64(int64_t val)
2142 {
2143     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2144 }
2145 
2146 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2147 {
2148     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2149 }
2150 
2151 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2152 {
2153     val = dup_const(vece, val);
2154     return temp_tcgv_vec(tcg_constant_internal(type, val));
2155 }
2156 
2157 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2158 {
2159     TCGTemp *t = tcgv_vec_temp(match);
2160 
2161     tcg_debug_assert(t->temp_allocated != 0);
2162     return tcg_constant_vec(t->base_type, vece, val);
2163 }
2164 
2165 #ifdef CONFIG_DEBUG_TCG
/* Return the index of TS within the current context's temps array. */
size_t temp_idx(TCGTemp *ts)
{
    ptrdiff_t n = ts - tcg_ctx->temps;
    assert(n >= 0 && n < tcg_ctx->nb_temps);
    return n;
}
2172 
/*
 * Convert an opaque TCGv_i32 handle back to its TCGTemp.  The handle
 * encodes the byte offset of the temp from the start of TCGContext;
 * validate that offset before applying it.
 */
TCGTemp *tcgv_i32_temp(TCGv_i32 v)
{
    uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);

    /* Must land inside, and be aligned to an element of, the temps array. */
    assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
    assert(o % sizeof(TCGTemp) == 0);

    return (void *)tcg_ctx + (uintptr_t)v;
}
2182 #endif /* CONFIG_DEBUG_TCG */
2183 
2184 /*
2185  * Return true if OP may appear in the opcode stream with TYPE.
2186  * Test the runtime variable that controls each opcode.
2187  */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /* First determine whether the host supports values of TYPE at all. */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    /* Then test the opcode against the backend's capabilities. */
    switch (op) {
    /* Control-flow and memory ops that every backend must provide. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* 32-bit ops that every backend must provide. */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* Optional 32-bit ops, gated by per-backend feature macros. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops, mandatory on 64-bit hosts. */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops, gated by per-backend feature macros. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;

    /* Vector ops: all additionally require host support for TYPE. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* New-style opcodes: consult the backend's constraint table. */
        if (op < INDEX_op_last_generic) {
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            /* A non-negative index names a valid constraint set. */
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        tcg_debug_assert(op < NB_OPS);
        return true;

    case INDEX_op_last_generic:
        g_assert_not_reached();
    }
}
2458 
2459 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2460 {
2461     unsigned width;
2462 
2463     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2464     width = (type == TCG_TYPE_I32 ? 32 : 64);
2465 
2466     tcg_debug_assert(ofs < width);
2467     tcg_debug_assert(len > 0);
2468     tcg_debug_assert(len <= width - ofs);
2469 
2470     return TCG_TARGET_deposit_valid(type, ofs, len);
2471 }
2472 
2473 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2474 
/*
 * Emit an INDEX_op_call invoking helper FUNC described by INFO.
 * RET is the output temp (or NULL when there is none); ARGS holds the
 * input temps, indexed by the call layout recorded in INFO.
 */
static void tcg_gen_callN(void *func, TCGHelperInfo *info,
                          TCGTemp *ret, TCGTemp **args)
{
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    /* Compute the call layout once per helper, thread-safely. */
    if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
        init_call_layout(info);
        g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
    }

    /* Outputs + inputs + 2 trailing slots for func pointer and info. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* The return value is spread across N consecutive temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Widen a 32-bit argument into a scratch 64-bit temp. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                /* Remember the scratch temp so it is freed below. */
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    /* Insert at the requested point, or append to the op stream. */
    if (tcg_ctx->emit_before_op) {
        QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
    } else {
        QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
    }

    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2567 
/* Emit a helper call with no input arguments. */
void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
{
    tcg_gen_callN(func, info, ret, NULL);
}
2572 
2573 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2574 {
2575     tcg_gen_callN(func, info, ret, &t1);
2576 }
2577 
2578 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2579                    TCGTemp *t1, TCGTemp *t2)
2580 {
2581     TCGTemp *args[2] = { t1, t2 };
2582     tcg_gen_callN(func, info, ret, args);
2583 }
2584 
2585 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2587 {
2588     TCGTemp *args[3] = { t1, t2, t3 };
2589     tcg_gen_callN(func, info, ret, args);
2590 }
2591 
2592 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2593                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2594 {
2595     TCGTemp *args[4] = { t1, t2, t3, t4 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2600                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2601 {
2602     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2607                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2608                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2609 {
2610     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
2614 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2615                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2616                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2617 {
2618     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2619     tcg_gen_callN(func, info, ret, args);
2620 }
2621 
/*
 * Reset the tracked location of every temp at the start of register
 * allocation, and clear the register-to-temp reverse map.
 */
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        /* Default: the temp's canonical home is memory. */
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_EBB:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_TB:
            /* Spill slots for these temps are assigned on demand. */
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    /* No host register holds any temp yet. */
    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}
2653 
/* Format a human-readable name for temp TS into BUF; return BUF. */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        /* Globals carry their own name. */
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants are shown with their bit width. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2693 
2694 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2695                              int buf_size, TCGArg arg)
2696 {
2697     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2698 }
2699 
/* Printable names for TCGCond values. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu",
    [TCG_COND_TSTEQ] = "tsteq",
    [TCG_COND_TSTNE] = "tstne",
};
2717 
/* Printable names for MemOp size/sign/endianness combinations. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2735 
/* Printable prefixes for the MemOp alignment field (MO_AMASK). */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2746 
/* Names for the MemOp atomicity field; IFALIGN is the default and prints empty. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2755 
/*
 * Names for TCG_BSWAP_* flag combinations on bswap ops.
 * Unlisted combinations print as an empty string and fall back to numeric.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2763 
#ifdef CONFIG_PLUGIN
/* Printable origin of a plugin callback, indexed by the first constant
   argument of INDEX_op_plugin_cb. */
static const char * const plugin_from_name[] = {
    "from-tb",
    "from-insn",
    "after-insn",
    "after-tb",
};
#endif
2772 
2773 static inline bool tcg_regset_single(TCGRegSet d)
2774 {
2775     return (d & (d - 1)) == 0;
2776 }
2777 
2778 static inline TCGReg tcg_regset_first(TCGRegSet d)
2779 {
2780     if (TCG_TARGET_NB_REGS <= 32) {
2781         return ctz32(d);
2782     } else {
2783         return ctz64(d);
2784     }
2785 }
2786 
/*
 * fprintf wrapper returning only the number of characters output;
 * a fprintf error (negative return) is reported as 0, so the column
 * accounting in tcg_dump_ops never goes backwards.
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2790 
/*
 * Print the op list of @s to stream @f, one op per line.
 * Liveness annotations (sync/dead) are printed when op->life is set;
 * if @have_prefs is true, output register preferences are printed too.
 */
void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;    /* characters printed so far on this line */

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            /* Guest instruction boundary: dump the insn_start words. */
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0, k = s->insn_start_words; i < k; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            /* Ordinary op: name (with type/vector decoration), then args. */
            if (def->flags & TCG_OPF_INT) {
                col += ne_fprintf(f, " %s_i%d ",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)));
            } else if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "%s v%d,e%d,",
                                  def->name,
                                  8 * tcg_type_size(TCGOP_TYPE(op)),
                                  8 << TCGOP_VECE(op));
            } else {
                col += ne_fprintf(f, " %s ", def->name);
            }

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /*
             * Print constant arguments with per-opcode symbolic decoding;
             * i counts how many constant args were consumed symbolically.
             */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_negsetcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_negsetcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                /* Condition argument, printed symbolically when known. */
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
            case INDEX_op_qemu_ld_i128:
            case INDEX_op_qemu_st_i128:
                {
                    /* Decode the MemOpIdx: atomicity + alignment + size/swap. */
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    MemOp mop = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!mop && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        mop = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
#ifdef CONFIG_PLUGIN
            case INDEX_op_plugin_cb:
                {
                    TCGArg from = op->args[k++];
                    const char *name = NULL;

                    if (from < ARRAY_SIZE(plugin_from_name)) {
                        name = plugin_from_name[from];
                    }
                    if (name) {
                        col += ne_fprintf(f, "%s", name);
                    } else {
                        col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
                    }
                    i = 1;
                }
                break;
#endif
            default:
                i = 0;
                break;
            }
            /* Second pass: labels and memory barriers. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    /* Decode barrier kind and load/store ordering bits. */
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant args, printed numerically. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column so annotations line up. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Arguments to be synced to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Arguments dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            /* Output register preferences, one set per output argument. */
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
3123 
3124 /* we give more priority to constraints with less registers */
3125 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3126 {
3127     int n;
3128 
3129     arg_ct += k;
3130     n = ctpop64(arg_ct->regs);
3131 
3132     /*
3133      * Sort constraints of a single register first, which includes output
3134      * aliases (which must exactly match the input already allocated).
3135      */
3136     if (n == 1 || arg_ct->oalias) {
3137         return INT_MAX;
3138     }
3139 
3140     /*
3141      * Sort register pairs next, first then second immediately after.
3142      * Arbitrarily sort multiple pairs by the index of the first reg;
3143      * there shouldn't be many pairs.
3144      */
3145     switch (arg_ct->pair) {
3146     case 1:
3147     case 3:
3148         return (k + 1) * 2;
3149     case 2:
3150         return (arg_ct->pair_index + 1) * 2 - 1;
3151     }
3152 
3153     /* Finally, sort by decreasing register count. */
3154     assert(n > 1);
3155     return -n;
3156 }
3157 
3158 /* sort from highest priority to lowest */
3159 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3160 {
3161     int i, j;
3162 
3163     for (i = 0; i < n; i++) {
3164         a[start + i].sort_index = start + i;
3165     }
3166     if (n <= 1) {
3167         return;
3168     }
3169     for (i = 0; i < n - 1; i++) {
3170         for (j = i + 1; j < n; j++) {
3171             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3172             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3173             if (p1 < p2) {
3174                 int tmp = a[start + i].sort_index;
3175                 a[start + i].sort_index = a[start + j].sort_index;
3176                 a[start + j].sort_index = tmp;
3177             }
3178         }
3179     }
3180 }
3181 
/* All-zero constraints, returned for opcodes marked TCG_OPF_NOT_PRESENT. */
static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
/* Expanded form of constraint_sets[], filled by process_constraint_sets(). */
static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3184 
/*
 * Expand every TCGConstraintSet in constraint_sets[] into the
 * TCGArgConstraint array all_cts[] returned by opcode_args_ct():
 * parse the per-argument constraint strings, record alias and
 * register-pair relationships, and sort arguments by allocation
 * priority.
 */
static void process_constraint_sets(void)
{
    for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
        const TCGConstraintSet *tdefs = &constraint_sets[c];
        TCGArgConstraint *args_ct = all_cts[c];
        int nb_oargs = tdefs->nb_oargs;
        int nb_iargs = tdefs->nb_iargs;
        int nb_args = nb_oargs + nb_iargs;
        bool saw_alias_pair = false;

        for (int i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            bool input_p = i >= nb_oargs;
            int o;

            /* Handle the single-character structural constraints first. */
            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output number *ct_str - '0'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < nb_oargs);
                tcg_debug_assert(args_ct[o].regs != 0);
                tcg_debug_assert(!args_ct[o].oalias);
                args_ct[i] = args_ct[o];
                /* The output sets oalias.  */
                args_ct[o].oalias = 1;
                args_ct[o].alias_index = i;
                /* The input sets ialias. */
                args_ct[i].ialias = 1;
                args_ct[i].alias_index = o;
                if (args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must not overlap any input register. */
                tcg_debug_assert(!input_p);
                args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = args_ct[o].regs << 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 1;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!args_ct[o].pair);
                tcg_debug_assert(!args_ct[o].ct);
                args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = args_ct[o].regs >> 1,
                    .newreg = args_ct[o].newreg,
                };
                args_ct[o].pair = 2;
                args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Accumulate register-set and constant constraint letters. */
            do {
                switch (*ct_str) {
                case 'i':
                    args_ct[i].ct |= TCG_CT_CONST;
                    break;
#ifdef TCG_REG_ZERO
                case 'z':
                    args_ct[i].ct |= TCG_CT_REG_ZERO;
                    break;
#endif

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGConstraintSet constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (int i = nb_oargs; i < nb_args; i++) {
                int o, o2, i2;

                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!args_ct[i].ialias) {
                    continue;
                }
                switch (args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 1);
                    tcg_debug_assert(args_ct[o2].pair == 2);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 2);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    o = args_ct[i].alias_index;
                    o2 = args_ct[o].pair_index;
                    tcg_debug_assert(args_ct[o].pair == 2);
                    tcg_debug_assert(args_ct[o2].pair == 1);
                    if (args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = args_ct[o2].alias_index;
                        tcg_debug_assert(args_ct[i2].pair == 1);
                        args_ct[i2].pair_index = i;
                        args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        args_ct[i].pair = 3;
                        args_ct[o2].pair = 3;
                        args_ct[i].pair_index = o2;
                        args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(args_ct, 0, nb_oargs);
        sort_constraints(args_ct, nb_oargs, nb_iargs);
    }
}
3375 
3376 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3377 {
3378     TCGOpcode opc = op->opc;
3379     TCGType type = TCGOP_TYPE(op);
3380     unsigned flags = TCGOP_FLAGS(op);
3381     const TCGOpDef *def = &tcg_op_defs[opc];
3382     const TCGOutOp *outop = all_outop[opc];
3383     TCGConstraintSetIndex con_set;
3384 
3385     if (def->flags & TCG_OPF_NOT_PRESENT) {
3386         return empty_cts;
3387     }
3388 
3389     if (outop) {
3390         con_set = outop->static_constraint;
3391         if (con_set == C_Dynamic) {
3392             con_set = outop->dynamic_constraint(type, flags);
3393         }
3394     } else {
3395         con_set = tcg_target_op_def(opc, type, flags);
3396     }
3397     tcg_debug_assert(con_set >= 0);
3398     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3399 
3400     /* The constraint arguments must match TCGOpcode arguments. */
3401     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3402     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3403 
3404     return all_cts[con_set];
3405 }
3406 
3407 static void remove_label_use(TCGOp *op, int idx)
3408 {
3409     TCGLabel *label = arg_label(op->args[idx]);
3410     TCGLabelUse *use;
3411 
3412     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3413         if (use->op == op) {
3414             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3415             return;
3416         }
3417     }
3418     g_assert_not_reached();
3419 }
3420 
3421 void tcg_op_remove(TCGContext *s, TCGOp *op)
3422 {
3423     switch (op->opc) {
3424     case INDEX_op_br:
3425         remove_label_use(op, 0);
3426         break;
3427     case INDEX_op_brcond_i32:
3428     case INDEX_op_brcond_i64:
3429         remove_label_use(op, 3);
3430         break;
3431     case INDEX_op_brcond2_i32:
3432         remove_label_use(op, 5);
3433         break;
3434     default:
3435         break;
3436     }
3437 
3438     QTAILQ_REMOVE(&s->ops, op, link);
3439     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3440     s->nb_ops--;
3441 }
3442 
3443 void tcg_remove_ops_after(TCGOp *op)
3444 {
3445     TCGContext *s = tcg_ctx;
3446 
3447     while (true) {
3448         TCGOp *last = tcg_last_op();
3449         if (last == op) {
3450             return;
3451         }
3452         tcg_op_remove(s, last);
3453     }
3454 }
3455 
3456 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3457 {
3458     TCGContext *s = tcg_ctx;
3459     TCGOp *op = NULL;
3460 
3461     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3462         QTAILQ_FOREACH(op, &s->free_ops, link) {
3463             if (nargs <= op->nargs) {
3464                 QTAILQ_REMOVE(&s->free_ops, op, link);
3465                 nargs = op->nargs;
3466                 goto found;
3467             }
3468         }
3469     }
3470 
3471     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3472     nargs = MAX(4, nargs);
3473     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3474 
3475  found:
3476     memset(op, 0, offsetof(TCGOp, link));
3477     op->opc = opc;
3478     op->nargs = nargs;
3479 
3480     /* Check for bitfield overflow. */
3481     tcg_debug_assert(op->nargs == nargs);
3482 
3483     s->nb_ops++;
3484     return op;
3485 }
3486 
3487 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3488 {
3489     TCGOp *op = tcg_op_alloc(opc, nargs);
3490 
3491     if (tcg_ctx->emit_before_op) {
3492         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3493     } else {
3494         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3495     }
3496     return op;
3497 }
3498 
3499 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3500                             TCGOpcode opc, TCGType type, unsigned nargs)
3501 {
3502     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3503 
3504     TCGOP_TYPE(new_op) = type;
3505     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3506     return new_op;
3507 }
3508 
3509 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3510                            TCGOpcode opc, TCGType type, unsigned nargs)
3511 {
3512     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3513 
3514     TCGOP_TYPE(new_op) = type;
3515     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3516     return new_op;
3517 }
3518 
3519 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3520 {
3521     TCGLabelUse *u;
3522 
3523     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3524         TCGOp *op = u->op;
3525         switch (op->opc) {
3526         case INDEX_op_br:
3527             op->args[0] = label_arg(to);
3528             break;
3529         case INDEX_op_brcond_i32:
3530         case INDEX_op_brcond_i64:
3531             op->args[3] = label_arg(to);
3532             break;
3533         case INDEX_op_brcond2_i32:
3534             op->args[5] = label_arg(to);
3535             break;
3536         default:
3537             g_assert_not_reached();
3538         }
3539     }
3540 
3541     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3542 }
3543 
/*
 * Reachable analysis: remove unreachable code.
 *
 * Walk the op list forward, tracking whether the current op can be
 * reached.  Ops following an unconditional control transfer are dead
 * and removed; a referenced label makes code live again.  Also folds
 * away redundant labels and branch-to-next sequences.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;   /* true while scanning ops that cannot be reached */

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default, an op in a dead region is removed. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3635 
/*
 * Per-temp liveness state bits, kept in TCGTemp.state:
 * TS_DEAD -- the value is dead (not needed by any later op);
 * TS_MEM  -- the value is synced to its canonical memory slot.
 */
#define TS_DEAD  1
#define TS_MEM   2

/* Test the per-argument life bits accumulated into op->life. */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3641 
3642 /* For liveness_pass_1, the register preferences for a given temp.  */
3643 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3644 {
3645     return ts->state_ptr;
3646 }
3647 
3648 /* For liveness_pass_1, reset the preferences for a given temp to the
3649  * maximal regset for its type.
3650  */
3651 static inline void la_reset_pref(TCGTemp *ts)
3652 {
3653     *la_temp_pref(ts)
3654         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3655 }
3656 
3657 /* liveness analysis: end of function: all temps are dead, and globals
3658    should be in memory. */
3659 static void la_func_end(TCGContext *s, int ng, int nt)
3660 {
3661     int i;
3662 
3663     for (i = 0; i < ng; ++i) {
3664         s->temps[i].state = TS_DEAD | TS_MEM;
3665         la_reset_pref(&s->temps[i]);
3666     }
3667     for (i = ng; i < nt; ++i) {
3668         s->temps[i].state = TS_DEAD;
3669         la_reset_pref(&s->temps[i]);
3670     }
3671 }
3672 
3673 /* liveness analysis: end of basic block: all temps are dead, globals
3674    and local temps should be in memory. */
3675 static void la_bb_end(TCGContext *s, int ng, int nt)
3676 {
3677     int i;
3678 
3679     for (i = 0; i < nt; ++i) {
3680         TCGTemp *ts = &s->temps[i];
3681         int state;
3682 
3683         switch (ts->kind) {
3684         case TEMP_FIXED:
3685         case TEMP_GLOBAL:
3686         case TEMP_TB:
3687             state = TS_DEAD | TS_MEM;
3688             break;
3689         case TEMP_EBB:
3690         case TEMP_CONST:
3691             state = TS_DEAD;
3692             break;
3693         default:
3694             g_assert_not_reached();
3695         }
3696         ts->state = state;
3697         la_reset_pref(ts);
3698     }
3699 }
3700 
3701 /* liveness analysis: sync globals back to memory.  */
3702 static void la_global_sync(TCGContext *s, int ng)
3703 {
3704     int i;
3705 
3706     for (i = 0; i < ng; ++i) {
3707         int state = s->temps[i].state;
3708         s->temps[i].state = state | TS_MEM;
3709         if (state == TS_DEAD) {
3710             /* If the global was previously dead, reset prefs.  */
3711             la_reset_pref(&s->temps[i]);
3712         }
3713     }
3714 }
3715 
3716 /*
3717  * liveness analysis: conditional branch: all temps are dead unless
3718  * explicitly live-across-conditional-branch, globals and local temps
3719  * should be synced.
3720  */
3721 static void la_bb_sync(TCGContext *s, int ng, int nt)
3722 {
3723     la_global_sync(s, ng);
3724 
3725     for (int i = ng; i < nt; ++i) {
3726         TCGTemp *ts = &s->temps[i];
3727         int state;
3728 
3729         switch (ts->kind) {
3730         case TEMP_TB:
3731             state = ts->state;
3732             ts->state = state | TS_MEM;
3733             if (state != TS_DEAD) {
3734                 continue;
3735             }
3736             break;
3737         case TEMP_EBB:
3738         case TEMP_CONST:
3739             continue;
3740         default:
3741             g_assert_not_reached();
3742         }
3743         la_reset_pref(&s->temps[i]);
3744     }
3745 }
3746 
3747 /* liveness analysis: sync globals back to memory and kill.  */
3748 static void la_global_kill(TCGContext *s, int ng)
3749 {
3750     int i;
3751 
3752     for (i = 0; i < ng; i++) {
3753         s->temps[i].state = TS_DEAD | TS_MEM;
3754         la_reset_pref(&s->temps[i]);
3755     }
3756 }
3757 
3758 /* liveness analysis: note live globals crossing calls.  */
3759 static void la_cross_call(TCGContext *s, int nt)
3760 {
3761     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3762     int i;
3763 
3764     for (i = 0; i < nt; i++) {
3765         TCGTemp *ts = &s->temps[i];
3766         if (!(ts->state & TS_DEAD)) {
3767             TCGRegSet *pset = la_temp_pref(ts);
3768             TCGRegSet set = *pset;
3769 
3770             set &= mask;
3771             /* If the combination is not possible, restart.  */
3772             if (set == 0) {
3773                 set = tcg_target_available_regs[ts->type] & mask;
3774             }
3775             *pset = set;
3776         }
3777     }
3778 }
3779 
3780 /*
3781  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3782  * to TEMP_EBB, if possible.
3783  */
3784 static void __attribute__((noinline))
3785 liveness_pass_0(TCGContext *s)
3786 {
3787     void * const multiple_ebb = (void *)(uintptr_t)-1;
3788     int nb_temps = s->nb_temps;
3789     TCGOp *op, *ebb;
3790 
3791     for (int i = s->nb_globals; i < nb_temps; ++i) {
3792         s->temps[i].state_ptr = NULL;
3793     }
3794 
3795     /*
3796      * Represent each EBB by the op at which it begins.  In the case of
3797      * the first EBB, this is the first op, otherwise it is a label.
3798      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3799      * within a single EBB, else MULTIPLE_EBB.
3800      */
3801     ebb = QTAILQ_FIRST(&s->ops);
3802     QTAILQ_FOREACH(op, &s->ops, link) {
3803         const TCGOpDef *def;
3804         int nb_oargs, nb_iargs;
3805 
3806         switch (op->opc) {
3807         case INDEX_op_set_label:
3808             ebb = op;
3809             continue;
3810         case INDEX_op_discard:
3811             continue;
3812         case INDEX_op_call:
3813             nb_oargs = TCGOP_CALLO(op);
3814             nb_iargs = TCGOP_CALLI(op);
3815             break;
3816         default:
3817             def = &tcg_op_defs[op->opc];
3818             nb_oargs = def->nb_oargs;
3819             nb_iargs = def->nb_iargs;
3820             break;
3821         }
3822 
3823         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3824             TCGTemp *ts = arg_temp(op->args[i]);
3825 
3826             if (ts->kind != TEMP_TB) {
3827                 continue;
3828             }
3829             if (ts->state_ptr == NULL) {
3830                 ts->state_ptr = ebb;
3831             } else if (ts->state_ptr != ebb) {
3832                 ts->state_ptr = multiple_ebb;
3833             }
3834         }
3835     }
3836 
3837     /*
3838      * For TEMP_TB that turned out not to be used beyond one EBB,
3839      * reduce the liveness to TEMP_EBB.
3840      */
3841     for (int i = s->nb_globals; i < nb_temps; ++i) {
3842         TCGTemp *ts = &s->temps[i];
3843         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3844             ts->kind = TEMP_EBB;
3845         }
3846     }
3847 }
3848 
/*
 * Liveness analysis: update the opc_arg_life (op->life) bits to tell
 * if a given input argument is dead.  Instructions updating dead
 * temporaries are removed.  Also computes per-temp register
 * preferences (reached via ts->state_ptr) and per-op output_pref.
 */
static void __attribute__((noinline))
liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference regset per temp, indexed through state_ptr. */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk ops in reverse, so each use is processed before its def. */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        const TCGArgConstraint *args_ct;

        switch (opc) {
        case INDEX_op_call:
            {
                const TCGHelperInfo *info = tcg_call_info(op);
                int call_flags = tcg_call_flags(op);

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);
                }

                /* Not used -- it will be tcg_target_call_oarg_reg().  */
                memset(op->output_pref, 0, sizeof(op->output_pref));

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                /*
                 * Input arguments are live for preceding opcodes.
                 *
                 * For those arguments that die, and will be allocated in
                 * registers, clear the register set for that arg, to be
                 * filled in below.  For args that will be on the stack,
                 * reset to any available reg.  Process arguments in reverse
                 * order so that if a temp is used more than once, the stack
                 * reset to max happens before the register reset to 0.
                 */
                for (i = nb_iargs - 1; i >= 0; i--) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    if (ts->state & TS_DEAD) {
                        switch (loc->kind) {
                        case TCG_CALL_ARG_NORMAL:
                        case TCG_CALL_ARG_EXTEND_U:
                        case TCG_CALL_ARG_EXTEND_S:
                            if (arg_slot_reg_p(loc->arg_slot)) {
                                *la_temp_pref(ts) = 0;
                                break;
                            }
                            /* fall through */
                        default:
                            *la_temp_pref(ts) =
                                tcg_target_available_regs[ts->type];
                            break;
                        }
                        ts->state &= ~TS_DEAD;
                    }
                }

                /*
                 * For each input argument, add its input register to prefs.
                 * If a temp is used once, this produces a single set bit;
                 * if a temp is used multiple times, this produces a set.
                 */
                for (i = 0; i < nb_iargs; i++) {
                    const TCGCallArgumentLoc *loc = &info->in[i];
                    ts = arg_temp(op->args[nb_oargs + i]);

                    switch (loc->kind) {
                    case TCG_CALL_ARG_NORMAL:
                    case TCG_CALL_ARG_EXTEND_U:
                    case TCG_CALL_ARG_EXTEND_S:
                        if (arg_slot_reg_p(loc->arg_slot)) {
                            tcg_regset_set_reg(*la_temp_pref(ts),
                                tcg_target_call_iarg_regs[loc->arg_slot]);
                        }
                        break;
                    default:
                        break;
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_muls2_i32:
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_mulsh;
            goto do_mul2;
        case INDEX_op_mulu2_i32:
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul;
            opc_new2 = INDEX_op_muluh;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD &&
                       tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                if (i < ARRAY_SIZE(op->output_pref)) {
                    op->output_pref[i] = *la_temp_pref(ts);
                }

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                args_ct = opcode_args_ct(op);
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->regs;
                    if (ct->ialias) {
                        set &= output_pref(op, ct->alias_index);
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        /* Publish the computed dead/sync bits for later passes. */
        op->life = arg_life;
    }
}
4177 
/*
 * Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * Each indirect global gets a shadow TEMP_EBB temp; loads are inserted
 * before uses and stores after final writes, and op arguments are
 * rewritten to reference the shadow temp.  Returns true if any op
 * argument was changed.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            /* An indirect global whose shadow is dead must be reloaded. */
            if (dir_ts && arg_ts->state == TS_DEAD) {
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc,
                                                  arg_ts->type, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov) {
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Dead after sync: store directly from the source
                           and drop the now-useless mov. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc,
                                                     arg_ts->type, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
4369 
/*
 * Assign a stack-frame slot to @ts.  If @ts is one part of a temp that
 * was subdivided, memory is assigned to all sibling parts at once.
 * If the frame is exhausted, raise a TB-overflow restart.
 */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
    intptr_t off;
    int size, align;

    /* When allocating an object, look at the full type. */
    size = tcg_type_size(ts->base_type);
    switch (ts->base_type) {
    case TCG_TYPE_I32:
        align = 4;
        break;
    case TCG_TYPE_I64:
    case TCG_TYPE_V64:
        align = 8;
        break;
    case TCG_TYPE_I128:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * Note that we do not require aligned storage for V256,
         * and that we provide alignment for I128 to match V128,
         * even if that's above what the host ABI requires.
         */
        align = 16;
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Assume the stack is sufficiently aligned.
     * This affects e.g. ARM NEON, where we have 8 byte stack alignment
     * and do not require 16 byte vector alignment.  This seems slightly
     * easier than fully parameterizing the above switch statement.
     */
    align = MIN(TCG_TARGET_STACK_ALIGN, align);
    off = ROUND_UP(s->current_frame_offset, align);

    /* If we've exhausted the stack frame, restart with a smaller TB. */
    if (off + size > s->frame_end) {
        tcg_raise_tb_overflow(s);
    }
    s->current_frame_offset = off + size;
#if defined(__sparc__)
    /* Account for the stack bias implied by TCG_TARGET_STACK_BIAS. */
    off += TCG_TARGET_STACK_BIAS;
#endif

    /* If the object was subdivided, assign memory to all the parts. */
    if (ts->base_type != ts->type) {
        int part_size = tcg_type_size(ts->type);
        int part_count = size / part_size;

        /*
         * Each part is allocated sequentially in tcg_temp_new_internal.
         * Jump back to the first part by subtracting the current index.
         */
        ts -= ts->temp_subindex;
        for (int i = 0; i < part_count; ++i) {
            ts[i].mem_offset = off + i * part_size;
            ts[i].mem_base = s->frame_temp;
            ts[i].mem_allocated = 1;
        }
    } else {
        ts->mem_offset = off;
        ts->mem_base = s->frame_temp;
        ts->mem_allocated = 1;
    }
}
4438 
4439 /* Assign @reg to @ts, and update reg_to_temp[]. */
4440 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4441 {
4442     if (ts->val_type == TEMP_VAL_REG) {
4443         TCGReg old = ts->reg;
4444         tcg_debug_assert(s->reg_to_temp[old] == ts);
4445         if (old == reg) {
4446             return;
4447         }
4448         s->reg_to_temp[old] = NULL;
4449     }
4450     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4451     s->reg_to_temp[reg] = ts;
4452     ts->val_type = TEMP_VAL_REG;
4453     ts->reg = reg;
4454 }
4455 
4456 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4457 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4458 {
4459     tcg_debug_assert(type != TEMP_VAL_REG);
4460     if (ts->val_type == TEMP_VAL_REG) {
4461         TCGReg reg = ts->reg;
4462         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4463         s->reg_to_temp[reg] = NULL;
4464     }
4465     ts->val_type = type;
4466 }
4467 
4468 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4469 
4470 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4471    mark it free; otherwise mark it dead.  */
4472 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4473 {
4474     TCGTempVal new_type;
4475 
4476     switch (ts->kind) {
4477     case TEMP_FIXED:
4478         return;
4479     case TEMP_GLOBAL:
4480     case TEMP_TB:
4481         new_type = TEMP_VAL_MEM;
4482         break;
4483     case TEMP_EBB:
4484         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4485         break;
4486     case TEMP_CONST:
4487         new_type = TEMP_VAL_CONST;
4488         break;
4489     default:
4490         g_assert_not_reached();
4491     }
4492     set_temp_val_nonreg(s, ts, new_type);
4493 }
4494 
4495 /* Mark a temporary as dead.  */
4496 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4497 {
4498     temp_free_or_dead(s, ts, 1);
4499 }
4500 
/*
 * Sync a temporary to its canonical memory location.  @allocated_regs
 * is consulted in case a register must be allocated to materialize a
 * constant.  If @free_or_dead is non-zero, subsequently release the
 * temporary: positive means dead, negative means free.
 */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a store; coherent temps already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant in a register and
               store that, via the TEMP_VAL_REG case below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* The value already lives only in memory; nothing to store. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        /* The memory copy now matches the live value. */
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4544 
4545 /* free register 'reg' by spilling the corresponding temporary if necessary */
4546 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4547 {
4548     TCGTemp *ts = s->reg_to_temp[reg];
4549     if (ts != NULL) {
4550         temp_sync(s, ts, allocated_regs, 0, -1);
4551     }
4552 }
4553 
4554 /**
4555  * tcg_reg_alloc:
4556  * @required_regs: Set of registers in which we must allocate.
4557  * @allocated_regs: Set of registers which must be avoided.
4558  * @preferred_regs: Set of registers we should prefer.
4559  * @rev: True if we search the registers in "indirect" order.
4560  *
4561  * The allocated register must be in @required_regs & ~@allocated_regs,
4562  * but if we can put it in @preferred_regs we may save a move later.
4563  */
4564 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4565                             TCGRegSet allocated_regs,
4566                             TCGRegSet preferred_regs, bool rev)
4567 {
4568     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4569     TCGRegSet reg_ct[2];
4570     const int *order;
4571 
4572     reg_ct[1] = required_regs & ~allocated_regs;
4573     tcg_debug_assert(reg_ct[1] != 0);
4574     reg_ct[0] = reg_ct[1] & preferred_regs;
4575 
4576     /* Skip the preferred_regs option if it cannot be satisfied,
4577        or if the preference made no difference.  */
4578     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4579 
4580     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4581 
4582     /* Try free registers, preferences first.  */
4583     for (j = f; j < 2; j++) {
4584         TCGRegSet set = reg_ct[j];
4585 
4586         if (tcg_regset_single(set)) {
4587             /* One register in the set.  */
4588             TCGReg reg = tcg_regset_first(set);
4589             if (s->reg_to_temp[reg] == NULL) {
4590                 return reg;
4591             }
4592         } else {
4593             for (i = 0; i < n; i++) {
4594                 TCGReg reg = order[i];
4595                 if (s->reg_to_temp[reg] == NULL &&
4596                     tcg_regset_test_reg(set, reg)) {
4597                     return reg;
4598                 }
4599             }
4600         }
4601     }
4602 
4603     /* We must spill something.  */
4604     for (j = f; j < 2; j++) {
4605         TCGRegSet set = reg_ct[j];
4606 
4607         if (tcg_regset_single(set)) {
4608             /* One register in the set.  */
4609             TCGReg reg = tcg_regset_first(set);
4610             tcg_reg_free(s, reg, allocated_regs);
4611             return reg;
4612         } else {
4613             for (i = 0; i < n; i++) {
4614                 TCGReg reg = order[i];
4615                 if (tcg_regset_test_reg(set, reg)) {
4616                     tcg_reg_free(s, reg, allocated_regs);
4617                     return reg;
4618                 }
4619             }
4620         }
4621     }
4622 
4623     g_assert_not_reached();
4624 }
4625 
/*
 * tcg_reg_alloc_pair:
 * Allocate an adjacent register pair (reg, reg + 1), with the same
 * argument meanings as tcg_reg_alloc.  Returns the low register.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f counts the already-free registers of the pair. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4671 
4672 /* Make sure the temporary is in a register.  If needed, allocate the register
4673    from DESIRED while avoiding ALLOCATED.  */
4674 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4675                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4676 {
4677     TCGReg reg;
4678 
4679     switch (ts->val_type) {
4680     case TEMP_VAL_REG:
4681         return;
4682     case TEMP_VAL_CONST:
4683         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4684                             preferred_regs, ts->indirect_base);
4685         if (ts->type <= TCG_TYPE_I64) {
4686             tcg_out_movi(s, ts->type, reg, ts->val);
4687         } else {
4688             uint64_t val = ts->val;
4689             MemOp vece = MO_64;
4690 
4691             /*
4692              * Find the minimal vector element that matches the constant.
4693              * The targets will, in general, have to do this search anyway,
4694              * do this generically.
4695              */
4696             if (val == dup_const(MO_8, val)) {
4697                 vece = MO_8;
4698             } else if (val == dup_const(MO_16, val)) {
4699                 vece = MO_16;
4700             } else if (val == dup_const(MO_32, val)) {
4701                 vece = MO_32;
4702             }
4703 
4704             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4705         }
4706         ts->mem_coherent = 0;
4707         break;
4708     case TEMP_VAL_MEM:
4709         if (!ts->mem_allocated) {
4710             temp_allocate_frame(s, ts);
4711         }
4712         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4713                             preferred_regs, ts->indirect_base);
4714         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4715         ts->mem_coherent = 1;
4716         break;
4717     case TEMP_VAL_DEAD:
4718     default:
4719         g_assert_not_reached();
4720     }
4721     set_temp_val_reg(s, ts, reg);
4722 }
4723 
4724 /* Save a temporary to memory. 'allocated_regs' is used in case a
4725    temporary registers needs to be allocated to store a constant.  */
4726 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4727 {
4728     /* The liveness analysis already ensures that globals are back
4729        in memory. Keep an tcg_debug_assert for safety. */
4730     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4731 }
4732 
4733 /* save globals to their canonical location and assume they can be
4734    modified be the following code. 'allocated_regs' is used in case a
4735    temporary registers needs to be allocated to store a constant. */
4736 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4737 {
4738     int i, n;
4739 
4740     for (i = 0, n = s->nb_globals; i < n; i++) {
4741         temp_save(s, &s->temps[i], allocated_regs);
4742     }
4743 }
4744 
4745 /* sync globals to their canonical location and assume they can be
4746    read by the following code. 'allocated_regs' is used in case a
4747    temporary registers needs to be allocated to store a constant. */
4748 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4749 {
4750     int i, n;
4751 
4752     for (i = 0, n = s->nb_globals; i < n; i++) {
4753         TCGTemp *ts = &s->temps[i];
4754         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4755                          || ts->kind == TEMP_FIXED
4756                          || ts->mem_coherent);
4757     }
4758 }
4759 
4760 /* at the end of a basic block, we assume all temporaries are dead and
4761    all globals are stored at their canonical location. */
4762 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4763 {
4764     int i;
4765 
4766     for (i = s->nb_globals; i < s->nb_temps; i++) {
4767         TCGTemp *ts = &s->temps[i];
4768 
4769         switch (ts->kind) {
4770         case TEMP_TB:
4771             temp_save(s, ts, allocated_regs);
4772             break;
4773         case TEMP_EBB:
4774             /* The liveness analysis already ensures that temps are dead.
4775                Keep an tcg_debug_assert for safety. */
4776             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4777             break;
4778         case TEMP_CONST:
4779             /* Similarly, we should have freed any allocated register. */
4780             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4781             break;
4782         default:
4783             g_assert_not_reached();
4784         }
4785     }
4786 
4787     save_globals(s, allocated_regs);
4788 }
4789 
4790 /*
4791  * At a conditional branch, we assume all temporaries are dead unless
4792  * explicitly live-across-conditional-branch; all globals and local
4793  * temps are synced to their location.
4794  */
4795 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4796 {
4797     sync_globals(s, allocated_regs);
4798 
4799     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4800         TCGTemp *ts = &s->temps[i];
4801         /*
4802          * The liveness analysis already ensures that temps are dead.
4803          * Keep tcg_debug_asserts for safety.
4804          */
4805         switch (ts->kind) {
4806         case TEMP_TB:
4807             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4808             break;
4809         case TEMP_EBB:
4810         case TEMP_CONST:
4811             break;
4812         default:
4813             g_assert_not_reached();
4814         }
4815     }
4816 }
4817 
4818 /*
4819  * Specialized code generation for INDEX_op_mov_* with a constant.
4820  */
4821 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4822                                   tcg_target_ulong val, TCGLifeData arg_life,
4823                                   TCGRegSet preferred_regs)
4824 {
4825     /* ENV should not be modified.  */
4826     tcg_debug_assert(!temp_readonly(ots));
4827 
4828     /* The movi is not explicitly generated here.  */
4829     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4830     ots->val = val;
4831     ots->mem_coherent = 0;
4832     if (NEED_SYNC_ARG(0)) {
4833         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4834     } else if (IS_DEAD_ARG(0)) {
4835         temp_dead(s, ots);
4836     }
4837 }
4838 
4839 /*
4840  * Specialized code generation for INDEX_op_mov_*.
4841  */
4842 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4843 {
4844     const TCGLifeData arg_life = op->life;
4845     TCGRegSet allocated_regs, preferred_regs;
4846     TCGTemp *ts, *ots;
4847     TCGType otype, itype;
4848     TCGReg oreg, ireg;
4849 
4850     allocated_regs = s->reserved_regs;
4851     preferred_regs = output_pref(op, 0);
4852     ots = arg_temp(op->args[0]);
4853     ts = arg_temp(op->args[1]);
4854 
4855     /* ENV should not be modified.  */
4856     tcg_debug_assert(!temp_readonly(ots));
4857 
4858     /* Note that otype != itype for no-op truncation.  */
4859     otype = ots->type;
4860     itype = ts->type;
4861 
4862     if (ts->val_type == TEMP_VAL_CONST) {
4863         /* propagate constant or generate sti */
4864         tcg_target_ulong val = ts->val;
4865         if (IS_DEAD_ARG(1)) {
4866             temp_dead(s, ts);
4867         }
4868         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4869         return;
4870     }
4871 
4872     /* If the source value is in memory we're going to be forced
4873        to have it in a register in order to perform the copy.  Copy
4874        the SOURCE value into its own register first, that way we
4875        don't have to reload SOURCE the next time it is used. */
4876     if (ts->val_type == TEMP_VAL_MEM) {
4877         temp_load(s, ts, tcg_target_available_regs[itype],
4878                   allocated_regs, preferred_regs);
4879     }
4880     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4881     ireg = ts->reg;
4882 
4883     if (IS_DEAD_ARG(0)) {
4884         /* mov to a non-saved dead register makes no sense (even with
4885            liveness analysis disabled). */
4886         tcg_debug_assert(NEED_SYNC_ARG(0));
4887         if (!ots->mem_allocated) {
4888             temp_allocate_frame(s, ots);
4889         }
4890         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4891         if (IS_DEAD_ARG(1)) {
4892             temp_dead(s, ts);
4893         }
4894         temp_dead(s, ots);
4895         return;
4896     }
4897 
4898     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4899         /*
4900          * The mov can be suppressed.  Kill input first, so that it
4901          * is unlinked from reg_to_temp, then set the output to the
4902          * reg that we saved from the input.
4903          */
4904         temp_dead(s, ts);
4905         oreg = ireg;
4906     } else {
4907         if (ots->val_type == TEMP_VAL_REG) {
4908             oreg = ots->reg;
4909         } else {
4910             /* Make sure to not spill the input register during allocation. */
4911             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4912                                  allocated_regs | ((TCGRegSet)1 << ireg),
4913                                  preferred_regs, ots->indirect_base);
4914         }
4915         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4916             /*
4917              * Cross register class move not supported.
4918              * Store the source register into the destination slot
4919              * and leave the destination temp as TEMP_VAL_MEM.
4920              */
4921             assert(!temp_readonly(ots));
4922             if (!ts->mem_allocated) {
4923                 temp_allocate_frame(s, ots);
4924             }
4925             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4926             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4927             ots->mem_coherent = 1;
4928             return;
4929         }
4930     }
4931     set_temp_val_reg(s, ots, oreg);
4932     ots->mem_coherent = 0;
4933 
4934     if (NEED_SYNC_ARG(0)) {
4935         temp_sync(s, ots, allocated_regs, 0, 0);
4936     }
4937 }
4938 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * Replicates a scalar or memory input (op->args[1]) across the vector
 * output (op->args[0]), trying in order: direct reg-reg dup, dup from
 * memory, then a plain load followed by an in-place dup.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    const TCGArgConstraint *dup_args_ct;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_TYPE(op);

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    dup_args_ct = opcode_args_ct(op);
    dup_out_regs = dup_args_ct[0].regs;
    dup_in_regs = dup_args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_op_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /* The element to replicate is in the low part of the input. */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
5051 
5052 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5053 {
5054     const TCGLifeData arg_life = op->life;
5055     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5056     TCGRegSet i_allocated_regs;
5057     TCGRegSet o_allocated_regs;
5058     int i, k, nb_iargs, nb_oargs;
5059     TCGReg reg;
5060     TCGArg arg;
5061     const TCGArgConstraint *args_ct;
5062     const TCGArgConstraint *arg_ct;
5063     TCGTemp *ts;
5064     TCGArg new_args[TCG_MAX_OP_ARGS];
5065     int const_args[TCG_MAX_OP_ARGS];
5066     TCGCond op_cond;
5067 
5068     nb_oargs = def->nb_oargs;
5069     nb_iargs = def->nb_iargs;
5070 
5071     /* copy constants */
5072     memcpy(new_args + nb_oargs + nb_iargs,
5073            op->args + nb_oargs + nb_iargs,
5074            sizeof(TCGArg) * def->nb_cargs);
5075 
5076     i_allocated_regs = s->reserved_regs;
5077     o_allocated_regs = s->reserved_regs;
5078 
5079     switch (op->opc) {
5080     case INDEX_op_brcond_i32:
5081     case INDEX_op_brcond_i64:
5082         op_cond = op->args[2];
5083         break;
5084     case INDEX_op_setcond_i32:
5085     case INDEX_op_setcond_i64:
5086     case INDEX_op_negsetcond_i32:
5087     case INDEX_op_negsetcond_i64:
5088     case INDEX_op_cmp_vec:
5089         op_cond = op->args[3];
5090         break;
5091     case INDEX_op_brcond2_i32:
5092         op_cond = op->args[4];
5093         break;
5094     case INDEX_op_movcond_i32:
5095     case INDEX_op_movcond_i64:
5096     case INDEX_op_setcond2_i32:
5097     case INDEX_op_cmpsel_vec:
5098         op_cond = op->args[5];
5099         break;
5100     default:
5101         /* No condition within opcode. */
5102         op_cond = TCG_COND_ALWAYS;
5103         break;
5104     }
5105 
5106     args_ct = opcode_args_ct(op);
5107 
5108     /* satisfy input constraints */
5109     for (k = 0; k < nb_iargs; k++) {
5110         TCGRegSet i_preferred_regs, i_required_regs;
5111         bool allocate_new_reg, copyto_new_reg;
5112         TCGTemp *ts2;
5113         int i1, i2;
5114 
5115         i = args_ct[nb_oargs + k].sort_index;
5116         arg = op->args[i];
5117         arg_ct = &args_ct[i];
5118         ts = arg_temp(arg);
5119 
5120         if (ts->val_type == TEMP_VAL_CONST) {
5121 #ifdef TCG_REG_ZERO
5122             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5123                 /* Hardware zero register: indicate register via non-const. */
5124                 const_args[i] = 0;
5125                 new_args[i] = TCG_REG_ZERO;
5126                 continue;
5127             }
5128 #endif
5129 
5130             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5131                                        op_cond, TCGOP_VECE(op))) {
5132                 /* constant is OK for instruction */
5133                 const_args[i] = 1;
5134                 new_args[i] = ts->val;
5135                 continue;
5136             }
5137         }
5138 
5139         reg = ts->reg;
5140         i_preferred_regs = 0;
5141         i_required_regs = arg_ct->regs;
5142         allocate_new_reg = false;
5143         copyto_new_reg = false;
5144 
5145         switch (arg_ct->pair) {
5146         case 0: /* not paired */
5147             if (arg_ct->ialias) {
5148                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5149 
5150                 /*
5151                  * If the input is readonly, then it cannot also be an
5152                  * output and aliased to itself.  If the input is not
5153                  * dead after the instruction, we must allocate a new
5154                  * register and move it.
5155                  */
5156                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5157                     || args_ct[arg_ct->alias_index].newreg) {
5158                     allocate_new_reg = true;
5159                 } else if (ts->val_type == TEMP_VAL_REG) {
5160                     /*
5161                      * Check if the current register has already been
5162                      * allocated for another input.
5163                      */
5164                     allocate_new_reg =
5165                         tcg_regset_test_reg(i_allocated_regs, reg);
5166                 }
5167             }
5168             if (!allocate_new_reg) {
5169                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5170                           i_preferred_regs);
5171                 reg = ts->reg;
5172                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5173             }
5174             if (allocate_new_reg) {
5175                 /*
5176                  * Allocate a new register matching the constraint
5177                  * and move the temporary register into it.
5178                  */
5179                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5180                           i_allocated_regs, 0);
5181                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5182                                     i_preferred_regs, ts->indirect_base);
5183                 copyto_new_reg = true;
5184             }
5185             break;
5186 
5187         case 1:
5188             /* First of an input pair; if i1 == i2, the second is an output. */
5189             i1 = i;
5190             i2 = arg_ct->pair_index;
5191             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5192 
5193             /*
5194              * It is easier to default to allocating a new pair
5195              * and to identify a few cases where it's not required.
5196              */
5197             if (arg_ct->ialias) {
5198                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5199                 if (IS_DEAD_ARG(i1) &&
5200                     IS_DEAD_ARG(i2) &&
5201                     !temp_readonly(ts) &&
5202                     ts->val_type == TEMP_VAL_REG &&
5203                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5204                     tcg_regset_test_reg(i_required_regs, reg) &&
5205                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5206                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5207                     (ts2
5208                      ? ts2->val_type == TEMP_VAL_REG &&
5209                        ts2->reg == reg + 1 &&
5210                        !temp_readonly(ts2)
5211                      : s->reg_to_temp[reg + 1] == NULL)) {
5212                     break;
5213                 }
5214             } else {
5215                 /* Without aliasing, the pair must also be an input. */
5216                 tcg_debug_assert(ts2);
5217                 if (ts->val_type == TEMP_VAL_REG &&
5218                     ts2->val_type == TEMP_VAL_REG &&
5219                     ts2->reg == reg + 1 &&
5220                     tcg_regset_test_reg(i_required_regs, reg)) {
5221                     break;
5222                 }
5223             }
5224             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5225                                      0, ts->indirect_base);
5226             goto do_pair;
5227 
5228         case 2: /* pair second */
5229             reg = new_args[arg_ct->pair_index] + 1;
5230             goto do_pair;
5231 
5232         case 3: /* ialias with second output, no first input */
5233             tcg_debug_assert(arg_ct->ialias);
5234             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5235 
5236             if (IS_DEAD_ARG(i) &&
5237                 !temp_readonly(ts) &&
5238                 ts->val_type == TEMP_VAL_REG &&
5239                 reg > 0 &&
5240                 s->reg_to_temp[reg - 1] == NULL &&
5241                 tcg_regset_test_reg(i_required_regs, reg) &&
5242                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5243                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5244                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5245                 break;
5246             }
5247             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5248                                      i_allocated_regs, 0,
5249                                      ts->indirect_base);
5250             tcg_regset_set_reg(i_allocated_regs, reg);
5251             reg += 1;
5252             goto do_pair;
5253 
5254         do_pair:
5255             /*
5256              * If an aliased input is not dead after the instruction,
5257              * we must allocate a new register and move it.
5258              */
5259             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5260                 TCGRegSet t_allocated_regs = i_allocated_regs;
5261 
5262                 /*
5263                  * Because of the alias, and the continued life, make sure
5264                  * that the temp is somewhere *other* than the reg pair,
5265                  * and we get a copy in reg.
5266                  */
5267                 tcg_regset_set_reg(t_allocated_regs, reg);
5268                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5269                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5270                     /* If ts was already in reg, copy it somewhere else. */
5271                     TCGReg nr;
5272                     bool ok;
5273 
5274                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5275                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5276                                        t_allocated_regs, 0, ts->indirect_base);
5277                     ok = tcg_out_mov(s, ts->type, nr, reg);
5278                     tcg_debug_assert(ok);
5279 
5280                     set_temp_val_reg(s, ts, nr);
5281                 } else {
5282                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5283                               t_allocated_regs, 0);
5284                     copyto_new_reg = true;
5285                 }
5286             } else {
5287                 /* Preferably allocate to reg, otherwise copy. */
5288                 i_required_regs = (TCGRegSet)1 << reg;
5289                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5290                           i_preferred_regs);
5291                 copyto_new_reg = ts->reg != reg;
5292             }
5293             break;
5294 
5295         default:
5296             g_assert_not_reached();
5297         }
5298 
5299         if (copyto_new_reg) {
5300             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5301                 /*
5302                  * Cross register class move not supported.  Sync the
5303                  * temp back to its slot and load from there.
5304                  */
5305                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5306                 tcg_out_ld(s, ts->type, reg,
5307                            ts->mem_base->reg, ts->mem_offset);
5308             }
5309         }
5310         new_args[i] = reg;
5311         const_args[i] = 0;
5312         tcg_regset_set_reg(i_allocated_regs, reg);
5313     }
5314 
5315     /* mark dead temporaries and free the associated registers */
5316     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5317         if (IS_DEAD_ARG(i)) {
5318             temp_dead(s, arg_temp(op->args[i]));
5319         }
5320     }
5321 
5322     if (def->flags & TCG_OPF_COND_BRANCH) {
5323         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5324     } else if (def->flags & TCG_OPF_BB_END) {
5325         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5326     } else {
5327         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5328             /* XXX: permit generic clobber register list ? */
5329             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5330                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5331                     tcg_reg_free(s, i, i_allocated_regs);
5332                 }
5333             }
5334         }
5335         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5336             /* sync globals if the op has side effects and might trigger
5337                an exception. */
5338             sync_globals(s, i_allocated_regs);
5339         }
5340 
5341         /* satisfy the output constraints */
5342         for (k = 0; k < nb_oargs; k++) {
5343             i = args_ct[k].sort_index;
5344             arg = op->args[i];
5345             arg_ct = &args_ct[i];
5346             ts = arg_temp(arg);
5347 
5348             /* ENV should not be modified.  */
5349             tcg_debug_assert(!temp_readonly(ts));
5350 
5351             switch (arg_ct->pair) {
5352             case 0: /* not paired */
5353                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5354                     reg = new_args[arg_ct->alias_index];
5355                 } else if (arg_ct->newreg) {
5356                     reg = tcg_reg_alloc(s, arg_ct->regs,
5357                                         i_allocated_regs | o_allocated_regs,
5358                                         output_pref(op, k), ts->indirect_base);
5359                 } else {
5360                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5361                                         output_pref(op, k), ts->indirect_base);
5362                 }
5363                 break;
5364 
5365             case 1: /* first of pair */
5366                 if (arg_ct->oalias) {
5367                     reg = new_args[arg_ct->alias_index];
5368                 } else if (arg_ct->newreg) {
5369                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5370                                              i_allocated_regs | o_allocated_regs,
5371                                              output_pref(op, k),
5372                                              ts->indirect_base);
5373                 } else {
5374                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5375                                              output_pref(op, k),
5376                                              ts->indirect_base);
5377                 }
5378                 break;
5379 
5380             case 2: /* second of pair */
5381                 if (arg_ct->oalias) {
5382                     reg = new_args[arg_ct->alias_index];
5383                 } else {
5384                     reg = new_args[arg_ct->pair_index] + 1;
5385                 }
5386                 break;
5387 
5388             case 3: /* first of pair, aliasing with a second input */
5389                 tcg_debug_assert(!arg_ct->newreg);
5390                 reg = new_args[arg_ct->pair_index] - 1;
5391                 break;
5392 
5393             default:
5394                 g_assert_not_reached();
5395             }
5396             tcg_regset_set_reg(o_allocated_regs, reg);
5397             set_temp_val_reg(s, ts, reg);
5398             ts->mem_coherent = 0;
5399             new_args[i] = reg;
5400         }
5401     }
5402 
5403     /* emit instruction */
5404     TCGType type = TCGOP_TYPE(op);
5405     switch (op->opc) {
5406     case INDEX_op_ext_i32_i64:
5407         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5408         break;
5409     case INDEX_op_extu_i32_i64:
5410         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5411         break;
5412     case INDEX_op_extrl_i64_i32:
5413         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5414         break;
5415 
5416     case INDEX_op_add:
5417     case INDEX_op_and:
5418     case INDEX_op_andc:
5419     case INDEX_op_divs:
5420     case INDEX_op_eqv:
5421     case INDEX_op_mul:
5422     case INDEX_op_mulsh:
5423     case INDEX_op_muluh:
5424     case INDEX_op_nand:
5425     case INDEX_op_nor:
5426     case INDEX_op_or:
5427     case INDEX_op_orc:
5428     case INDEX_op_xor:
5429         {
5430             const TCGOutOpBinary *out =
5431                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5432 
5433             /* Constants should never appear in the first source operand. */
5434             tcg_debug_assert(!const_args[1]);
5435             if (const_args[2]) {
5436                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5437             } else {
5438                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5439             }
5440         }
5441         break;
5442 
5443     case INDEX_op_sub:
5444         {
5445             const TCGOutOpSubtract *out = &outop_sub;
5446 
5447             /*
5448              * Constants should never appear in the second source operand.
5449              * These are folded to add with negative constant.
5450              */
5451             tcg_debug_assert(!const_args[2]);
5452             if (const_args[1]) {
5453                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5454             } else {
5455                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5456             }
5457         }
5458         break;
5459 
5460     case INDEX_op_neg:
5461     case INDEX_op_not:
5462         {
5463             const TCGOutOpUnary *out =
5464                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5465 
5466             /* Constants should have been folded. */
5467             tcg_debug_assert(!const_args[1]);
5468             out->out_rr(s, type, new_args[0], new_args[1]);
5469         }
5470         break;
5471 
5472     default:
5473         if (def->flags & TCG_OPF_VECTOR) {
5474             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5475                            TCGOP_VECE(op), new_args, const_args);
5476         } else {
5477             tcg_out_op(s, op->opc, type, new_args, const_args);
5478         }
5479         break;
5480     }
5481 
5482     /* move the outputs in the correct register if needed */
5483     for(i = 0; i < nb_oargs; i++) {
5484         ts = arg_temp(op->args[i]);
5485 
5486         /* ENV should not be modified.  */
5487         tcg_debug_assert(!temp_readonly(ts));
5488 
5489         if (NEED_SYNC_ARG(i)) {
5490             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5491         } else if (IS_DEAD_ARG(i)) {
5492             temp_dead(s, ts);
5493         }
5494     }
5495 }
5496 
/*
 * Register allocation for dup2_vec: build a 64-bit-element vector from
 * two 32-bit inputs (op->args[1] = low half, op->args[2] = high half).
 * Only used on 32-bit hosts.  Returns true if code was emitted here;
 * false directs the caller to fall back to generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_TYPE(op);

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        /* Combine the two 32-bit constants into one 64-bit value. */
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that replicates to this value. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        /* its points to the subindex-0 (lowest-addressed) half. */
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Flush both halves to their (adjacent) memory slots. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    /* Output register is now the only valid copy of the value. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5583 
5584 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5585                          TCGRegSet allocated_regs)
5586 {
5587     if (ts->val_type == TEMP_VAL_REG) {
5588         if (ts->reg != reg) {
5589             tcg_reg_free(s, reg, allocated_regs);
5590             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5591                 /*
5592                  * Cross register class move not supported.  Sync the
5593                  * temp back to its slot and load from there.
5594                  */
5595                 temp_sync(s, ts, allocated_regs, 0, 0);
5596                 tcg_out_ld(s, ts->type, reg,
5597                            ts->mem_base->reg, ts->mem_offset);
5598             }
5599         }
5600     } else {
5601         TCGRegSet arg_set = 0;
5602 
5603         tcg_reg_free(s, reg, allocated_regs);
5604         tcg_regset_set_reg(arg_set, reg);
5605         temp_load(s, ts, arg_set, allocated_regs, 0);
5606     }
5607 }
5608 
5609 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5610                          TCGRegSet allocated_regs)
5611 {
5612     /*
5613      * When the destination is on the stack, load up the temp and store.
5614      * If there are many call-saved registers, the temp might live to
5615      * see another use; otherwise it'll be discarded.
5616      */
5617     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5618     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5619                arg_slot_stk_ofs(arg_slot));
5620 }
5621 
5622 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5623                             TCGTemp *ts, TCGRegSet *allocated_regs)
5624 {
5625     if (arg_slot_reg_p(l->arg_slot)) {
5626         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5627         load_arg_reg(s, reg, ts, *allocated_regs);
5628         tcg_regset_set_reg(*allocated_regs, reg);
5629     } else {
5630         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5631     }
5632 }
5633 
5634 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5635                          intptr_t ref_off, TCGRegSet *allocated_regs)
5636 {
5637     TCGReg reg;
5638 
5639     if (arg_slot_reg_p(arg_slot)) {
5640         reg = tcg_target_call_iarg_regs[arg_slot];
5641         tcg_reg_free(s, reg, *allocated_regs);
5642         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5643         tcg_regset_set_reg(*allocated_regs, reg);
5644     } else {
5645         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5646                             *allocated_regs, 0, false);
5647         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5648         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5649                    arg_slot_stk_ofs(arg_slot));
5650     }
5651 }
5652 
/*
 * Register allocation and code emission for a helper call op:
 * marshal inputs per the ABI described by the op's TCGHelperInfo,
 * spill/sync globals as required by the call flags, emit the call,
 * then bind the return value(s) to the output temps.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Store the value to its ref slot, then pass its address. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent part of a by-reference value: store only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        /* Each output part lands in an ABI return register. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        /* Value returned in a vector register; store it to memory. */
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5783 
5784 /**
5785  * atom_and_align_for_opc:
5786  * @s: tcg context
5787  * @opc: memory operation code
5788  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5789  * @allow_two_ops: true if we are prepared to issue two operations
5790  *
5791  * Return the alignment and atomicity to use for the inline fast path
5792  * for the given memory operation.  The alignment may be larger than
5793  * that specified in @opc, and the correct alignment will be diagnosed
5794  * by the slow path helper.
5795  *
5796  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5797  * and issue two loads or stores for subalignment.
5798  */
5799 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5800                                            MemOp host_atom, bool allow_two_ops)
5801 {
5802     MemOp align = memop_alignment_bits(opc);
5803     MemOp size = opc & MO_SIZE;
5804     MemOp half = size ? size - 1 : 0;
5805     MemOp atom = opc & MO_ATOM_MASK;
5806     MemOp atmax;
5807 
5808     switch (atom) {
5809     case MO_ATOM_NONE:
5810         /* The operation requires no specific atomicity. */
5811         atmax = MO_8;
5812         break;
5813 
5814     case MO_ATOM_IFALIGN:
5815         atmax = size;
5816         break;
5817 
5818     case MO_ATOM_IFALIGN_PAIR:
5819         atmax = half;
5820         break;
5821 
5822     case MO_ATOM_WITHIN16:
5823         atmax = size;
5824         if (size == MO_128) {
5825             /* Misalignment implies !within16, and therefore no atomicity. */
5826         } else if (host_atom != MO_ATOM_WITHIN16) {
5827             /* The host does not implement within16, so require alignment. */
5828             align = MAX(align, size);
5829         }
5830         break;
5831 
5832     case MO_ATOM_WITHIN16_PAIR:
5833         atmax = size;
5834         /*
5835          * Misalignment implies !within16, and therefore half atomicity.
5836          * Any host prepared for two operations can implement this with
5837          * half alignment.
5838          */
5839         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5840             align = MAX(align, half);
5841         }
5842         break;
5843 
5844     case MO_ATOM_SUBALIGN:
5845         atmax = size;
5846         if (host_atom != MO_ATOM_SUBALIGN) {
5847             /* If unaligned but not odd, there are subobjects up to half. */
5848             if (allow_two_ops) {
5849                 align = MAX(align, half);
5850             } else {
5851                 align = MAX(align, size);
5852             }
5853         }
5854         break;
5855 
5856     default:
5857         g_assert_not_reached();
5858     }
5859 
5860     return (TCGAtomAlign){ .atom = atmax, .align = align };
5861 }
5862 
5863 /*
5864  * Similarly for qemu_ld/st slow path helpers.
5865  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5866  * using only the provided backend tcg_out_* functions.
5867  */
5868 
5869 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5870 {
5871     int ofs = arg_slot_stk_ofs(slot);
5872 
5873     /*
5874      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5875      * require extension to uint64_t, adjust the address for uint32_t.
5876      */
5877     if (HOST_BIG_ENDIAN &&
5878         TCG_TARGET_REG_BITS == 64 &&
5879         type == TCG_TYPE_I32) {
5880         ofs += 4;
5881     }
5882     return ofs;
5883 }
5884 
/*
 * Perform @nmov argument moves described by @mov, where each
 * destination is an abstract argument slot number.  Stack-bound
 * slots are stored immediately; the remaining register-bound slots
 * are resolved to argument registers and moved as a group, using
 * the backend scratch registers in @parm to break cycles.
 */
static void tcg_out_helper_load_slots(TCGContext *s,
                                      unsigned nmov, TCGMovExtend *mov,
                                      const TCGLdstHelperParam *parm)
{
    unsigned i;
    TCGReg dst3;

    /*
     * Start from the end, storing to the stack first.
     * This frees those registers, so we need not consider overlap.
     */
    for (i = nmov; i-- > 0; ) {
        unsigned slot = mov[i].dst;

        if (arg_slot_reg_p(slot)) {
            /* Register slots are handled below, lowest-indexed first. */
            goto found_reg;
        }

        TCGReg src = mov[i].src;
        TCGType dst_type = mov[i].dst_type;
        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;

        /* The argument is going onto the stack; extend into scratch. */
        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
            tcg_debug_assert(parm->ntmp != 0);
            mov[i].dst = src = parm->tmp[0];
            tcg_out_movext1(s, &mov[i]);
        }

        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
                   tcg_out_helper_stk_ofs(dst_type, slot));
    }
    return;

 found_reg:
    /*
     * The remaining arguments are in registers.
     * Convert slot numbers to argument registers.
     */
    nmov = i + 1;
    for (i = 0; i < nmov; ++i) {
        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
    }

    switch (nmov) {
    case 4:
        /* The backend must have provided enough temps for the worst case. */
        tcg_debug_assert(parm->ntmp >= 2);

        dst3 = mov[3].dst;
        for (unsigned j = 0; j < 3; ++j) {
            if (dst3 == mov[j].src) {
                /*
                 * Conflict. Copy the source to a temporary, perform the
                 * remaining moves, then the extension from our scratch
                 * on the way out.
                 */
                TCGReg scratch = parm->tmp[1];

                tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
                tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
                tcg_out_movext1_new_src(s, &mov[3], scratch);
                break;
            }
        }

        /* No conflicts: perform this move and continue. */
        tcg_out_movext1(s, &mov[3]);
        /* fall through */

    case 3:
        tcg_out_movext3(s, mov, mov + 1, mov + 2,
                        parm->ntmp ? parm->tmp[0] : -1);
        break;
    case 2:
        tcg_out_movext2(s, mov, mov + 1,
                        parm->ntmp ? parm->tmp[0] : -1);
        break;
    case 1:
        tcg_out_movext1(s, mov);
        break;
    default:
        g_assert_not_reached();
    }
}
5970 
5971 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5972                                     TCGType type, tcg_target_long imm,
5973                                     const TCGLdstHelperParam *parm)
5974 {
5975     if (arg_slot_reg_p(slot)) {
5976         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5977     } else {
5978         int ofs = tcg_out_helper_stk_ofs(type, slot);
5979         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5980             tcg_debug_assert(parm->ntmp != 0);
5981             tcg_out_movi(s, type, parm->tmp[0], imm);
5982             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5983         }
5984     }
5985 }
5986 
/*
 * Load the arguments common to all qemu_ld/st slow-path helpers:
 * env (always first), the MemOpIdx @ldst->oi at position @next_arg,
 * and the return address at @next_arg + 1 (either generated by the
 * backend's ra_gen hook or the raw raddr as an immediate).
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /* Backend computes the return address into a register. */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Pass the slow-path return address as an immediate. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
6053 
6054 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6055                                        const TCGCallArgumentLoc *loc,
6056                                        TCGType dst_type, TCGType src_type,
6057                                        TCGReg lo, TCGReg hi)
6058 {
6059     MemOp reg_mo;
6060 
6061     if (dst_type <= TCG_TYPE_REG) {
6062         MemOp src_ext;
6063 
6064         switch (loc->kind) {
6065         case TCG_CALL_ARG_NORMAL:
6066             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6067             break;
6068         case TCG_CALL_ARG_EXTEND_U:
6069             dst_type = TCG_TYPE_REG;
6070             src_ext = MO_UL;
6071             break;
6072         case TCG_CALL_ARG_EXTEND_S:
6073             dst_type = TCG_TYPE_REG;
6074             src_ext = MO_SL;
6075             break;
6076         default:
6077             g_assert_not_reached();
6078         }
6079 
6080         mov[0].dst = loc->arg_slot;
6081         mov[0].dst_type = dst_type;
6082         mov[0].src = lo;
6083         mov[0].src_type = src_type;
6084         mov[0].src_ext = src_ext;
6085         return 1;
6086     }
6087 
6088     if (TCG_TARGET_REG_BITS == 32) {
6089         assert(dst_type == TCG_TYPE_I64);
6090         reg_mo = MO_32;
6091     } else {
6092         assert(dst_type == TCG_TYPE_I128);
6093         reg_mo = MO_64;
6094     }
6095 
6096     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6097     mov[0].src = lo;
6098     mov[0].dst_type = TCG_TYPE_REG;
6099     mov[0].src_type = TCG_TYPE_REG;
6100     mov[0].src_ext = reg_mo;
6101 
6102     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6103     mov[1].src = hi;
6104     mov[1].dst_type = TCG_TYPE_REG;
6105     mov[1].src_type = TCG_TYPE_REG;
6106     mov[1].src_ext = reg_mo;
6107 
6108     return 2;
6109 }
6110 
/*
 * Emit code, in a qemu_ld slow path, to load all arguments for the
 * out-of-line load helper selected by the access size of @ldst->oi.
 * Marshals the guest address (splitting/zero-extending to 64 bits on
 * a 32-bit host), arranges return-by-reference storage if the ABI
 * demands it, then loads the remaining common arguments (env, oi, ra)
 * via tcg_out_helper_load_common_args.  @parm supplies backend hooks
 * and scratch registers for the emission.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Choose the helper signature from the memory access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    /* Handle the address argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addr_reg, -1);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                /* Pass the stack address directly in the first arg register. */
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* First arg is on the stack: compute into a temp, then store. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6191 
/*
 * Emit code, in a qemu_ld slow path, to move the helper's return value
 * into the destination register(s) recorded in @ldst, applying any
 * extension required by the memop.  @load_sign indicates that the
 * helper already performed the sign extension requested by MO_SIGN.
 * Handles single-register results, 32-bit-host I64 register pairs,
 * and the three I128 return conventions.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        if (TCG_TARGET_REG_BITS == 32) {
            /* I64 on a 32-bit host: handled by the two-register code below. */
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        /* Result fits in one host register: a single extending move. */
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair: handled below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return to the stack, then reload as two I64. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* Result was written to the stack slot reserved before the call. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register result: move the pair in host-endian order. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    /* movext2 resolves a possible swap; give it a scratch reg if we have one. */
    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
6277 
/*
 * Emit code, in a qemu_st slow path, to load all arguments for the
 * out-of-line store helper selected by the access size of @ldst->oi.
 * Marshals the guest address and the data value (register pair or
 * by-reference I128 as the ABI requires), then loads the remaining
 * common arguments (env, oi, ra).  @parm supplies backend hooks and
 * scratch registers for the emission.
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Choose the helper signature from the memory access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
    if (TCG_TARGET_REG_BITS == 32) {
        /*
         * 32-bit host (and thus 32-bit guest): zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addr_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addr_reg, -1);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed in registers: queue the moves, then emit them all. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 passed by reference: spill the pair to the reserved slots. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        /* Pass the address of the spilled value as the argument itself. */
        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
6384 
/*
 * Translate the TB's opcode stream into host machine code at tb->tc.ptr.
 * Runs the optimizer and liveness passes, then register-allocates and
 * emits each op in order, finishing with slow-path and constant-pool
 * finalization and relocation resolution.
 *
 * Returns the generated code size in bytes on success, or a negative
 * value when generation must be restarted with a larger buffer:
 * -1 for (pending) code buffer overflow, -2 when the code size no
 * longer fits the 16-bit insn-offset table or relocations fail, or a
 * negative code propagated from backend finalization.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
    int i, start_words, num_insns;
    TCGOp *op;

    /* Dump the pre-optimization opcode stream when logging requests it. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

    /* Do not reuse any EBB that may be allocated within the TB. */
    tcg_temp_ebb_reset_freed(s);

    tcg_optimize(s);

    reachable_code_pass(s);
    liveness_pass_0(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(pc_start))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }

        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(pc_start))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

    /* Initialize goto_tb jump offsets. */
    tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
    tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;
    s->data_gen_ptr = NULL;

    QSIMPLEQ_INIT(&s->ldst_labels);
    s->pool_labels = NULL;

    /* Per-insn metadata: start_words words recorded at each insn_start. */
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Close out the previous insn's end offset before starting. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
                                              TCGOP_FLAGS(op)));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
6576 
6577 #ifdef ELF_HOST_MACHINE
6578 /* In order to use this feature, the backend needs to do three things:
6579 
6580    (1) Define ELF_HOST_MACHINE to indicate both what value to
6581        put into the ELF image and to indicate support for the feature.
6582 
6583    (2) Define tcg_register_jit.  This should create a buffer containing
6584        the contents of a .debug_frame section that describes the post-
6585        prologue unwind info for the tcg machine.
6586 
6587    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6588 */
6589 
6590 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* in-memory ELF image for this JIT region */
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB places a breakpoint in this function; noinline + asm keep it alive. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6620 
6621 /* End GDB interface.  */
6622 
/*
 * Return the offset of @str within the ELF string table @strtab.
 * The table must begin with a NUL byte and must contain @str;
 * there is deliberately no not-found case.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Skip the mandatory leading NUL, then step string by string. */
    for (const char *cand = strtab + 1; ; cand += strlen(cand) + 1) {
        if (strcmp(cand, str) == 0) {
            return cand - strtab;
        }
    }
}
6634 
6635 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6636                                  const void *debug_frame,
6637                                  size_t debug_frame_size)
6638 {
6639     struct __attribute__((packed)) DebugInfo {
6640         uint32_t  len;
6641         uint16_t  version;
6642         uint32_t  abbrev;
6643         uint8_t   ptr_size;
6644         uint8_t   cu_die;
6645         uint16_t  cu_lang;
6646         uintptr_t cu_low_pc;
6647         uintptr_t cu_high_pc;
6648         uint8_t   fn_die;
6649         char      fn_name[16];
6650         uintptr_t fn_low_pc;
6651         uintptr_t fn_high_pc;
6652         uint8_t   cu_eoc;
6653     };
6654 
6655     struct ElfImage {
6656         ElfW(Ehdr) ehdr;
6657         ElfW(Phdr) phdr;
6658         ElfW(Shdr) shdr[7];
6659         ElfW(Sym)  sym[2];
6660         struct DebugInfo di;
6661         uint8_t    da[24];
6662         char       str[80];
6663     };
6664 
6665     struct ElfImage *img;
6666 
6667     static const struct ElfImage img_template = {
6668         .ehdr = {
6669             .e_ident[EI_MAG0] = ELFMAG0,
6670             .e_ident[EI_MAG1] = ELFMAG1,
6671             .e_ident[EI_MAG2] = ELFMAG2,
6672             .e_ident[EI_MAG3] = ELFMAG3,
6673             .e_ident[EI_CLASS] = ELF_CLASS,
6674             .e_ident[EI_DATA] = ELF_DATA,
6675             .e_ident[EI_VERSION] = EV_CURRENT,
6676             .e_type = ET_EXEC,
6677             .e_machine = ELF_HOST_MACHINE,
6678             .e_version = EV_CURRENT,
6679             .e_phoff = offsetof(struct ElfImage, phdr),
6680             .e_shoff = offsetof(struct ElfImage, shdr),
6681             .e_ehsize = sizeof(ElfW(Shdr)),
6682             .e_phentsize = sizeof(ElfW(Phdr)),
6683             .e_phnum = 1,
6684             .e_shentsize = sizeof(ElfW(Shdr)),
6685             .e_shnum = ARRAY_SIZE(img->shdr),
6686             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6687 #ifdef ELF_HOST_FLAGS
6688             .e_flags = ELF_HOST_FLAGS,
6689 #endif
6690 #ifdef ELF_OSABI
6691             .e_ident[EI_OSABI] = ELF_OSABI,
6692 #endif
6693         },
6694         .phdr = {
6695             .p_type = PT_LOAD,
6696             .p_flags = PF_X,
6697         },
6698         .shdr = {
6699             [0] = { .sh_type = SHT_NULL },
6700             /* Trick: The contents of code_gen_buffer are not present in
6701                this fake ELF file; that got allocated elsewhere.  Therefore
6702                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6703                will not look for contents.  We can record any address.  */
6704             [1] = { /* .text */
6705                 .sh_type = SHT_NOBITS,
6706                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6707             },
6708             [2] = { /* .debug_info */
6709                 .sh_type = SHT_PROGBITS,
6710                 .sh_offset = offsetof(struct ElfImage, di),
6711                 .sh_size = sizeof(struct DebugInfo),
6712             },
6713             [3] = { /* .debug_abbrev */
6714                 .sh_type = SHT_PROGBITS,
6715                 .sh_offset = offsetof(struct ElfImage, da),
6716                 .sh_size = sizeof(img->da),
6717             },
6718             [4] = { /* .debug_frame */
6719                 .sh_type = SHT_PROGBITS,
6720                 .sh_offset = sizeof(struct ElfImage),
6721             },
6722             [5] = { /* .symtab */
6723                 .sh_type = SHT_SYMTAB,
6724                 .sh_offset = offsetof(struct ElfImage, sym),
6725                 .sh_size = sizeof(img->sym),
6726                 .sh_info = 1,
6727                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6728                 .sh_entsize = sizeof(ElfW(Sym)),
6729             },
6730             [6] = { /* .strtab */
6731                 .sh_type = SHT_STRTAB,
6732                 .sh_offset = offsetof(struct ElfImage, str),
6733                 .sh_size = sizeof(img->str),
6734             }
6735         },
6736         .sym = {
6737             [1] = { /* code_gen_buffer */
6738                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6739                 .st_shndx = 1,
6740             }
6741         },
6742         .di = {
6743             .len = sizeof(struct DebugInfo) - 4,
6744             .version = 2,
6745             .ptr_size = sizeof(void *),
6746             .cu_die = 1,
6747             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6748             .fn_die = 2,
6749             .fn_name = "code_gen_buffer"
6750         },
6751         .da = {
6752             1,          /* abbrev number (the cu) */
6753             0x11, 1,    /* DW_TAG_compile_unit, has children */
6754             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6755             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6756             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6757             0, 0,       /* end of abbrev */
6758             2,          /* abbrev number (the fn) */
6759             0x2e, 0,    /* DW_TAG_subprogram, no children */
6760             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6761             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6762             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6763             0, 0,       /* end of abbrev */
6764             0           /* no more abbrev */
6765         },
6766         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6767                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6768     };
6769 
6770     /* We only need a single jit entry; statically allocate it.  */
6771     static struct jit_code_entry one_entry;
6772 
6773     uintptr_t buf = (uintptr_t)buf_ptr;
6774     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6775     DebugFrameHeader *dfh;
6776 
6777     img = g_malloc(img_size);
6778     *img = img_template;
6779 
6780     img->phdr.p_vaddr = buf;
6781     img->phdr.p_paddr = buf;
6782     img->phdr.p_memsz = buf_size;
6783 
6784     img->shdr[1].sh_name = find_string(img->str, ".text");
6785     img->shdr[1].sh_addr = buf;
6786     img->shdr[1].sh_size = buf_size;
6787 
6788     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6789     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6790 
6791     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6792     img->shdr[4].sh_size = debug_frame_size;
6793 
6794     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6795     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6796 
6797     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6798     img->sym[1].st_value = buf;
6799     img->sym[1].st_size = buf_size;
6800 
6801     img->di.cu_low_pc = buf;
6802     img->di.cu_high_pc = buf + buf_size;
6803     img->di.fn_low_pc = buf;
6804     img->di.fn_high_pc = buf + buf_size;
6805 
6806     dfh = (DebugFrameHeader *)(img + 1);
6807     memcpy(dfh, debug_frame, debug_frame_size);
6808     dfh->fde.func_start = buf;
6809     dfh->fde.func_len = buf_size;
6810 
6811 #ifdef DEBUG_JIT
6812     /* Enable this block to be able to debug the ELF image file creation.
6813        One can use readelf, objdump, or other inspection utilities.  */
6814     {
6815         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6816         FILE *f = fopen(jit, "w+b");
6817         if (f) {
6818             if (fwrite(img, img_size, 1, f) != img_size) {
6819                 /* Avoid stupid unused return value warning for fwrite.  */
6820             }
6821             fclose(f);
6822         }
6823     }
6824 #endif
6825 
6826     one_entry.symfile_addr = img;
6827     one_entry.symfile_size = img_size;
6828 
6829     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6830     __jit_debug_descriptor.relevant_entry = &one_entry;
6831     __jit_debug_descriptor.first_entry = &one_entry;
6832     __jit_debug_register_code();
6833 }
6834 #else
6835 /* No support for the feature.  Provide the entry point expected by exec.c,
6836    and implement the internal function we declared earlier.  */
6837 
/* Stub: no GDB JIT registration without ELF_HOST_MACHINE. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6843 
/* Stub entry point expected by exec.c when GDB JIT support is absent. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6847 #endif /* ELF_HOST_MACHINE */
6848 
6849 #if !TCG_TARGET_MAYBE_vec
/* The backend declared no vector support; this must never be reached. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
6854 #endif
6855