xref: /openbmc/qemu/tcg/tcg.c (revision 3949f365eb6e7c934831c65c67b729562846ede9)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
27 /* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions defined in tcg-target.c.inc and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
69                         intptr_t value, intptr_t addend);
70 static void tcg_out_nop_fill(tcg_insn_unit *p, int count);
71 
72 typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
73 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
74 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 struct TCGLabelQemuLdst {
100     bool is_ld;             /* qemu_ld: true, qemu_st: false */
101     MemOpIdx oi;
102     TCGType type;           /* result type of a load */
103     TCGReg addr_reg;        /* reg index for guest virtual addr */
104     TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
105     TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
106     const tcg_insn_unit *raddr;   /* addr of the generated code following the qemu_ld/st */
107     tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
108     QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
109 };
110 
111 static void tcg_register_jit_int(const void *buf, size_t size,
112                                  const void *debug_frame,
113                                  size_t debug_frame_size)
114     __attribute__((unused));
115 
116 /* Forward declarations for functions defined and used in tcg-target.c.inc. */
117 static void tcg_out_tb_start(TCGContext *s);
118 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
119                        intptr_t arg2);
120 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
121 static void tcg_out_movi(TCGContext *s, TCGType type,
122                          TCGReg ret, tcg_target_long arg);
123 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
131 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
132 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
133 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
134 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
135 static void tcg_out_goto_tb(TCGContext *s, int which);
136 static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
137                        const TCGArg args[TCG_MAX_OP_ARGS],
138                        const int const_args[TCG_MAX_OP_ARGS]);
139 #if TCG_TARGET_MAYBE_vec
140 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
141                             TCGReg dst, TCGReg src);
142 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, TCGReg base, intptr_t offset);
144 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
145                              TCGReg dst, int64_t arg);
146 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
147                            unsigned vecl, unsigned vece,
148                            const TCGArg args[TCG_MAX_OP_ARGS],
149                            const int const_args[TCG_MAX_OP_ARGS]);
150 #else
151 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
152                                    TCGReg dst, TCGReg src)
153 {
154     g_assert_not_reached();
155 }
156 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
157                                     TCGReg dst, TCGReg base, intptr_t offset)
158 {
159     g_assert_not_reached();
160 }
161 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
162                                     TCGReg dst, int64_t arg)
163 {
164     g_assert_not_reached();
165 }
166 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
167                                   unsigned vecl, unsigned vece,
168                                   const TCGArg args[TCG_MAX_OP_ARGS],
169                                   const int const_args[TCG_MAX_OP_ARGS])
170 {
171     g_assert_not_reached();
172 }
173 int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
174 {
175     return 0;
176 }
177 #endif
178 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
179                        intptr_t arg2);
180 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
181                         TCGReg base, intptr_t ofs);
182 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
183                          const TCGHelperInfo *info);
184 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
185 static bool tcg_target_const_match(int64_t val, int ct,
186                                    TCGType type, TCGCond cond, int vece);
187 
188 #ifndef CONFIG_USER_ONLY
189 #define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
190 #endif
191 
192 typedef struct TCGLdstHelperParam {
193     TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
194     unsigned ntmp;
195     int tmp[3];
196 } TCGLdstHelperParam;
197 
198 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
199                                    const TCGLdstHelperParam *p)
200     __attribute__((unused));
201 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
202                                   bool load_sign, const TCGLdstHelperParam *p)
203     __attribute__((unused));
204 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
205                                    const TCGLdstHelperParam *p)
206     __attribute__((unused));
207 
208 static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
209     [MO_UB] = helper_ldub_mmu,
210     [MO_SB] = helper_ldsb_mmu,
211     [MO_UW] = helper_lduw_mmu,
212     [MO_SW] = helper_ldsw_mmu,
213     [MO_UL] = helper_ldul_mmu,
214     [MO_UQ] = helper_ldq_mmu,
215 #if TCG_TARGET_REG_BITS == 64
216     [MO_SL] = helper_ldsl_mmu,
217     [MO_128] = helper_ld16_mmu,
218 #endif
219 };
220 
221 static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
222     [MO_8]  = helper_stb_mmu,
223     [MO_16] = helper_stw_mmu,
224     [MO_32] = helper_stl_mmu,
225     [MO_64] = helper_stq_mmu,
226 #if TCG_TARGET_REG_BITS == 64
227     [MO_128] = helper_st16_mmu,
228 #endif
229 };
230 
231 typedef struct {
232     MemOp atom;   /* lg2 bits of atomicity required */
233     MemOp align;  /* lg2 bits of alignment to use */
234 } TCGAtomAlign;
235 
236 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
237                                            MemOp host_atom, bool allow_two_ops)
238     __attribute__((unused));
239 
240 #ifdef CONFIG_USER_ONLY
241 bool tcg_use_softmmu;
242 #endif
243 
244 TCGContext tcg_init_ctx;
245 __thread TCGContext *tcg_ctx;
246 
247 TCGContext **tcg_ctxs;
248 unsigned int tcg_cur_ctxs;
249 unsigned int tcg_max_ctxs;
250 TCGv_env tcg_env;
251 const void *tcg_code_gen_epilogue;
252 uintptr_t tcg_splitwx_diff;
253 
254 #ifndef CONFIG_TCG_INTERPRETER
255 tcg_prologue_fn *tcg_qemu_tb_exec;
256 #endif
257 
258 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
259 static TCGRegSet tcg_target_call_clobber_regs;
260 
261 #if TCG_TARGET_INSN_UNIT_SIZE == 1
262 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
263 {
264     *s->code_ptr++ = v;
265 }
266 
267 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
268                                                       uint8_t v)
269 {
270     *p = v;
271 }
272 #endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
342 
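/*
 * Illustration (editor's note): TCG_TARGET_INSN_UNIT_SIZE is the size
 * of one tcg_insn_unit.  On a host with 1-byte units (e.g. x86),
 * tcg_out32() advances s->code_ptr by 4 units via the memcpy path;
 * with 4-byte units (e.g. aarch64) it advances by 1 unit and takes
 * the direct store path.
 */
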
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
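/*
 * Illustration (editor's sketch of typical backend usage): a forward
 * branch whose target label is not yet known is emitted as
 *
 *     tcg_out_reloc(s, s->code_ptr, R_HOST_RELOC_TYPE, l, 0);
 *     ...emit placeholder branch insn...
 *
 * where R_HOST_RELOC_TYPE stands for a host-specific relocation type.
 * When the label is finally placed, tcg_out_label() records its
 * address, and tcg_resolve_relocs() patches every recorded site via
 * patch_reloc().
 */
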
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
428 
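/*
 * Note (editor's): the result is negative.  CPUNegativeOffsetState is
 * laid out immediately before CPUArchState, so backends reach the TLB
 * mask/table pair with small negative offsets from the env register;
 * the QEMU_BUILD_BUG_ON after the backend include checks this offset
 * against the backend's MIN_TLB_MASK_TABLE_OFS.
 */
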
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
436 /*
437  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
438  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
439  *
440  * However, tcg_out_helper_load_slots reuses this field to hold an
441  * argument slot number (which may designate an argument register or an
442  * argument stack slot), converting to TCGReg once all arguments that
443  * are destined for the stack are processed.
444  */
445 typedef struct TCGMovExtend {
446     unsigned dst;
447     TCGReg src;
448     TCGType dst_type;
449     TCGType src_type;
450     MemOp src_ext;
451 } TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
514 
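/*
 * Example (illustrative): sign-extending a 32-bit value in @src into
 * a 64-bit destination would be requested as
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * which dispatches to tcg_out_exts_i32_i64() above.
 */
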
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pairs
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, taking care of overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers; now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
566 
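/*
 * Example (illustrative): for the pair (A <- ext B, B <- ext A) each
 * destination overlaps the other source, so tcg_out_movext2() first
 * swaps the two registers via tcg_out_xchg(), or routes one value
 * through @scratch, and only then applies the extensions.
 */
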
567 /**
568  * tcg_out_movext3 -- move and extend three pairs
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, taking care of overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch or two xchg.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
612             /* The data is now in the correct registers; now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
627             /* The data is now in the correct registers; now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
662 typedef struct TCGLabelPoolData {
663     struct TCGLabelPoolData *next;
664     tcg_insn_unit *label;
665     intptr_t addend;
666     int rtype;
667     unsigned nlong;
668     tcg_target_ulong data[];
669 } TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
704 
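/*
 * Note (editor's): the sort is by descending nlong, then descending
 * data, so identical constants end up adjacent; tcg_out_pool_finalize()
 * below exploits this to emit each duplicated constant only once.
 */
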
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, on a 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of the block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
834 #define C_PFX1(P, A)                    P##A
835 #define C_PFX2(P, A, B)                 P##A##_##B
836 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
837 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
838 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
839 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
841 /* Define an enumeration for the various combinations. */
842 
843 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
844 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
845 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
846 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
847 
848 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
849 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
850 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
851 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
852 
853 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
854 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
855 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),
856 
857 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
858 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
859 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
860 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
861 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
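/*
 * For example (illustrative): an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri.
 */
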
863 typedef enum {
864     C_Dynamic = -2,
865     C_NotImplemented = -1,
866 #include "tcg-target-con-set.h"
867 } TCGConstraintSetIndex;
868 
869 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
871 #undef C_O0_I1
872 #undef C_O0_I2
873 #undef C_O0_I3
874 #undef C_O0_I4
875 #undef C_O1_I1
876 #undef C_O1_I2
877 #undef C_O1_I3
878 #undef C_O1_I4
879 #undef C_N1_I2
880 #undef C_N1O1_I1
881 #undef C_N2_I1
882 #undef C_O2_I1
883 #undef C_O2_I2
884 #undef C_O2_I3
885 #undef C_O2_I4
886 #undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
890 typedef struct TCGConstraintSet {
891     uint8_t nb_oargs, nb_iargs;
892     const char *args_ct_str[TCG_MAX_OP_ARGS];
893 } TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
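/*
 * Continuing the example (illustrative): C_O1_I2(r, r, ri) expands
 * here to { 1, 2, { "r", "r", "ri" } }, i.e. one output and two
 * inputs, so constraint_sets[c_o1_i2_r_r_ri] carries the constraint
 * strings for that combination.
 */
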
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
938 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
939 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
940 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
941 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
942 
943 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
944 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
945 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
946 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
947 
948 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
949 #define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
950 #define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)
951 
952 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
953 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
954 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
955 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
956 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host ISA.
966  *   Otherwise:        The register allocation constraints for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
981 typedef struct TCGOutOpBinary {
982     TCGOutOp base;
983     void (*out_rrr)(TCGContext *s, TCGType type,
984                     TCGReg a0, TCGReg a1, TCGReg a2);
985     void (*out_rri)(TCGContext *s, TCGType type,
986                     TCGReg a0, TCGReg a1, tcg_target_long a2);
987 } TCGOutOpBinary;
988 
989 typedef struct TCGOutOpDivRem {
990     TCGOutOp base;
991     void (*out_rr01r)(TCGContext *s, TCGType type,
992                       TCGReg a0, TCGReg a1, TCGReg a4);
993 } TCGOutOpDivRem;
994 
995 typedef struct TCGOutOpUnary {
996     TCGOutOp base;
997     void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
998 } TCGOutOpUnary;
999 
1000 typedef struct TCGOutOpSubtract {
1001     TCGOutOp base;
1002     void (*out_rrr)(TCGContext *s, TCGType type,
1003                     TCGReg a0, TCGReg a1, TCGReg a2);
1004     void (*out_rir)(TCGContext *s, TCGType type,
1005                     TCGReg a0, tcg_target_long a1, TCGReg a2);
1006 } TCGOutOpSubtract;
1007 
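/*
 * Illustration (editor's sketch, not part of this file): a backend's
 * tcg-target.c.inc is expected to provide definitions such as
 *
 *     static void tgen_add(TCGContext *s, TCGType type,
 *                          TCGReg a0, TCGReg a1, TCGReg a2)
 *     {
 *         ...emit host add a0 = a1 + a2...
 *     }
 *
 *     static const TCGOutOpBinary outop_add = {
 *         .base.static_constraint = C_O1_I2(r, r, r),
 *         .out_rrr = tgen_add,
 *     };
 *
 * where tgen_add is a hypothetical name; all_outop[] below then
 * references outop_add via the OUTOP() macro.
 */
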
1008 #include "tcg-target.c.inc"
1009 
1010 #ifndef CONFIG_TCG_INTERPRETER
1011 /* Validate CPUTLBDescFast placement. */
1012 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
1013                         sizeof(CPUNegativeOffsetState))
1014                   < MIN_TLB_MASK_TABLE_OFS);
1015 #endif
1016 
1017 /*
1018  * Register V as the TCGOutOp for O.
1019  * This verifies that V is of type T, and otherwise gives a nice compiler error.
1020  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1021  */
1022 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1023 
1024 /* Register allocation descriptions for every TCGOpcode. */
1025 static const TCGOutOp * const all_outop[NB_OPS] = {
1026     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1027     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1028     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1029     OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
1030     OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
1031     OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
1032     OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
1033     OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
1034     OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
1035     OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
1036     OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
1037     OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
1038     OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
1039     OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
1040     OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
1041     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1042     OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1043     OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
1044     OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
1045     OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
1046     OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
1047     OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
1048     OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
1049     OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1050 };
1051 
1052 #undef OUTOP
1053 
1054 /*
1055  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1056  * and registered the target's TCG globals) must register with this function
1057  * before initiating translation.
1058  *
1059  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1060  * of tcg_region_init() for the reasoning behind this.
1061  *
1062  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
1063  * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1064  * is not used anymore for translation once this function is called.
1065  *
1066  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
1067  * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1068  * modes.
1069  */
1070 #ifdef CONFIG_USER_ONLY
1071 void tcg_register_thread(void)
1072 {
1073     tcg_ctx = &tcg_init_ctx;
1074 }
1075 #else
1076 void tcg_register_thread(void)
1077 {
1078     TCGContext *s = g_malloc(sizeof(*s));
1079     unsigned int i, n;
1080 
1081     *s = tcg_init_ctx;
1082 
1083     /* Relink mem_base.  */
1084     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1085         if (tcg_init_ctx.temps[i].mem_base) {
1086             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1087             tcg_debug_assert(b >= 0 && b < n);
1088             s->temps[i].mem_base = &s->temps[b];
1089         }
1090     }
1091 
1092     /* Claim an entry in tcg_ctxs */
1093     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1094     g_assert(n < tcg_max_ctxs);
1095     qatomic_set(&tcg_ctxs[n], s);
1096 
1097     if (n > 0) {
1098         tcg_region_initial_alloc(s);
1099     }
1100 
1101     tcg_ctx = s;
1102 }
1103 #endif /* !CONFIG_USER_ONLY */
1104 
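/*
 * Note (editor's): in system-mode this is called once per vCPU thread
 * (e.g. from the mttcg/rr thread startup paths) before that thread
 * translates its first block.
 */
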
1105 /* pool-based memory allocation */
1106 void *tcg_malloc_internal(TCGContext *s, int size)
1107 {
1108     TCGPool *p;
1109     int pool_size;
1110 
1111     if (size > TCG_POOL_CHUNK_SIZE) {
1112         /* big malloc: insert a new pool (XXX: could optimize) */
1113         p = g_malloc(sizeof(TCGPool) + size);
1114         p->size = size;
1115         p->next = s->pool_first_large;
1116         s->pool_first_large = p;
1117         return p->data;
1118     } else {
1119         p = s->pool_current;
1120         if (!p) {
1121             p = s->pool_first;
1122             if (!p)
1123                 goto new_pool;
1124         } else {
1125             if (!p->next) {
1126             new_pool:
1127                 pool_size = TCG_POOL_CHUNK_SIZE;
1128                 p = g_malloc(sizeof(TCGPool) + pool_size);
1129                 p->size = pool_size;
1130                 p->next = NULL;
1131                 if (s->pool_current) {
1132                     s->pool_current->next = p;
1133                 } else {
1134                     s->pool_first = p;
1135                 }
1136             } else {
1137                 p = p->next;
1138             }
1139         }
1140     }
1141     s->pool_current = p;
1142     s->pool_cur = p->data + size;
1143     s->pool_end = p->data + p->size;
1144     return p->data;
1145 }
1146 
1147 void tcg_pool_reset(TCGContext *s)
1148 {
1149     TCGPool *p, *t;
1150     for (p = s->pool_first_large; p; p = t) {
1151         t = p->next;
1152         g_free(p);
1153     }
1154     s->pool_first_large = NULL;
1155     s->pool_cur = s->pool_end = NULL;
1156     s->pool_current = NULL;
1157 }
1158 
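/*
 * Note (editor's): allocations made through tcg_malloc() (the inline
 * fast path in tcg.h that falls back to tcg_malloc_internal()) are
 * never freed individually; they live until the next tcg_pool_reset(),
 * issued when starting a new translation.
 */
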
1159 /*
1160  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1161  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1162  * We only use these for layout in tcg_out_ld_helper_ret and
1163  * tcg_out_st_helper_args, and share them between several of
1164  * the helpers; as a result, it is easier to build them manually.
1165  */
1166 
1167 #if TCG_TARGET_REG_BITS == 32
1168 # define dh_typecode_ttl  dh_typecode_i32
1169 #else
1170 # define dh_typecode_ttl  dh_typecode_i64
1171 #endif
1172 
1173 static TCGHelperInfo info_helper_ld32_mmu = {
1174     .flags = TCG_CALL_NO_WG,
1175     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1176               | dh_typemask(env, 1)
1177               | dh_typemask(i64, 2)  /* uint64_t addr */
1178               | dh_typemask(i32, 3)  /* unsigned oi */
1179               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1180 };
1181 
1182 static TCGHelperInfo info_helper_ld64_mmu = {
1183     .flags = TCG_CALL_NO_WG,
1184     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1185               | dh_typemask(env, 1)
1186               | dh_typemask(i64, 2)  /* uint64_t addr */
1187               | dh_typemask(i32, 3)  /* unsigned oi */
1188               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1189 };
1190 
1191 static TCGHelperInfo info_helper_ld128_mmu = {
1192     .flags = TCG_CALL_NO_WG,
1193     .typemask = dh_typemask(i128, 0) /* return Int128 */
1194               | dh_typemask(env, 1)
1195               | dh_typemask(i64, 2)  /* uint64_t addr */
1196               | dh_typemask(i32, 3)  /* unsigned oi */
1197               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1198 };
1199 
1200 static TCGHelperInfo info_helper_st32_mmu = {
1201     .flags = TCG_CALL_NO_WG,
1202     .typemask = dh_typemask(void, 0)
1203               | dh_typemask(env, 1)
1204               | dh_typemask(i64, 2)  /* uint64_t addr */
1205               | dh_typemask(i32, 3)  /* uint32_t data */
1206               | dh_typemask(i32, 4)  /* unsigned oi */
1207               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1208 };
1209 
1210 static TCGHelperInfo info_helper_st64_mmu = {
1211     .flags = TCG_CALL_NO_WG,
1212     .typemask = dh_typemask(void, 0)
1213               | dh_typemask(env, 1)
1214               | dh_typemask(i64, 2)  /* uint64_t addr */
1215               | dh_typemask(i64, 3)  /* uint64_t data */
1216               | dh_typemask(i32, 4)  /* unsigned oi */
1217               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1218 };
1219 
1220 static TCGHelperInfo info_helper_st128_mmu = {
1221     .flags = TCG_CALL_NO_WG,
1222     .typemask = dh_typemask(void, 0)
1223               | dh_typemask(env, 1)
1224               | dh_typemask(i64, 2)  /* uint64_t addr */
1225               | dh_typemask(i128, 3) /* Int128 data */
1226               | dh_typemask(i32, 4)  /* unsigned oi */
1227               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1228 };
1229 
1230 #ifdef CONFIG_TCG_INTERPRETER
1231 static ffi_type *typecode_to_ffi(int argmask)
1232 {
1233     /*
1234      * libffi does not support __int128_t, so we have forced Int128
1235      * to use the structure definition instead of the builtin type.
1236      */
1237     static ffi_type *ffi_type_i128_elements[3] = {
1238         &ffi_type_uint64,
1239         &ffi_type_uint64,
1240         NULL
1241     };
1242     static ffi_type ffi_type_i128 = {
1243         .size = 16,
1244         .alignment = __alignof__(Int128),
1245         .type = FFI_TYPE_STRUCT,
1246         .elements = ffi_type_i128_elements,
1247     };
1248 
1249     switch (argmask) {
1250     case dh_typecode_void:
1251         return &ffi_type_void;
1252     case dh_typecode_i32:
1253         return &ffi_type_uint32;
1254     case dh_typecode_s32:
1255         return &ffi_type_sint32;
1256     case dh_typecode_i64:
1257         return &ffi_type_uint64;
1258     case dh_typecode_s64:
1259         return &ffi_type_sint64;
1260     case dh_typecode_ptr:
1261         return &ffi_type_pointer;
1262     case dh_typecode_i128:
1263         return &ffi_type_i128;
1264     }
1265     g_assert_not_reached();
1266 }
1267 
1268 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1269 {
1270     unsigned typemask = info->typemask;
1271     struct {
1272         ffi_cif cif;
1273         ffi_type *args[];
1274     } *ca;
1275     ffi_status status;
1276     int nargs;
1277 
1278     /* Ignoring the return type, find the last non-zero field. */
1279     nargs = 32 - clz32(typemask >> 3);
1280     nargs = DIV_ROUND_UP(nargs, 3);
1281     assert(nargs <= MAX_CALL_IARGS);
1282 
1283     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1284     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1285     ca->cif.nargs = nargs;
1286 
1287     if (nargs != 0) {
1288         ca->cif.arg_types = ca->args;
1289         for (int j = 0; j < nargs; ++j) {
1290             int typecode = extract32(typemask, (j + 1) * 3, 3);
1291             ca->args[j] = typecode_to_ffi(typecode);
1292         }
1293     }
1294 
1295     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1296                           ca->cif.rtype, ca->cif.arg_types);
1297     assert(status == FFI_OK);
1298 
1299     return &ca->cif;
1300 }
1301 
1302 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1303 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1304 #else
1305 #define HELPER_INFO_INIT(I)      (&(I)->init)
1306 #define HELPER_INFO_INIT_VAL(I)  1
1307 #endif /* CONFIG_TCG_INTERPRETER */
1308 
1309 static inline bool arg_slot_reg_p(unsigned arg_slot)
1310 {
1311     /*
1312      * Split the sizeof away from the comparison to avoid -Werror from
1313      * "unsigned < 0 is always false" when iarg_regs is empty.
1314      */
1315     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1316     return arg_slot < nreg;
1317 }
1318 
1319 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1320 {
1321     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1322     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1323 
1324     tcg_debug_assert(stk_slot < max);
1325     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1326 }
1327 
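/*
 * Example (illustrative): on a host with 6 integer argument registers
 * and 8-byte tcg_target_long, arg_slot 4 is a register, while
 * arg_slot 7 is stack slot 1 at byte offset
 * TCG_TARGET_CALL_STACK_OFFSET + 8.
 */
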
1328 typedef struct TCGCumulativeArgs {
1329     int arg_idx;                /* tcg_gen_callN args[] */
1330     int info_in_idx;            /* TCGHelperInfo in[] */
1331     int arg_slot;               /* regs+stack slot */
1332     int ref_slot;               /* stack slots for references */
1333 } TCGCumulativeArgs;
1334 
1335 static void layout_arg_even(TCGCumulativeArgs *cum)
1336 {
1337     cum->arg_slot += cum->arg_slot & 1;
1338 }
1339 
1340 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1341                          TCGCallArgumentKind kind)
1342 {
1343     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1344 
1345     *loc = (TCGCallArgumentLoc){
1346         .kind = kind,
1347         .arg_idx = cum->arg_idx,
1348         .arg_slot = cum->arg_slot,
1349     };
1350     cum->info_in_idx++;
1351     cum->arg_slot++;
1352 }
1353 
1354 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1355                                 TCGHelperInfo *info, int n)
1356 {
1357     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1358 
1359     for (int i = 0; i < n; ++i) {
1360         /* Layout all using the same arg_idx, adjusting the subindex. */
1361         loc[i] = (TCGCallArgumentLoc){
1362             .kind = TCG_CALL_ARG_NORMAL,
1363             .arg_idx = cum->arg_idx,
1364             .tmp_subindex = i,
1365             .arg_slot = cum->arg_slot + i,
1366         };
1367     }
1368     cum->info_in_idx += n;
1369     cum->arg_slot += n;
1370 }
1371 
1372 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1373 {
1374     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1375     int n = 128 / TCG_TARGET_REG_BITS;
1376 
1377     /* The first subindex carries the pointer. */
1378     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1379 
1380     /*
1381      * The callee is allowed to clobber memory associated with
1382      * a structure passed by reference.  Therefore we must make copies.
1383      * Allocate space from "ref_slot", which will be adjusted to
1384      * follow the parameters on the stack.
1385      */
1386     loc[0].ref_slot = cum->ref_slot;
1387 
1388     /*
1389      * Subsequent words also go into the reference slot, but
1390      * do not accumulate into the regular arguments.
1391      */
1392     for (int i = 1; i < n; ++i) {
1393         loc[i] = (TCGCallArgumentLoc){
1394             .kind = TCG_CALL_ARG_BY_REF_N,
1395             .arg_idx = cum->arg_idx,
1396             .tmp_subindex = i,
1397             .ref_slot = cum->ref_slot + i,
1398         };
1399     }
1400     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1401     cum->ref_slot += n;
1402 }
1403 
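/*
 * Example (illustrative): on a 64-bit host, n == 2, so an Int128
 * argument passed by reference consumes one regular argument slot for
 * the pointer plus two "ref" stack slots holding the copied data.
 */
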
1404 static void init_call_layout(TCGHelperInfo *info)
1405 {
1406     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1407     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1408     unsigned typemask = info->typemask;
1409     unsigned typecode;
1410     TCGCumulativeArgs cum = { };
1411 
1412     /*
1413      * Parse and place any function return value.
1414      */
1415     typecode = typemask & 7;
1416     switch (typecode) {
1417     case dh_typecode_void:
1418         info->nr_out = 0;
1419         break;
1420     case dh_typecode_i32:
1421     case dh_typecode_s32:
1422     case dh_typecode_ptr:
1423         info->nr_out = 1;
1424         info->out_kind = TCG_CALL_RET_NORMAL;
1425         break;
1426     case dh_typecode_i64:
1427     case dh_typecode_s64:
1428         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1429         info->out_kind = TCG_CALL_RET_NORMAL;
1430         /* Query the last register now to trigger any assert early. */
1431         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1432         break;
1433     case dh_typecode_i128:
1434         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1435         info->out_kind = TCG_TARGET_CALL_RET_I128;
1436         switch (TCG_TARGET_CALL_RET_I128) {
1437         case TCG_CALL_RET_NORMAL:
1438             /* Query the last register now to trigger any assert early. */
1439             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1440             break;
1441         case TCG_CALL_RET_BY_VEC:
1442             /* Query the single register now to trigger any assert early. */
1443             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1444             break;
1445         case TCG_CALL_RET_BY_REF:
1446             /*
1447              * Allocate the first argument to the output.
1448              * We don't need to store this anywhere, just make it
1449              * unavailable for use in the input loop below.
1450              */
1451             cum.arg_slot = 1;
1452             break;
1453         default:
1454             qemu_build_not_reached();
1455         }
1456         break;
1457     default:
1458         g_assert_not_reached();
1459     }
1460 
1461     /*
1462      * Parse and place function arguments.
1463      */
1464     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1465         TCGCallArgumentKind kind;
1466         TCGType type;
1467 
1468         typecode = typemask & 7;
1469         switch (typecode) {
1470         case dh_typecode_i32:
1471         case dh_typecode_s32:
1472             type = TCG_TYPE_I32;
1473             break;
1474         case dh_typecode_i64:
1475         case dh_typecode_s64:
1476             type = TCG_TYPE_I64;
1477             break;
1478         case dh_typecode_ptr:
1479             type = TCG_TYPE_PTR;
1480             break;
1481         case dh_typecode_i128:
1482             type = TCG_TYPE_I128;
1483             break;
1484         default:
1485             g_assert_not_reached();
1486         }
1487 
1488         switch (type) {
1489         case TCG_TYPE_I32:
1490             switch (TCG_TARGET_CALL_ARG_I32) {
1491             case TCG_CALL_ARG_EVEN:
1492                 layout_arg_even(&cum);
1493                 /* fall through */
1494             case TCG_CALL_ARG_NORMAL:
1495                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1496                 break;
1497             case TCG_CALL_ARG_EXTEND:
1498                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1499                 layout_arg_1(&cum, info, kind);
1500                 break;
1501             default:
1502                 qemu_build_not_reached();
1503             }
1504             break;
1505 
1506         case TCG_TYPE_I64:
1507             switch (TCG_TARGET_CALL_ARG_I64) {
1508             case TCG_CALL_ARG_EVEN:
1509                 layout_arg_even(&cum);
1510                 /* fall through */
1511             case TCG_CALL_ARG_NORMAL:
1512                 if (TCG_TARGET_REG_BITS == 32) {
1513                     layout_arg_normal_n(&cum, info, 2);
1514                 } else {
1515                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1516                 }
1517                 break;
1518             default:
1519                 qemu_build_not_reached();
1520             }
1521             break;
1522 
1523         case TCG_TYPE_I128:
1524             switch (TCG_TARGET_CALL_ARG_I128) {
1525             case TCG_CALL_ARG_EVEN:
1526                 layout_arg_even(&cum);
1527                 /* fall through */
1528             case TCG_CALL_ARG_NORMAL:
1529                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1530                 break;
1531             case TCG_CALL_ARG_BY_REF:
1532                 layout_arg_by_ref(&cum, info);
1533                 break;
1534             default:
1535                 qemu_build_not_reached();
1536             }
1537             break;
1538 
1539         default:
1540             g_assert_not_reached();
1541         }
1542     }
1543     info->nr_in = cum.info_in_idx;
1544 
1545     /* Validate that we didn't overrun the input array. */
1546     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1547     /* Validate the backend has enough argument space. */
1548     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1549 
1550     /*
1551      * Relocate the "ref_slot" area to the end of the parameters.
1552      * Minimizing this stack offset helps code size for x86,
1553      * which has a signed 8-bit offset encoding.
1554      */
1555     if (cum.ref_slot != 0) {
1556         int ref_base = 0;
1557 
1558         if (cum.arg_slot > max_reg_slots) {
1559             int align = __alignof(Int128) / sizeof(tcg_target_long);
1560 
1561             ref_base = cum.arg_slot - max_reg_slots;
1562             if (align > 1) {
1563                 ref_base = ROUND_UP(ref_base, align);
1564             }
1565         }
1566         assert(ref_base + cum.ref_slot <= max_stk_slots);
1567         ref_base += max_reg_slots;
1568 
1569         if (ref_base != 0) {
1570             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1571                 TCGCallArgumentLoc *loc = &info->in[i];
1572                 switch (loc->kind) {
1573                 case TCG_CALL_ARG_BY_REF:
1574                 case TCG_CALL_ARG_BY_REF_N:
1575                     loc->ref_slot += ref_base;
1576                     break;
1577                 default:
1578                     break;
1579                 }
1580             }
1581         }
1582     }
1583 }
1584 
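/*
 * For illustration: the typemask packs one 3-bit dh_typecode per value,
 * with the return type in bits [2:0] and argument N in bits
 * [3N+2 : 3N], which is why the argument loop above begins with
 * "typemask >>= 3".  A helper declared as, say,
 *
 *     DEF_HELPER_FLAGS_2(foo, TCG_CALL_NO_RWG, i64, env, i32)
 *
 * (a hypothetical helper) yields three typecodes -- an i64 return,
 * ptr for env, and i32 -- consumed one shift at a time.
 */
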
1585 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1586 static void process_constraint_sets(void);
1587 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1588                                             TCGReg reg, const char *name);
1589 
1590 static void tcg_context_init(unsigned max_threads)
1591 {
1592     TCGContext *s = &tcg_init_ctx;
1593     int n, i;
1594     TCGTemp *ts;
1595 
1596     memset(s, 0, sizeof(*s));
1597     s->nb_globals = 0;
1598 
1599     init_call_layout(&info_helper_ld32_mmu);
1600     init_call_layout(&info_helper_ld64_mmu);
1601     init_call_layout(&info_helper_ld128_mmu);
1602     init_call_layout(&info_helper_st32_mmu);
1603     init_call_layout(&info_helper_st64_mmu);
1604     init_call_layout(&info_helper_st128_mmu);
1605 
1606     tcg_target_init(s);
1607     process_constraint_sets();
1608 
1609     /* Reverse the order of the saved registers, assuming they're all at
1610        the start of tcg_target_reg_alloc_order.  */
1611     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1612         int r = tcg_target_reg_alloc_order[n];
1613         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1614             break;
1615         }
1616     }
1617     for (i = 0; i < n; ++i) {
1618         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1619     }
1620     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1621         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1622     }
1623 
1624     tcg_ctx = s;
1625     /*
1626      * In user-mode we simply share the init context among threads, since we
1627      * use a single region. See the documentation of tcg_region_init()
1628      * for the reasoning behind this.
1629      * In system-mode we will have at most max_threads TCG threads.
1630      */
1631 #ifdef CONFIG_USER_ONLY
1632     tcg_ctxs = &tcg_ctx;
1633     tcg_cur_ctxs = 1;
1634     tcg_max_ctxs = 1;
1635 #else
1636     tcg_max_ctxs = max_threads;
1637     tcg_ctxs = g_new0(TCGContext *, max_threads);
1638 #endif
1639 
1640     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1641     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1642     tcg_env = temp_tcgv_ptr(ts);
1643 }
1644 
1645 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1646 {
1647     tcg_context_init(max_threads);
1648     tcg_region_init(tb_size, splitwx, max_threads);
1649 }
1650 
1651 /*
1652  * Allocate TBs right before their corresponding translated code, making
1653  * sure that TBs and code are on different cache lines.
1654  */
1655 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1656 {
1657     uintptr_t align = qemu_icache_linesize;
1658     TranslationBlock *tb;
1659     void *next;
1660 
1661  retry:
1662     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1663     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1664 
1665     if (unlikely(next > s->code_gen_highwater)) {
1666         if (tcg_region_alloc(s)) {
1667             return NULL;
1668         }
1669         goto retry;
1670     }
1671     qatomic_set(&s->code_gen_ptr, next);
1672     return tb;
1673 }
1674 
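/*
 * Worked example with illustrative numbers: given a 64-byte icache
 * line and code_gen_ptr == 0x1008, the TranslationBlock is placed at
 * ROUND_UP(0x1008, 64) == 0x1040, and code_gen_ptr advances to the
 * next 64-byte boundary past the TB header, so the header and the
 * translated code that follows never share a cache line.
 */
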
1675 void tcg_prologue_init(void)
1676 {
1677     TCGContext *s = tcg_ctx;
1678     size_t prologue_size;
1679 
1680     s->code_ptr = s->code_gen_ptr;
1681     s->code_buf = s->code_gen_ptr;
1682     s->data_gen_ptr = NULL;
1683 
1684 #ifndef CONFIG_TCG_INTERPRETER
1685     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1686 #endif
1687 
1688     s->pool_labels = NULL;
1689 
1690     qemu_thread_jit_write();
1691     /* Generate the prologue.  */
1692     tcg_target_qemu_prologue(s);
1693 
1694     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1695     {
1696         int result = tcg_out_pool_finalize(s);
1697         tcg_debug_assert(result == 0);
1698     }
1699 
1700     prologue_size = tcg_current_code_size(s);
1701     perf_report_prologue(s->code_gen_ptr, prologue_size);
1702 
1703 #ifndef CONFIG_TCG_INTERPRETER
1704     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1705                         (uintptr_t)s->code_buf, prologue_size);
1706 #endif
1707 
1708     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1709         FILE *logfile = qemu_log_trylock();
1710         if (logfile) {
1711             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1712             if (s->data_gen_ptr) {
1713                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1714                 size_t data_size = prologue_size - code_size;
1715                 size_t i;
1716 
1717                 disas(logfile, s->code_gen_ptr, code_size);
1718 
1719                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1720                     if (sizeof(tcg_target_ulong) == 8) {
1721                         fprintf(logfile,
1722                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1723                                 (uintptr_t)s->data_gen_ptr + i,
1724                                 *(uint64_t *)(s->data_gen_ptr + i));
1725                     } else {
1726                         fprintf(logfile,
1727                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1728                                 (uintptr_t)s->data_gen_ptr + i,
1729                                 *(uint32_t *)(s->data_gen_ptr + i));
1730                     }
1731                 }
1732             } else {
1733                 disas(logfile, s->code_gen_ptr, prologue_size);
1734             }
1735             fprintf(logfile, "\n");
1736             qemu_log_unlock(logfile);
1737         }
1738     }
1739 
1740 #ifndef CONFIG_TCG_INTERPRETER
1741     /*
1742      * Assert that goto_ptr is implemented completely, setting an epilogue.
1743      * For tci, we use NULL as the signal to return from the interpreter,
1744      * so skip this check.
1745      */
1746     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1747 #endif
1748 
1749     tcg_region_prologue_set(s);
1750 }
1751 
1752 void tcg_func_start(TCGContext *s)
1753 {
1754     tcg_pool_reset(s);
1755     s->nb_temps = s->nb_globals;
1756 
1757     /* Forget any EBB temps freed by a previous translation.  */
1758     tcg_temp_ebb_reset_freed(s);
1759 
1760     /* No constant temps have been previously allocated. */
1761     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1762         if (s->const_table[i]) {
1763             g_hash_table_remove_all(s->const_table[i]);
1764         }
1765     }
1766 
1767     s->nb_ops = 0;
1768     s->nb_labels = 0;
1769     s->current_frame_offset = s->frame_start;
1770 
1771 #ifdef CONFIG_DEBUG_TCG
1772     s->goto_tb_issue_mask = 0;
1773 #endif
1774 
1775     QTAILQ_INIT(&s->ops);
1776     QTAILQ_INIT(&s->free_ops);
1777     s->emit_before_op = NULL;
1778     QSIMPLEQ_INIT(&s->labels);
1779 
1780     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1781     tcg_debug_assert(s->insn_start_words > 0);
1782 }
1783 
1784 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1785 {
1786     int n = s->nb_temps++;
1787 
1788     if (n >= TCG_MAX_TEMPS) {
1789         tcg_raise_tb_overflow(s);
1790     }
1791     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1792 }
1793 
1794 static TCGTemp *tcg_global_alloc(TCGContext *s)
1795 {
1796     TCGTemp *ts;
1797 
1798     tcg_debug_assert(s->nb_globals == s->nb_temps);
1799     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1800     s->nb_globals++;
1801     ts = tcg_temp_alloc(s);
1802     ts->kind = TEMP_GLOBAL;
1803 
1804     return ts;
1805 }
1806 
1807 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1808                                             TCGReg reg, const char *name)
1809 {
1810     TCGTemp *ts;
1811 
1812     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1813 
1814     ts = tcg_global_alloc(s);
1815     ts->base_type = type;
1816     ts->type = type;
1817     ts->kind = TEMP_FIXED;
1818     ts->reg = reg;
1819     ts->name = name;
1820     tcg_regset_set_reg(s->reserved_regs, reg);
1821 
1822     return ts;
1823 }
1824 
1825 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1826 {
1827     s->frame_start = start;
1828     s->frame_end = start + size;
1829     s->frame_temp
1830         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1831 }
1832 
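/*
 * A backend typically calls this once from tcg_target_qemu_prologue(),
 * along the lines of (sketch):
 *
 *     tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
 *                   CPU_TEMP_BUF_NLONGS * sizeof(long));
 *
 * reserving the spill area just above the stack slots used for
 * outgoing call arguments.
 */
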
1833 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1834                                             const char *name, TCGType type)
1835 {
1836     TCGContext *s = tcg_ctx;
1837     TCGTemp *base_ts = tcgv_ptr_temp(base);
1838     TCGTemp *ts = tcg_global_alloc(s);
1839     int indirect_reg = 0;
1840 
1841     switch (base_ts->kind) {
1842     case TEMP_FIXED:
1843         break;
1844     case TEMP_GLOBAL:
1845         /* We do not support double-indirect registers.  */
1846         tcg_debug_assert(!base_ts->indirect_reg);
1847         base_ts->indirect_base = 1;
1848         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1849                             ? 2 : 1);
1850         indirect_reg = 1;
1851         break;
1852     default:
1853         g_assert_not_reached();
1854     }
1855 
1856     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1857         TCGTemp *ts2 = tcg_global_alloc(s);
1858         char buf[64];
1859 
1860         ts->base_type = TCG_TYPE_I64;
1861         ts->type = TCG_TYPE_I32;
1862         ts->indirect_reg = indirect_reg;
1863         ts->mem_allocated = 1;
1864         ts->mem_base = base_ts;
1865         ts->mem_offset = offset;
1866         pstrcpy(buf, sizeof(buf), name);
1867         pstrcat(buf, sizeof(buf), "_0");
1868         ts->name = strdup(buf);
1869 
1870         tcg_debug_assert(ts2 == ts + 1);
1871         ts2->base_type = TCG_TYPE_I64;
1872         ts2->type = TCG_TYPE_I32;
1873         ts2->indirect_reg = indirect_reg;
1874         ts2->mem_allocated = 1;
1875         ts2->mem_base = base_ts;
1876         ts2->mem_offset = offset + 4;
1877         ts2->temp_subindex = 1;
1878         pstrcpy(buf, sizeof(buf), name);
1879         pstrcat(buf, sizeof(buf), "_1");
1880         ts2->name = strdup(buf);
1881     } else {
1882         ts->base_type = type;
1883         ts->type = type;
1884         ts->indirect_reg = indirect_reg;
1885         ts->mem_allocated = 1;
1886         ts->mem_base = base_ts;
1887         ts->mem_offset = offset;
1888         ts->name = name;
1889     }
1890     return ts;
1891 }
1892 
1893 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1894 {
1895     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1896     return temp_tcgv_i32(ts);
1897 }
1898 
1899 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1900 {
1901     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1902     return temp_tcgv_i64(ts);
1903 }
1904 
1905 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1906 {
1907     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1908     return temp_tcgv_ptr(ts);
1909 }
1910 
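/*
 * Example, with a hypothetical CPUFooState: front ends create their
 * globals once at translator init, e.g.
 *
 *     cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                     offsetof(CPUFooState, pc), "pc");
 *
 * after which the temp is addressed relative to env, prints as "pc"
 * in op dumps, and remains live for the life of the process.
 */
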
1911 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1912 {
1913     TCGContext *s = tcg_ctx;
1914     TCGTemp *ts;
1915     int n;
1916 
1917     if (kind == TEMP_EBB) {
1918         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1919 
1920         if (idx < TCG_MAX_TEMPS) {
1921             /* There is already an available temp with the right type.  */
1922             clear_bit(idx, s->free_temps[type].l);
1923 
1924             ts = &s->temps[idx];
1925             ts->temp_allocated = 1;
1926             tcg_debug_assert(ts->base_type == type);
1927             tcg_debug_assert(ts->kind == kind);
1928             return ts;
1929         }
1930     } else {
1931         tcg_debug_assert(kind == TEMP_TB);
1932     }
1933 
1934     switch (type) {
1935     case TCG_TYPE_I32:
1936     case TCG_TYPE_V64:
1937     case TCG_TYPE_V128:
1938     case TCG_TYPE_V256:
1939         n = 1;
1940         break;
1941     case TCG_TYPE_I64:
1942         n = 64 / TCG_TARGET_REG_BITS;
1943         break;
1944     case TCG_TYPE_I128:
1945         n = 128 / TCG_TARGET_REG_BITS;
1946         break;
1947     default:
1948         g_assert_not_reached();
1949     }
1950 
1951     ts = tcg_temp_alloc(s);
1952     ts->base_type = type;
1953     ts->temp_allocated = 1;
1954     ts->kind = kind;
1955 
1956     if (n == 1) {
1957         ts->type = type;
1958     } else {
1959         ts->type = TCG_TYPE_REG;
1960 
1961         for (int i = 1; i < n; ++i) {
1962             TCGTemp *ts2 = tcg_temp_alloc(s);
1963 
1964             tcg_debug_assert(ts2 == ts + i);
1965             ts2->base_type = type;
1966             ts2->type = TCG_TYPE_REG;
1967             ts2->temp_allocated = 1;
1968             ts2->temp_subindex = i;
1969             ts2->kind = kind;
1970         }
1971     }
1972     return ts;
1973 }
1974 
1975 TCGv_i32 tcg_temp_new_i32(void)
1976 {
1977     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1978 }
1979 
1980 TCGv_i32 tcg_temp_ebb_new_i32(void)
1981 {
1982     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1983 }
1984 
1985 TCGv_i64 tcg_temp_new_i64(void)
1986 {
1987     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1988 }
1989 
1990 TCGv_i64 tcg_temp_ebb_new_i64(void)
1991 {
1992     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1993 }
1994 
1995 TCGv_ptr tcg_temp_new_ptr(void)
1996 {
1997     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1998 }
1999 
2000 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2001 {
2002     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2003 }
2004 
2005 TCGv_i128 tcg_temp_new_i128(void)
2006 {
2007     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2008 }
2009 
2010 TCGv_i128 tcg_temp_ebb_new_i128(void)
2011 {
2012     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2013 }
2014 
2015 TCGv_vec tcg_temp_new_vec(TCGType type)
2016 {
2017     TCGTemp *t;
2018 
2019 #ifdef CONFIG_DEBUG_TCG
2020     switch (type) {
2021     case TCG_TYPE_V64:
2022         assert(TCG_TARGET_HAS_v64);
2023         break;
2024     case TCG_TYPE_V128:
2025         assert(TCG_TARGET_HAS_v128);
2026         break;
2027     case TCG_TYPE_V256:
2028         assert(TCG_TARGET_HAS_v256);
2029         break;
2030     default:
2031         g_assert_not_reached();
2032     }
2033 #endif
2034 
2035     t = tcg_temp_new_internal(type, TEMP_EBB);
2036     return temp_tcgv_vec(t);
2037 }
2038 
2039 /* Create a new temp of the same type as an existing temp.  */
2040 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2041 {
2042     TCGTemp *t = tcgv_vec_temp(match);
2043 
2044     tcg_debug_assert(t->temp_allocated != 0);
2045 
2046     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2047     return temp_tcgv_vec(t);
2048 }
2049 
2050 void tcg_temp_free_internal(TCGTemp *ts)
2051 {
2052     TCGContext *s = tcg_ctx;
2053 
2054     switch (ts->kind) {
2055     case TEMP_CONST:
2056     case TEMP_TB:
2057         /* Silently ignore free. */
2058         break;
2059     case TEMP_EBB:
2060         tcg_debug_assert(ts->temp_allocated != 0);
2061         ts->temp_allocated = 0;
2062         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2063         break;
2064     default:
2065         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2066         g_assert_not_reached();
2067     }
2068 }
2069 
2070 void tcg_temp_free_i32(TCGv_i32 arg)
2071 {
2072     tcg_temp_free_internal(tcgv_i32_temp(arg));
2073 }
2074 
2075 void tcg_temp_free_i64(TCGv_i64 arg)
2076 {
2077     tcg_temp_free_internal(tcgv_i64_temp(arg));
2078 }
2079 
2080 void tcg_temp_free_i128(TCGv_i128 arg)
2081 {
2082     tcg_temp_free_internal(tcgv_i128_temp(arg));
2083 }
2084 
2085 void tcg_temp_free_ptr(TCGv_ptr arg)
2086 {
2087     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2088 }
2089 
2090 void tcg_temp_free_vec(TCGv_vec arg)
2091 {
2092     tcg_temp_free_internal(tcgv_vec_temp(arg));
2093 }
2094 
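/*
 * Typical lifetime, for illustration (offset_of_some_field is a
 * placeholder):
 *
 *     TCGv_i64 t = tcg_temp_ebb_new_i64();
 *     tcg_gen_ld_i64(t, tcg_env, offset_of_some_field);
 *     ...
 *     tcg_temp_free_i64(t);
 *
 * Freeing a TEMP_TB temp is silently ignored above; those remain
 * valid until the end of the translation block.
 */
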
2095 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2096 {
2097     TCGContext *s = tcg_ctx;
2098     GHashTable *h = s->const_table[type];
2099     TCGTemp *ts;
2100 
2101     if (h == NULL) {
2102         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2103         s->const_table[type] = h;
2104     }
2105 
2106     ts = g_hash_table_lookup(h, &val);
2107     if (ts == NULL) {
2108         int64_t *val_ptr;
2109 
2110         ts = tcg_temp_alloc(s);
2111 
2112         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2113             TCGTemp *ts2 = tcg_temp_alloc(s);
2114 
2115             tcg_debug_assert(ts2 == ts + 1);
2116 
2117             ts->base_type = TCG_TYPE_I64;
2118             ts->type = TCG_TYPE_I32;
2119             ts->kind = TEMP_CONST;
2120             ts->temp_allocated = 1;
2121 
2122             ts2->base_type = TCG_TYPE_I64;
2123             ts2->type = TCG_TYPE_I32;
2124             ts2->kind = TEMP_CONST;
2125             ts2->temp_allocated = 1;
2126             ts2->temp_subindex = 1;
2127 
2128             /*
2129              * Retain the full value of the 64-bit constant in the low
2130              * part, so that the hash table works.  Actual uses will
2131              * truncate the value to the low part.
2132              */
2133             ts[HOST_BIG_ENDIAN].val = val;
2134             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2135             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2136         } else {
2137             ts->base_type = type;
2138             ts->type = type;
2139             ts->kind = TEMP_CONST;
2140             ts->temp_allocated = 1;
2141             ts->val = val;
2142             val_ptr = &ts->val;
2143         }
2144         g_hash_table_insert(h, val_ptr, ts);
2145     }
2146 
2147     return ts;
2148 }
2149 
2150 TCGv_i32 tcg_constant_i32(int32_t val)
2151 {
2152     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2153 }
2154 
2155 TCGv_i64 tcg_constant_i64(int64_t val)
2156 {
2157     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2158 }
2159 
2160 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2161 {
2162     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2163 }
2164 
2165 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2166 {
2167     val = dup_const(vece, val);
2168     return temp_tcgv_vec(tcg_constant_internal(type, val));
2169 }
2170 
2171 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2172 {
2173     TCGTemp *t = tcgv_vec_temp(match);
2174 
2175     tcg_debug_assert(t->temp_allocated != 0);
2176     return tcg_constant_vec(t->base_type, vece, val);
2177 }
2178 
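/*
 * For illustration: constants are interned per (type, value), so
 *
 *     TCGv_i32 a = tcg_constant_i32(4);
 *     TCGv_i32 b = tcg_constant_i32(4);
 *
 * return the same underlying TCGTemp.  Constant temps are never freed
 * (see tcg_temp_free_internal above) and must never be written.
 */
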
2179 #ifdef CONFIG_DEBUG_TCG
2180 size_t temp_idx(TCGTemp *ts)
2181 {
2182     ptrdiff_t n = ts - tcg_ctx->temps;
2183     assert(n >= 0 && n < tcg_ctx->nb_temps);
2184     return n;
2185 }
2186 
2187 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2188 {
2189     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2190 
2191     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2192     assert(o % sizeof(TCGTemp) == 0);
2193 
2194     return (void *)tcg_ctx + (uintptr_t)v;
2195 }
2196 #endif /* CONFIG_DEBUG_TCG */
2197 
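/*
 * The debug checks above rely on the representation of TCGv handles:
 * a TCGv_* value is not a pointer but the byte offset of its TCGTemp
 * from the start of TCGContext.  With illustrative numbers, if temps[]
 * begins at offset 0x40 within TCGContext, the handle for temps[2] is
 * 0x40 + 2 * sizeof(TCGTemp), and adding it to tcg_ctx recovers the
 * TCGTemp pointer, as the return statement above does.
 */
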
2198 /*
2199  * Return true if OP may appear in the opcode stream with TYPE.
2200  * Test the runtime variable that controls each opcode.
2201  */
2202 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2203 {
2204     bool has_type;
2205 
2206     switch (type) {
2207     case TCG_TYPE_I32:
2208         has_type = true;
2209         break;
2210     case TCG_TYPE_I64:
2211         has_type = TCG_TARGET_REG_BITS == 64;
2212         break;
2213     case TCG_TYPE_V64:
2214         has_type = TCG_TARGET_HAS_v64;
2215         break;
2216     case TCG_TYPE_V128:
2217         has_type = TCG_TARGET_HAS_v128;
2218         break;
2219     case TCG_TYPE_V256:
2220         has_type = TCG_TARGET_HAS_v256;
2221         break;
2222     default:
2223         has_type = false;
2224         break;
2225     }
2226 
2227     switch (op) {
2228     case INDEX_op_discard:
2229     case INDEX_op_set_label:
2230     case INDEX_op_call:
2231     case INDEX_op_br:
2232     case INDEX_op_mb:
2233     case INDEX_op_insn_start:
2234     case INDEX_op_exit_tb:
2235     case INDEX_op_goto_tb:
2236     case INDEX_op_goto_ptr:
2237     case INDEX_op_qemu_ld_i32:
2238     case INDEX_op_qemu_st_i32:
2239     case INDEX_op_qemu_ld_i64:
2240     case INDEX_op_qemu_st_i64:
2241         return true;
2242 
2243     case INDEX_op_qemu_st8_i32:
2244         return TCG_TARGET_HAS_qemu_st8_i32;
2245 
2246     case INDEX_op_qemu_ld_i128:
2247     case INDEX_op_qemu_st_i128:
2248         return TCG_TARGET_HAS_qemu_ldst_i128;
2249 
2250     case INDEX_op_add:
2251     case INDEX_op_and:
2252     case INDEX_op_mov:
2253     case INDEX_op_or:
2254     case INDEX_op_xor:
2255         return has_type;
2256 
2257     case INDEX_op_setcond_i32:
2258     case INDEX_op_brcond_i32:
2259     case INDEX_op_movcond_i32:
2260     case INDEX_op_ld8u_i32:
2261     case INDEX_op_ld8s_i32:
2262     case INDEX_op_ld16u_i32:
2263     case INDEX_op_ld16s_i32:
2264     case INDEX_op_ld_i32:
2265     case INDEX_op_st8_i32:
2266     case INDEX_op_st16_i32:
2267     case INDEX_op_st_i32:
2268     case INDEX_op_extract_i32:
2269     case INDEX_op_sextract_i32:
2270     case INDEX_op_deposit_i32:
2271         return true;
2272 
2273     case INDEX_op_negsetcond_i32:
2274         return TCG_TARGET_HAS_negsetcond_i32;
2275     case INDEX_op_rotl_i32:
2276     case INDEX_op_rotr_i32:
2277         return TCG_TARGET_HAS_rot_i32;
2278     case INDEX_op_extract2_i32:
2279         return TCG_TARGET_HAS_extract2_i32;
2280     case INDEX_op_add2_i32:
2281         return TCG_TARGET_HAS_add2_i32;
2282     case INDEX_op_sub2_i32:
2283         return TCG_TARGET_HAS_sub2_i32;
2284     case INDEX_op_mulu2_i32:
2285         return TCG_TARGET_HAS_mulu2_i32;
2286     case INDEX_op_muls2_i32:
2287         return TCG_TARGET_HAS_muls2_i32;
2288     case INDEX_op_bswap16_i32:
2289         return TCG_TARGET_HAS_bswap16_i32;
2290     case INDEX_op_bswap32_i32:
2291         return TCG_TARGET_HAS_bswap32_i32;
2292     case INDEX_op_clz_i32:
2293         return TCG_TARGET_HAS_clz_i32;
2294     case INDEX_op_ctz_i32:
2295         return TCG_TARGET_HAS_ctz_i32;
2296     case INDEX_op_ctpop_i32:
2297         return TCG_TARGET_HAS_ctpop_i32;
2298 
2299     case INDEX_op_brcond2_i32:
2300     case INDEX_op_setcond2_i32:
2301         return TCG_TARGET_REG_BITS == 32;
2302 
2303     case INDEX_op_setcond_i64:
2304     case INDEX_op_brcond_i64:
2305     case INDEX_op_movcond_i64:
2306     case INDEX_op_ld8u_i64:
2307     case INDEX_op_ld8s_i64:
2308     case INDEX_op_ld16u_i64:
2309     case INDEX_op_ld16s_i64:
2310     case INDEX_op_ld32u_i64:
2311     case INDEX_op_ld32s_i64:
2312     case INDEX_op_ld_i64:
2313     case INDEX_op_st8_i64:
2314     case INDEX_op_st16_i64:
2315     case INDEX_op_st32_i64:
2316     case INDEX_op_st_i64:
2317     case INDEX_op_ext_i32_i64:
2318     case INDEX_op_extu_i32_i64:
2319     case INDEX_op_extract_i64:
2320     case INDEX_op_sextract_i64:
2321     case INDEX_op_deposit_i64:
2322         return TCG_TARGET_REG_BITS == 64;
2323 
2324     case INDEX_op_negsetcond_i64:
2325         return TCG_TARGET_HAS_negsetcond_i64;
2326     case INDEX_op_rotl_i64:
2327     case INDEX_op_rotr_i64:
2328         return TCG_TARGET_HAS_rot_i64;
2329     case INDEX_op_extract2_i64:
2330         return TCG_TARGET_HAS_extract2_i64;
2331     case INDEX_op_extrl_i64_i32:
2332     case INDEX_op_extrh_i64_i32:
2333         return TCG_TARGET_HAS_extr_i64_i32;
2334     case INDEX_op_bswap16_i64:
2335         return TCG_TARGET_HAS_bswap16_i64;
2336     case INDEX_op_bswap32_i64:
2337         return TCG_TARGET_HAS_bswap32_i64;
2338     case INDEX_op_bswap64_i64:
2339         return TCG_TARGET_HAS_bswap64_i64;
2340     case INDEX_op_clz_i64:
2341         return TCG_TARGET_HAS_clz_i64;
2342     case INDEX_op_ctz_i64:
2343         return TCG_TARGET_HAS_ctz_i64;
2344     case INDEX_op_ctpop_i64:
2345         return TCG_TARGET_HAS_ctpop_i64;
2346     case INDEX_op_add2_i64:
2347         return TCG_TARGET_HAS_add2_i64;
2348     case INDEX_op_sub2_i64:
2349         return TCG_TARGET_HAS_sub2_i64;
2350     case INDEX_op_mulu2_i64:
2351         return TCG_TARGET_HAS_mulu2_i64;
2352     case INDEX_op_muls2_i64:
2353         return TCG_TARGET_HAS_muls2_i64;
2354 
2355     case INDEX_op_mov_vec:
2356     case INDEX_op_dup_vec:
2357     case INDEX_op_dupm_vec:
2358     case INDEX_op_ld_vec:
2359     case INDEX_op_st_vec:
2360     case INDEX_op_add_vec:
2361     case INDEX_op_sub_vec:
2362     case INDEX_op_and_vec:
2363     case INDEX_op_or_vec:
2364     case INDEX_op_xor_vec:
2365     case INDEX_op_cmp_vec:
2366         return has_type;
2367     case INDEX_op_dup2_vec:
2368         return has_type && TCG_TARGET_REG_BITS == 32;
2369     case INDEX_op_not_vec:
2370         return has_type && TCG_TARGET_HAS_not_vec;
2371     case INDEX_op_neg_vec:
2372         return has_type && TCG_TARGET_HAS_neg_vec;
2373     case INDEX_op_abs_vec:
2374         return has_type && TCG_TARGET_HAS_abs_vec;
2375     case INDEX_op_andc_vec:
2376         return has_type && TCG_TARGET_HAS_andc_vec;
2377     case INDEX_op_orc_vec:
2378         return has_type && TCG_TARGET_HAS_orc_vec;
2379     case INDEX_op_nand_vec:
2380         return has_type && TCG_TARGET_HAS_nand_vec;
2381     case INDEX_op_nor_vec:
2382         return has_type && TCG_TARGET_HAS_nor_vec;
2383     case INDEX_op_eqv_vec:
2384         return has_type && TCG_TARGET_HAS_eqv_vec;
2385     case INDEX_op_mul_vec:
2386         return has_type && TCG_TARGET_HAS_mul_vec;
2387     case INDEX_op_shli_vec:
2388     case INDEX_op_shri_vec:
2389     case INDEX_op_sari_vec:
2390         return has_type && TCG_TARGET_HAS_shi_vec;
2391     case INDEX_op_shls_vec:
2392     case INDEX_op_shrs_vec:
2393     case INDEX_op_sars_vec:
2394         return has_type && TCG_TARGET_HAS_shs_vec;
2395     case INDEX_op_shlv_vec:
2396     case INDEX_op_shrv_vec:
2397     case INDEX_op_sarv_vec:
2398         return has_type && TCG_TARGET_HAS_shv_vec;
2399     case INDEX_op_rotli_vec:
2400         return has_type && TCG_TARGET_HAS_roti_vec;
2401     case INDEX_op_rotls_vec:
2402         return has_type && TCG_TARGET_HAS_rots_vec;
2403     case INDEX_op_rotlv_vec:
2404     case INDEX_op_rotrv_vec:
2405         return has_type && TCG_TARGET_HAS_rotv_vec;
2406     case INDEX_op_ssadd_vec:
2407     case INDEX_op_usadd_vec:
2408     case INDEX_op_sssub_vec:
2409     case INDEX_op_ussub_vec:
2410         return has_type && TCG_TARGET_HAS_sat_vec;
2411     case INDEX_op_smin_vec:
2412     case INDEX_op_umin_vec:
2413     case INDEX_op_smax_vec:
2414     case INDEX_op_umax_vec:
2415         return has_type && TCG_TARGET_HAS_minmax_vec;
2416     case INDEX_op_bitsel_vec:
2417         return has_type && TCG_TARGET_HAS_bitsel_vec;
2418     case INDEX_op_cmpsel_vec:
2419         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2420 
2421     default:
2422         if (op < INDEX_op_last_generic) {
2423             const TCGOutOp *outop;
2424             TCGConstraintSetIndex con_set;
2425 
2426             if (!has_type) {
2427                 return false;
2428             }
2429 
2430             outop = all_outop[op];
2431             tcg_debug_assert(outop != NULL);
2432 
2433             con_set = outop->static_constraint;
2434             if (con_set == C_Dynamic) {
2435                 con_set = outop->dynamic_constraint(type, flags);
2436             }
2437             if (con_set >= 0) {
2438                 return true;
2439             }
2440             tcg_debug_assert(con_set == C_NotImplemented);
2441             return false;
2442         }
2443         tcg_debug_assert(op < NB_OPS);
2444         return true;
2445 
2446     case INDEX_op_last_generic:
2447         g_assert_not_reached();
2448     }
2449 }
2450 
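/*
 * Example use (a sketch): expanders pick between a native op and a
 * fallback, e.g.
 *
 *     if (tcg_op_supported(INDEX_op_rotl_i32, TCG_TYPE_I32, 0)) {
 *         ... emit the rotate directly ...
 *     } else {
 *         ... expand via shl/shr/or ...
 *     }
 */
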
2451 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2452 {
2453     unsigned width;
2454 
2455     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2456     width = (type == TCG_TYPE_I32 ? 32 : 64);
2457 
2458     tcg_debug_assert(ofs < width);
2459     tcg_debug_assert(len > 0);
2460     tcg_debug_assert(len <= width - ofs);
2461 
2462     return TCG_TARGET_deposit_valid(type, ofs, len);
2463 }
2464 
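/*
 * Example: depositing the low 8 bits of one value into bits [15:8] of
 * another uses type=TCG_TYPE_I32, ofs=8, len=8; whether the backend
 * can do that in a single instruction is decided by
 * TCG_TARGET_deposit_valid().
 */
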
2465 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2466 
2467 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2468                           TCGTemp *ret, TCGTemp **args)
2469 {
2470     TCGv_i64 extend_free[MAX_CALL_IARGS];
2471     int n_extend = 0;
2472     TCGOp *op;
2473     int i, n, pi = 0, total_args;
2474 
2475     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2476         init_call_layout(info);
2477         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2478     }
2479 
2480     total_args = info->nr_out + info->nr_in + 2;
2481     op = tcg_op_alloc(INDEX_op_call, total_args);
2482 
2483 #ifdef CONFIG_PLUGIN
2484     /* Flag helpers that may affect guest state */
2485     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2486         tcg_ctx->plugin_insn->calls_helpers = true;
2487     }
2488 #endif
2489 
2490     TCGOP_CALLO(op) = n = info->nr_out;
2491     switch (n) {
2492     case 0:
2493         tcg_debug_assert(ret == NULL);
2494         break;
2495     case 1:
2496         tcg_debug_assert(ret != NULL);
2497         op->args[pi++] = temp_arg(ret);
2498         break;
2499     case 2:
2500     case 4:
2501         tcg_debug_assert(ret != NULL);
2502         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2503         tcg_debug_assert(ret->temp_subindex == 0);
2504         for (i = 0; i < n; ++i) {
2505             op->args[pi++] = temp_arg(ret + i);
2506         }
2507         break;
2508     default:
2509         g_assert_not_reached();
2510     }
2511 
2512     TCGOP_CALLI(op) = n = info->nr_in;
2513     for (i = 0; i < n; i++) {
2514         const TCGCallArgumentLoc *loc = &info->in[i];
2515         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2516 
2517         switch (loc->kind) {
2518         case TCG_CALL_ARG_NORMAL:
2519         case TCG_CALL_ARG_BY_REF:
2520         case TCG_CALL_ARG_BY_REF_N:
2521             op->args[pi++] = temp_arg(ts);
2522             break;
2523 
2524         case TCG_CALL_ARG_EXTEND_U:
2525         case TCG_CALL_ARG_EXTEND_S:
2526             {
2527                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2528                 TCGv_i32 orig = temp_tcgv_i32(ts);
2529 
2530                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2531                     tcg_gen_ext_i32_i64(temp, orig);
2532                 } else {
2533                     tcg_gen_extu_i32_i64(temp, orig);
2534                 }
2535                 op->args[pi++] = tcgv_i64_arg(temp);
2536                 extend_free[n_extend++] = temp;
2537             }
2538             break;
2539 
2540         default:
2541             g_assert_not_reached();
2542         }
2543     }
2544     op->args[pi++] = (uintptr_t)func;
2545     op->args[pi++] = (uintptr_t)info;
2546     tcg_debug_assert(pi == total_args);
2547 
2548     if (tcg_ctx->emit_before_op) {
2549         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2550     } else {
2551         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2552     }
2553 
2554     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2555     for (i = 0; i < n_extend; ++i) {
2556         tcg_temp_free_i64(extend_free[i]);
2557     }
2558 }
2559 
2560 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2561 {
2562     tcg_gen_callN(func, info, ret, NULL);
2563 }
2564 
2565 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2566 {
2567     tcg_gen_callN(func, info, ret, &t1);
2568 }
2569 
2570 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2571                    TCGTemp *t1, TCGTemp *t2)
2572 {
2573     TCGTemp *args[2] = { t1, t2 };
2574     tcg_gen_callN(func, info, ret, args);
2575 }
2576 
2577 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2578                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2579 {
2580     TCGTemp *args[3] = { t1, t2, t3 };
2581     tcg_gen_callN(func, info, ret, args);
2582 }
2583 
2584 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2585                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2586 {
2587     TCGTemp *args[4] = { t1, t2, t3, t4 };
2588     tcg_gen_callN(func, info, ret, args);
2589 }
2590 
2591 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2592                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2593 {
2594     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2595     tcg_gen_callN(func, info, ret, args);
2596 }
2597 
2598 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2599                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2600                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2601 {
2602     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2607                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2608                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2609 {
2610     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2611     tcg_gen_callN(func, info, ret, args);
2612 }
2613 
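/*
 * These entry points are normally reached through the generated
 * gen_helper_*() wrappers rather than called directly.  Roughly, as a
 * sketch with names abbreviated, a two-argument helper expands to:
 *
 *     gen_helper_foo(ret, a1, a2)
 *         => tcg_gen_call2(helper_foo, &helper_info_foo,
 *                          tcgv_i64_temp(ret),
 *                          tcgv_i64_temp(a1), tcgv_i64_temp(a2));
 */
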
2614 static void tcg_reg_alloc_start(TCGContext *s)
2615 {
2616     int i, n;
2617 
2618     for (i = 0, n = s->nb_temps; i < n; i++) {
2619         TCGTemp *ts = &s->temps[i];
2620         TCGTempVal val = TEMP_VAL_MEM;
2621 
2622         switch (ts->kind) {
2623         case TEMP_CONST:
2624             val = TEMP_VAL_CONST;
2625             break;
2626         case TEMP_FIXED:
2627             val = TEMP_VAL_REG;
2628             break;
2629         case TEMP_GLOBAL:
2630             break;
2631         case TEMP_EBB:
2632             val = TEMP_VAL_DEAD;
2633             /* fall through */
2634         case TEMP_TB:
2635             ts->mem_allocated = 0;
2636             break;
2637         default:
2638             g_assert_not_reached();
2639         }
2640         ts->val_type = val;
2641     }
2642 
2643     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2644 }
2645 
2646 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2647                                  TCGTemp *ts)
2648 {
2649     int idx = temp_idx(ts);
2650 
2651     switch (ts->kind) {
2652     case TEMP_FIXED:
2653     case TEMP_GLOBAL:
2654         pstrcpy(buf, buf_size, ts->name);
2655         break;
2656     case TEMP_TB:
2657         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2658         break;
2659     case TEMP_EBB:
2660         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2661         break;
2662     case TEMP_CONST:
2663         switch (ts->type) {
2664         case TCG_TYPE_I32:
2665             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2666             break;
2667 #if TCG_TARGET_REG_BITS > 32
2668         case TCG_TYPE_I64:
2669             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2670             break;
2671 #endif
2672         case TCG_TYPE_V64:
2673         case TCG_TYPE_V128:
2674         case TCG_TYPE_V256:
2675             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2676                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2677             break;
2678         default:
2679             g_assert_not_reached();
2680         }
2681         break;
2682     }
2683     return buf;
2684 }
2685 
2686 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2687                              int buf_size, TCGArg arg)
2688 {
2689     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2690 }
2691 
2692 static const char * const cond_name[] =
2693 {
2694     [TCG_COND_NEVER] = "never",
2695     [TCG_COND_ALWAYS] = "always",
2696     [TCG_COND_EQ] = "eq",
2697     [TCG_COND_NE] = "ne",
2698     [TCG_COND_LT] = "lt",
2699     [TCG_COND_GE] = "ge",
2700     [TCG_COND_LE] = "le",
2701     [TCG_COND_GT] = "gt",
2702     [TCG_COND_LTU] = "ltu",
2703     [TCG_COND_GEU] = "geu",
2704     [TCG_COND_LEU] = "leu",
2705     [TCG_COND_GTU] = "gtu",
2706     [TCG_COND_TSTEQ] = "tsteq",
2707     [TCG_COND_TSTNE] = "tstne",
2708 };
2709 
2710 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2711 {
2712     [MO_UB]   = "ub",
2713     [MO_SB]   = "sb",
2714     [MO_LEUW] = "leuw",
2715     [MO_LESW] = "lesw",
2716     [MO_LEUL] = "leul",
2717     [MO_LESL] = "lesl",
2718     [MO_LEUQ] = "leq",
2719     [MO_BEUW] = "beuw",
2720     [MO_BESW] = "besw",
2721     [MO_BEUL] = "beul",
2722     [MO_BESL] = "besl",
2723     [MO_BEUQ] = "beq",
2724     [MO_128 + MO_BE] = "beo",
2725     [MO_128 + MO_LE] = "leo",
2726 };
2727 
2728 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2729     [MO_UNALN >> MO_ASHIFT]    = "un+",
2730     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2731     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2732     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2733     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2734     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2735     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2736     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2737 };
2738 
2739 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2740     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2741     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2742     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2743     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2744     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2745     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2746 };
2747 
2748 static const char bswap_flag_name[][6] = {
2749     [TCG_BSWAP_IZ] = "iz",
2750     [TCG_BSWAP_OZ] = "oz",
2751     [TCG_BSWAP_OS] = "os",
2752     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2753     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2754 };
2755 
2756 #ifdef CONFIG_PLUGIN
2757 static const char * const plugin_from_name[] = {
2758     "from-tb",
2759     "from-insn",
2760     "after-insn",
2761     "after-tb",
2762 };
2763 #endif
2764 
2765 static inline bool tcg_regset_single(TCGRegSet d)
2766 {
2767     return (d & (d - 1)) == 0;
2768 }
2769 
2770 static inline TCGReg tcg_regset_first(TCGRegSet d)
2771 {
2772     if (TCG_TARGET_NB_REGS <= 32) {
2773         return ctz32(d);
2774     } else {
2775         return ctz64(d);
2776     }
2777 }
2778 
2779 /* Return only the number of characters output -- no error return. */
2780 #define ne_fprintf(...) \
2781     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2782 
2783 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2784 {
2785     char buf[128];
2786     TCGOp *op;
2787 
2788     QTAILQ_FOREACH(op, &s->ops, link) {
2789         int i, k, nb_oargs, nb_iargs, nb_cargs;
2790         const TCGOpDef *def;
2791         TCGOpcode c;
2792         int col = 0;
2793 
2794         c = op->opc;
2795         def = &tcg_op_defs[c];
2796 
2797         if (c == INDEX_op_insn_start) {
2798             nb_oargs = 0;
2799             col += ne_fprintf(f, "\n ----");
2800 
2801             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2802                 col += ne_fprintf(f, " %016" PRIx64,
2803                                   tcg_get_insn_start_param(op, i));
2804             }
2805         } else if (c == INDEX_op_call) {
2806             const TCGHelperInfo *info = tcg_call_info(op);
2807             void *func = tcg_call_func(op);
2808 
2809             /* variable number of arguments */
2810             nb_oargs = TCGOP_CALLO(op);
2811             nb_iargs = TCGOP_CALLI(op);
2812             nb_cargs = def->nb_cargs;
2813 
2814             col += ne_fprintf(f, " %s ", def->name);
2815 
2816             /*
2817              * Print the function name from TCGHelperInfo, if available.
2818              * Note that plugins have a template function for the info,
2819              * but the actual function pointer comes from the plugin.
2820              */
2821             if (func == info->func) {
2822                 col += ne_fprintf(f, "%s", info->name);
2823             } else {
2824                 col += ne_fprintf(f, "plugin(%p)", func);
2825             }
2826 
2827             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2828             for (i = 0; i < nb_oargs; i++) {
2829                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2830                                                             op->args[i]));
2831             }
2832             for (i = 0; i < nb_iargs; i++) {
2833                 TCGArg arg = op->args[nb_oargs + i];
2834                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2835                 col += ne_fprintf(f, ",%s", t);
2836             }
2837         } else {
2838             if (def->flags & TCG_OPF_INT) {
2839                 col += ne_fprintf(f, " %s_i%d ",
2840                                   def->name,
2841                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2842             } else if (def->flags & TCG_OPF_VECTOR) {
2843                 col += ne_fprintf(f, "%s v%d,e%d,",
2844                                   def->name,
2845                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2846                                   8 << TCGOP_VECE(op));
2847             } else {
2848                 col += ne_fprintf(f, " %s ", def->name);
2849             }
2850 
2851             nb_oargs = def->nb_oargs;
2852             nb_iargs = def->nb_iargs;
2853             nb_cargs = def->nb_cargs;
2854 
2855             k = 0;
2856             for (i = 0; i < nb_oargs; i++) {
2857                 const char *sep = k ? "," : "";
2858                 col += ne_fprintf(f, "%s%s", sep,
2859                                   tcg_get_arg_str(s, buf, sizeof(buf),
2860                                                   op->args[k++]));
2861             }
2862             for (i = 0; i < nb_iargs; i++) {
2863                 const char *sep = k ? "," : "";
2864                 col += ne_fprintf(f, "%s%s", sep,
2865                                   tcg_get_arg_str(s, buf, sizeof(buf),
2866                                                   op->args[k++]));
2867             }
2868             switch (c) {
2869             case INDEX_op_brcond_i32:
2870             case INDEX_op_setcond_i32:
2871             case INDEX_op_negsetcond_i32:
2872             case INDEX_op_movcond_i32:
2873             case INDEX_op_brcond2_i32:
2874             case INDEX_op_setcond2_i32:
2875             case INDEX_op_brcond_i64:
2876             case INDEX_op_setcond_i64:
2877             case INDEX_op_negsetcond_i64:
2878             case INDEX_op_movcond_i64:
2879             case INDEX_op_cmp_vec:
2880             case INDEX_op_cmpsel_vec:
2881                 if (op->args[k] < ARRAY_SIZE(cond_name)
2882                     && cond_name[op->args[k]]) {
2883                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2884                 } else {
2885                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2886                 }
2887                 i = 1;
2888                 break;
2889             case INDEX_op_qemu_ld_i32:
2890             case INDEX_op_qemu_st_i32:
2891             case INDEX_op_qemu_st8_i32:
2892             case INDEX_op_qemu_ld_i64:
2893             case INDEX_op_qemu_st_i64:
2894             case INDEX_op_qemu_ld_i128:
2895             case INDEX_op_qemu_st_i128:
2896                 {
2897                     const char *s_al, *s_op, *s_at;
2898                     MemOpIdx oi = op->args[k++];
2899                     MemOp mop = get_memop(oi);
2900                     unsigned ix = get_mmuidx(oi);
2901 
2902                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2903                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2904                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2905                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2906 
2907                     /* If all fields are accounted for, print symbolically. */
2908                     if (!mop && s_al && s_op && s_at) {
2909                         col += ne_fprintf(f, ",%s%s%s,%u",
2910                                           s_at, s_al, s_op, ix);
2911                     } else {
2912                         mop = get_memop(oi);
2913                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2914                     }
2915                     i = 1;
2916                 }
2917                 break;
2918             case INDEX_op_bswap16_i32:
2919             case INDEX_op_bswap16_i64:
2920             case INDEX_op_bswap32_i32:
2921             case INDEX_op_bswap32_i64:
2922             case INDEX_op_bswap64_i64:
2923                 {
2924                     TCGArg flags = op->args[k];
2925                     const char *name = NULL;
2926 
2927                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2928                         name = bswap_flag_name[flags];
2929                     }
2930                     if (name) {
2931                         col += ne_fprintf(f, ",%s", name);
2932                     } else {
2933                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2934                     }
2935                     i = k = 1;
2936                 }
2937                 break;
2938 #ifdef CONFIG_PLUGIN
2939             case INDEX_op_plugin_cb:
2940                 {
2941                     TCGArg from = op->args[k++];
2942                     const char *name = NULL;
2943 
2944                     if (from < ARRAY_SIZE(plugin_from_name)) {
2945                         name = plugin_from_name[from];
2946                     }
2947                     if (name) {
2948                         col += ne_fprintf(f, "%s", name);
2949                     } else {
2950                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2951                     }
2952                     i = 1;
2953                 }
2954                 break;
2955 #endif
2956             default:
2957                 i = 0;
2958                 break;
2959             }
2960             switch (c) {
2961             case INDEX_op_set_label:
2962             case INDEX_op_br:
2963             case INDEX_op_brcond_i32:
2964             case INDEX_op_brcond_i64:
2965             case INDEX_op_brcond2_i32:
2966                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2967                                   arg_label(op->args[k])->id);
2968                 i++, k++;
2969                 break;
2970             case INDEX_op_mb:
2971                 {
2972                     TCGBar membar = op->args[k];
2973                     const char *b_op, *m_op;
2974 
2975                     switch (membar & TCG_BAR_SC) {
2976                     case 0:
2977                         b_op = "none";
2978                         break;
2979                     case TCG_BAR_LDAQ:
2980                         b_op = "acq";
2981                         break;
2982                     case TCG_BAR_STRL:
2983                         b_op = "rel";
2984                         break;
2985                     case TCG_BAR_SC:
2986                         b_op = "seq";
2987                         break;
2988                     default:
2989                         g_assert_not_reached();
2990                     }
2991 
2992                     switch (membar & TCG_MO_ALL) {
2993                     case 0:
2994                         m_op = "none";
2995                         break;
2996                     case TCG_MO_LD_LD:
2997                         m_op = "rr";
2998                         break;
2999                     case TCG_MO_LD_ST:
3000                         m_op = "rw";
3001                         break;
3002                     case TCG_MO_ST_LD:
3003                         m_op = "wr";
3004                         break;
3005                     case TCG_MO_ST_ST:
3006                         m_op = "ww";
3007                         break;
3008                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3009                         m_op = "rr+rw";
3010                         break;
3011                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3012                         m_op = "rr+wr";
3013                         break;
3014                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3015                         m_op = "rr+ww";
3016                         break;
3017                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3018                         m_op = "rw+wr";
3019                         break;
3020                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3021                         m_op = "rw+ww";
3022                         break;
3023                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3024                         m_op = "wr+ww";
3025                         break;
3026                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3027                         m_op = "rr+rw+wr";
3028                         break;
3029                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3030                         m_op = "rr+rw+ww";
3031                         break;
3032                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3033                         m_op = "rr+wr+ww";
3034                         break;
3035                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3036                         m_op = "rw+wr+ww";
3037                         break;
3038                     case TCG_MO_ALL:
3039                         m_op = "all";
3040                         break;
3041                     default:
3042                         g_assert_not_reached();
3043                     }
3044 
3045                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3046                     i++, k++;
3047                 }
3048                 break;
3049             default:
3050                 break;
3051             }
3052             for (; i < nb_cargs; i++, k++) {
3053                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3054                                   op->args[k]);
3055             }
3056         }
3057 
3058         if (have_prefs || op->life) {
3059             for (; col < 40; ++col) {
3060                 putc(' ', f);
3061             }
3062         }
3063 
3064         if (op->life) {
3065             unsigned life = op->life;
3066 
3067             if (life & (SYNC_ARG * 3)) {
3068                 ne_fprintf(f, "  sync:");
3069                 for (i = 0; i < 2; ++i) {
3070                     if (life & (SYNC_ARG << i)) {
3071                         ne_fprintf(f, " %d", i);
3072                     }
3073                 }
3074             }
3075             life /= DEAD_ARG;
3076             if (life) {
3077                 ne_fprintf(f, "  dead:");
3078                 for (i = 0; life; ++i, life >>= 1) {
3079                     if (life & 1) {
3080                         ne_fprintf(f, " %d", i);
3081                     }
3082                 }
3083             }
3084         }
3085 
3086         if (have_prefs) {
3087             for (i = 0; i < nb_oargs; ++i) {
3088                 TCGRegSet set = output_pref(op, i);
3089 
3090                 if (i == 0) {
3091                     ne_fprintf(f, "  pref=");
3092                 } else {
3093                     ne_fprintf(f, ",");
3094                 }
3095                 if (set == 0) {
3096                     ne_fprintf(f, "none");
3097                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3098                     ne_fprintf(f, "all");
3099 #ifdef CONFIG_DEBUG_TCG
3100                 } else if (tcg_regset_single(set)) {
3101                     TCGReg reg = tcg_regset_first(set);
3102                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3103 #endif
3104                 } else if (TCG_TARGET_NB_REGS <= 32) {
3105                     ne_fprintf(f, "0x%x", (uint32_t)set);
3106                 } else {
3107                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3108                 }
3109             }
3110         }
3111 
3112         putc('\n', f);
3113     }
3114 }
3115 
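/*
 * A fragment of the resulting dump looks roughly like this
 * (illustrative output, two insn_start words):
 *
 *  ---- 000000000040123c 0000000000000000
 *  mov_i32 tmp0,x10
 *  add_i32 tmp0,tmp0,$0x1
 *  brcond_i32 tmp0,$0x0,eq,$L1           dead: 0 1
 */
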
3116 /* Give higher priority to constraints with fewer registers. */
3117 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3118 {
3119     int n;
3120 
3121     arg_ct += k;
3122     n = ctpop64(arg_ct->regs);
3123 
3124     /*
3125      * Sort constraints of a single register first, which includes output
3126      * aliases (which must exactly match the input already allocated).
3127      */
3128     if (n == 1 || arg_ct->oalias) {
3129         return INT_MAX;
3130     }
3131 
3132     /*
3133      * Sort register pairs next: the first member of a pair, then the
3134      * second immediately after.  Arbitrarily sort multiple pairs by
3135      * the index of the first reg; there shouldn't be many pairs.
3136      */
3137     switch (arg_ct->pair) {
3138     case 1:
3139     case 3:
3140         return (k + 1) * 2;
3141     case 2:
3142         return (arg_ct->pair_index + 1) * 2 - 1;
3143     }
3144 
3145     /* Finally, sort by decreasing register count. */
3146     assert(n > 1);
3147     return -n;
3148 }
3149 
3150 /* Sort from highest priority to lowest. */
3151 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3152 {
3153     int i, j;
3154 
3155     for (i = 0; i < n; i++) {
3156         a[start + i].sort_index = start + i;
3157     }
3158     if (n <= 1) {
3159         return;
3160     }
3161     for (i = 0; i < n - 1; i++) {
3162         for (j = i + 1; j < n; j++) {
3163             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3164             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3165             if (p1 < p2) {
3166                 int tmp = a[start + i].sort_index;
3167                 a[start + i].sort_index = a[start + j].sort_index;
3168                 a[start + j].sort_index = tmp;
3169             }
3170         }
3171     }
3172 }
3173 
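/*
 * For illustration: for an op whose output is aliased to an input
 * ("r", "0", ...), the aliased output returns INT_MAX from
 * get_constraint_priority() and sorts ahead of its peers; members of
 * register pairs come next; the remaining operands follow in order of
 * decreasing size of their allowed register sets.
 */
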
3174 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3175 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3176 
3177 static void process_constraint_sets(void)
3178 {
3179     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3180         const TCGConstraintSet *tdefs = &constraint_sets[c];
3181         TCGArgConstraint *args_ct = all_cts[c];
3182         int nb_oargs = tdefs->nb_oargs;
3183         int nb_iargs = tdefs->nb_iargs;
3184         int nb_args = nb_oargs + nb_iargs;
3185         bool saw_alias_pair = false;
3186 
3187         for (int i = 0; i < nb_args; i++) {
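        /*
         * Each ct_str is a tiny constraint language: a leading digit
         * aliases this input to that numbered output (e.g. "0" for a
         * two-address op), '&' requests a new ("early clobber") output
         * register, 'p'/'m' pair this operand with the previous one,
         * 'i' accepts a constant, and the remaining letters name the
         * target's register/constant classes from tcg-target-con-str.h.
         */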
3188             const char *ct_str = tdefs->args_ct_str[i];
3189             bool input_p = i >= nb_oargs;
3190             int o;
3191 
3192             switch (*ct_str) {
3193             case '0' ... '9':
3194                 o = *ct_str - '0';
3195                 tcg_debug_assert(input_p);
3196                 tcg_debug_assert(o < nb_oargs);
3197                 tcg_debug_assert(args_ct[o].regs != 0);
3198                 tcg_debug_assert(!args_ct[o].oalias);
3199                 args_ct[i] = args_ct[o];
3200                 /* The output sets oalias.  */
3201                 args_ct[o].oalias = 1;
3202                 args_ct[o].alias_index = i;
3203                 /* The input sets ialias. */
3204                 args_ct[i].ialias = 1;
3205                 args_ct[i].alias_index = o;
3206                 if (args_ct[i].pair) {
3207                     saw_alias_pair = true;
3208                 }
3209                 tcg_debug_assert(ct_str[1] == '\0');
3210                 continue;
3211 
3212             case '&':
3213                 tcg_debug_assert(!input_p);
3214                 args_ct[i].newreg = true;
3215                 ct_str++;
3216                 break;
3217 
3218             case 'p': /* plus */
3219                 /* Allocate to the register after the previous. */
3220                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3221                 o = i - 1;
3222                 tcg_debug_assert(!args_ct[o].pair);
3223                 tcg_debug_assert(!args_ct[o].ct);
3224                 args_ct[i] = (TCGArgConstraint){
3225                     .pair = 2,
3226                     .pair_index = o,
3227                     .regs = args_ct[o].regs << 1,
3228                     .newreg = args_ct[o].newreg,
3229                 };
3230                 args_ct[o].pair = 1;
3231                 args_ct[o].pair_index = i;
3232                 tcg_debug_assert(ct_str[1] == '\0');
3233                 continue;
3234 
3235             case 'm': /* minus */
3236                 /* Allocate to the register before the previous. */
3237                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3238                 o = i - 1;
3239                 tcg_debug_assert(!args_ct[o].pair);
3240                 tcg_debug_assert(!args_ct[o].ct);
3241                 args_ct[i] = (TCGArgConstraint){
3242                     .pair = 1,
3243                     .pair_index = o,
3244                     .regs = args_ct[o].regs >> 1,
3245                     .newreg = args_ct[o].newreg,
3246                 };
3247                 args_ct[o].pair = 2;
3248                 args_ct[o].pair_index = i;
3249                 tcg_debug_assert(ct_str[1] == '\0');
3250                 continue;
3251             }
3252 
3253             do {
3254                 switch (*ct_str) {
3255                 case 'i':
3256                     args_ct[i].ct |= TCG_CT_CONST;
3257                     break;
3258 #ifdef TCG_REG_ZERO
3259                 case 'z':
3260                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3261                     break;
3262 #endif
3263 
3264                 /* Include all of the target-specific constraints. */
3265 
3266 #undef CONST
3267 #define CONST(CASE, MASK) \
3268     case CASE: args_ct[i].ct |= MASK; break;
3269 #define REGS(CASE, MASK) \
3270     case CASE: args_ct[i].regs |= MASK; break;
3271 
3272 #include "tcg-target-con-str.h"
3273 
3274 #undef REGS
3275 #undef CONST
3276                 default:
3277                 case '0' ... '9':
3278                 case '&':
3279                 case 'p':
3280                 case 'm':
3281                     /* Typo in TCGConstraintSet constraint. */
3282                     g_assert_not_reached();
3283                 }
3284             } while (*++ct_str != '\0');
3285         }
3286 
3287         /*
3288          * Fix up output pairs that are aliased with inputs.
3289          * When we created the alias, we copied pair from the output.
3290          * There are three cases:
3291          *    (1a) Pairs of inputs alias pairs of outputs.
3292          *    (1b) One input aliases the first of a pair of outputs.
3293          *    (2)  One input aliases the second of a pair of outputs.
3294          *
3295          * Case 1a is handled by making sure that the pair_index'es are
3296          * properly updated so that they appear the same as a pair of inputs.
3297          *
3298          * Case 1b is handled by setting the pair_index of the input to
3299          * itself, simply so it doesn't point to an unrelated argument.
3300          * Since we don't encounter the "second" during the input allocation
3301          * phase, nothing happens with the second half of the input pair.
3302          *
3303          * Case 2 is handled by setting the second input to pair=3, the
3304          * first output to pair=3, and the pair_index'es to match.
3305          */
3306         if (saw_alias_pair) {
3307             for (int i = nb_oargs; i < nb_args; i++) {
3308                 int o, o2, i2;
3309 
3310                 /*
3311                  * Since [0-9pm] must be alone in the constraint string,
3312                  * the only way they can both be set is if the pair comes
3313                  * from the output alias.
3314                  */
3315                 if (!args_ct[i].ialias) {
3316                     continue;
3317                 }
3318                 switch (args_ct[i].pair) {
3319                 case 0:
3320                     break;
3321                 case 1:
3322                     o = args_ct[i].alias_index;
3323                     o2 = args_ct[o].pair_index;
3324                     tcg_debug_assert(args_ct[o].pair == 1);
3325                     tcg_debug_assert(args_ct[o2].pair == 2);
3326                     if (args_ct[o2].oalias) {
3327                         /* Case 1a */
3328                         i2 = args_ct[o2].alias_index;
3329                         tcg_debug_assert(args_ct[i2].pair == 2);
3330                         args_ct[i2].pair_index = i;
3331                         args_ct[i].pair_index = i2;
3332                     } else {
3333                         /* Case 1b */
3334                         args_ct[i].pair_index = i;
3335                     }
3336                     break;
3337                 case 2:
3338                     o = args_ct[i].alias_index;
3339                     o2 = args_ct[o].pair_index;
3340                     tcg_debug_assert(args_ct[o].pair == 2);
3341                     tcg_debug_assert(args_ct[o2].pair == 1);
3342                     if (args_ct[o2].oalias) {
3343                         /* Case 1a */
3344                         i2 = args_ct[o2].alias_index;
3345                         tcg_debug_assert(args_ct[i2].pair == 1);
3346                         args_ct[i2].pair_index = i;
3347                         args_ct[i].pair_index = i2;
3348                     } else {
3349                         /* Case 2 */
3350                         args_ct[i].pair = 3;
3351                         args_ct[o2].pair = 3;
3352                         args_ct[i].pair_index = o2;
3353                         args_ct[o2].pair_index = i;
3354                     }
3355                     break;
3356                 default:
3357                     g_assert_not_reached();
3358                 }
3359             }
3360         }
3361 
3362         /* sort the constraints (XXX: this is just a heuristic) */
3363         sort_constraints(args_ct, 0, nb_oargs);
3364         sort_constraints(args_ct, nb_oargs, nb_iargs);
3365     }
3366 }
3367 
3368 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3369 {
3370     TCGOpcode opc = op->opc;
3371     TCGType type = TCGOP_TYPE(op);
3372     unsigned flags = TCGOP_FLAGS(op);
3373     const TCGOpDef *def = &tcg_op_defs[opc];
3374     const TCGOutOp *outop = all_outop[opc];
3375     TCGConstraintSetIndex con_set;
3376 
3377     if (def->flags & TCG_OPF_NOT_PRESENT) {
3378         return empty_cts;
3379     }
3380 
3381     if (outop) {
3382         con_set = outop->static_constraint;
3383         if (con_set == C_Dynamic) {
3384             con_set = outop->dynamic_constraint(type, flags);
3385         }
3386     } else {
3387         con_set = tcg_target_op_def(opc, type, flags);
3388     }
3389     tcg_debug_assert(con_set >= 0);
3390     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3391 
3392     /* The constraint arguments must match TCGOpcode arguments. */
3393     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3394     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3395 
3396     return all_cts[con_set];
3397 }
3398 
3399 static void remove_label_use(TCGOp *op, int idx)
3400 {
3401     TCGLabel *label = arg_label(op->args[idx]);
3402     TCGLabelUse *use;
3403 
3404     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3405         if (use->op == op) {
3406             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3407             return;
3408         }
3409     }
3410     g_assert_not_reached();
3411 }
3412 
3413 void tcg_op_remove(TCGContext *s, TCGOp *op)
3414 {
3415     switch (op->opc) {
3416     case INDEX_op_br:
3417         remove_label_use(op, 0);
3418         break;
3419     case INDEX_op_brcond_i32:
3420     case INDEX_op_brcond_i64:
3421         remove_label_use(op, 3);
3422         break;
3423     case INDEX_op_brcond2_i32:
3424         remove_label_use(op, 5);
3425         break;
3426     default:
3427         break;
3428     }
3429 
3430     QTAILQ_REMOVE(&s->ops, op, link);
3431     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3432     s->nb_ops--;
3433 }
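/* Removed ops are parked on s->free_ops and recycled by tcg_op_alloc()
   below, so removal is cheap and storage is reused rather than freed. */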
3434 
3435 void tcg_remove_ops_after(TCGOp *op)
3436 {
3437     TCGContext *s = tcg_ctx;
3438 
3439     while (true) {
3440         TCGOp *last = tcg_last_op();
3441         if (last == op) {
3442             return;
3443         }
3444         tcg_op_remove(s, last);
3445     }
3446 }
3447 
3448 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3449 {
3450     TCGContext *s = tcg_ctx;
3451     TCGOp *op = NULL;
3452 
3453     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3454         QTAILQ_FOREACH(op, &s->free_ops, link) {
3455             if (nargs <= op->nargs) {
3456                 QTAILQ_REMOVE(&s->free_ops, op, link);
3457                 nargs = op->nargs;
3458                 goto found;
3459             }
3460         }
3461     }
3462 
3463     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3464     nargs = MAX(4, nargs);
3465     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3466 
3467  found:
3468     memset(op, 0, offsetof(TCGOp, link));
3469     op->opc = opc;
3470     op->nargs = nargs;
3471 
3472     /* Check for bitfield overflow. */
3473     tcg_debug_assert(op->nargs == nargs);
3474 
3475     s->nb_ops++;
3476     return op;
3477 }
3478 
3479 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3480 {
3481     TCGOp *op = tcg_op_alloc(opc, nargs);
3482 
3483     if (tcg_ctx->emit_before_op) {
3484         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3485     } else {
3486         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3487     }
3488     return op;
3489 }
3490 
3491 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3492                             TCGOpcode opc, TCGType type, unsigned nargs)
3493 {
3494     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3495 
3496     TCGOP_TYPE(new_op) = type;
3497     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3498     return new_op;
3499 }
3500 
3501 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3502                            TCGOpcode opc, TCGType type, unsigned nargs)
3503 {
3504     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3505 
3506     TCGOP_TYPE(new_op) = type;
3507     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3508     return new_op;
3509 }
3510 
3511 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3512 {
3513     TCGLabelUse *u;
3514 
3515     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3516         TCGOp *op = u->op;
3517         switch (op->opc) {
3518         case INDEX_op_br:
3519             op->args[0] = label_arg(to);
3520             break;
3521         case INDEX_op_brcond_i32:
3522         case INDEX_op_brcond_i64:
3523             op->args[3] = label_arg(to);
3524             break;
3525         case INDEX_op_brcond2_i32:
3526             op->args[5] = label_arg(to);
3527             break;
3528         default:
3529             g_assert_not_reached();
3530         }
3531     }
3532 
3533     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3534 }
3535 
3536 /* Reachability analysis: remove unreachable code.  */
3537 static void __attribute__((noinline))
3538 reachable_code_pass(TCGContext *s)
3539 {
3540     TCGOp *op, *op_next, *op_prev;
3541     bool dead = false;
3542 
3543     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3544         bool remove = dead;
3545         TCGLabel *label;
3546 
3547         switch (op->opc) {
3548         case INDEX_op_set_label:
3549             label = arg_label(op->args[0]);
3550 
3551             /*
3552              * Note that the first op in the TB is always a load,
3553              * so there is always something before a label.
3554              */
3555             op_prev = QTAILQ_PREV(op, link);
3556 
3557             /*
3558              * If we find two sequential labels, move all branches to
3559              * reference the second label and remove the first label.
3560              * Do this before branch to next optimization, so that the
3561              * middle label is out of the way.
3562              */
3563             if (op_prev->opc == INDEX_op_set_label) {
3564                 move_label_uses(label, arg_label(op_prev->args[0]));
3565                 tcg_op_remove(s, op_prev);
3566                 op_prev = QTAILQ_PREV(op, link);
3567             }
3568 
3569             /*
3570              * Optimization can fold conditional branches to unconditional.
3571              * If we find a label which is preceded by an unconditional
3572              * branch to next, remove the branch.  We couldn't do this when
3573              * processing the branch because any dead code between the branch
3574              * and label had not yet been removed.
3575              */
3576             if (op_prev->opc == INDEX_op_br &&
3577                 label == arg_label(op_prev->args[0])) {
3578                 tcg_op_remove(s, op_prev);
3579                 /* Fall through means insns become live again.  */
3580                 dead = false;
3581             }
3582 
3583             if (QSIMPLEQ_EMPTY(&label->branches)) {
3584                 /*
3585                  * While there is an occasional backward branch, virtually
3586                  * all branches generated by the translators are forward.
3587                  * Which means that generally we will have already removed
3588                  * all references to the label that will be, and there is
3589                  * little to be gained by iterating.
3590                  */
3591                 remove = true;
3592             } else {
3593                 /* Once we see a label, insns become live again.  */
3594                 dead = false;
3595                 remove = false;
3596             }
3597             break;
3598 
3599         case INDEX_op_br:
3600         case INDEX_op_exit_tb:
3601         case INDEX_op_goto_ptr:
3602             /* Unconditional branches; everything following is dead.  */
3603             dead = true;
3604             break;
3605 
3606         case INDEX_op_call:
3607             /* Notice noreturn helper calls, raising exceptions.  */
3608             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3609                 dead = true;
3610             }
3611             break;
3612 
3613         case INDEX_op_insn_start:
3614             /* Never remove -- we need to keep these for unwind.  */
3615             remove = false;
3616             break;
3617 
3618         default:
3619             break;
3620         }
3621 
3622         if (remove) {
3623             tcg_op_remove(s, op);
3624         }
3625     }
3626 }
3627 
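/*
 * Liveness state bits: TS_DEAD means the value is not needed by any
 * later op; TS_MEM means the value must also be present in its
 * canonical memory slot.  A global marked TS_DEAD | TS_MEM is fully
 * synced back to memory.
 */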
3628 #define TS_DEAD  1
3629 #define TS_MEM   2
3630 
3631 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3632 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3633 
3634 /* For liveness_pass_1, the register preferences for a given temp.  */
3635 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3636 {
3637     return ts->state_ptr;
3638 }
3639 
3640 /* For liveness_pass_1, reset the preferences for a given temp to the
3641  * maximal regset for its type.
3642  */
3643 static inline void la_reset_pref(TCGTemp *ts)
3644 {
3645     *la_temp_pref(ts)
3646         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3647 }
3648 
3649 /* liveness analysis: end of function: all temps are dead, and globals
3650    should be in memory. */
3651 static void la_func_end(TCGContext *s, int ng, int nt)
3652 {
3653     int i;
3654 
3655     for (i = 0; i < ng; ++i) {
3656         s->temps[i].state = TS_DEAD | TS_MEM;
3657         la_reset_pref(&s->temps[i]);
3658     }
3659     for (i = ng; i < nt; ++i) {
3660         s->temps[i].state = TS_DEAD;
3661         la_reset_pref(&s->temps[i]);
3662     }
3663 }
3664 
3665 /* liveness analysis: end of basic block: all temps are dead, globals
3666    and local temps should be in memory. */
3667 static void la_bb_end(TCGContext *s, int ng, int nt)
3668 {
3669     int i;
3670 
3671     for (i = 0; i < nt; ++i) {
3672         TCGTemp *ts = &s->temps[i];
3673         int state;
3674 
3675         switch (ts->kind) {
3676         case TEMP_FIXED:
3677         case TEMP_GLOBAL:
3678         case TEMP_TB:
3679             state = TS_DEAD | TS_MEM;
3680             break;
3681         case TEMP_EBB:
3682         case TEMP_CONST:
3683             state = TS_DEAD;
3684             break;
3685         default:
3686             g_assert_not_reached();
3687         }
3688         ts->state = state;
3689         la_reset_pref(ts);
3690     }
3691 }
3692 
3693 /* liveness analysis: sync globals back to memory.  */
3694 static void la_global_sync(TCGContext *s, int ng)
3695 {
3696     int i;
3697 
3698     for (i = 0; i < ng; ++i) {
3699         int state = s->temps[i].state;
3700         s->temps[i].state = state | TS_MEM;
3701         if (state == TS_DEAD) {
3702             /* If the global was previously dead, reset prefs.  */
3703             la_reset_pref(&s->temps[i]);
3704         }
3705     }
3706 }
3707 
3708 /*
3709  * liveness analysis: conditional branch: all temps are dead unless
3710  * explicitly live-across-conditional-branch, globals and local temps
3711  * should be synced.
3712  */
3713 static void la_bb_sync(TCGContext *s, int ng, int nt)
3714 {
3715     la_global_sync(s, ng);
3716 
3717     for (int i = ng; i < nt; ++i) {
3718         TCGTemp *ts = &s->temps[i];
3719         int state;
3720 
3721         switch (ts->kind) {
3722         case TEMP_TB:
3723             state = ts->state;
3724             ts->state = state | TS_MEM;
3725             if (state != TS_DEAD) {
3726                 continue;
3727             }
3728             break;
3729         case TEMP_EBB:
3730         case TEMP_CONST:
3731             continue;
3732         default:
3733             g_assert_not_reached();
3734         }
3735         la_reset_pref(&s->temps[i]);
3736     }
3737 }
3738 
3739 /* liveness analysis: sync globals back to memory and kill.  */
3740 static void la_global_kill(TCGContext *s, int ng)
3741 {
3742     int i;
3743 
3744     for (i = 0; i < ng; i++) {
3745         s->temps[i].state = TS_DEAD | TS_MEM;
3746         la_reset_pref(&s->temps[i]);
3747     }
3748 }
3749 
3750 /* liveness analysis: note live globals crossing calls.  */
3751 static void la_cross_call(TCGContext *s, int nt)
3752 {
3753     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3754     int i;
3755 
3756     for (i = 0; i < nt; i++) {
3757         TCGTemp *ts = &s->temps[i];
3758         if (!(ts->state & TS_DEAD)) {
3759             TCGRegSet *pset = la_temp_pref(ts);
3760             TCGRegSet set = *pset;
3761 
3762             set &= mask;
3763             /* If the combination is not possible, restart.  */
3764             if (set == 0) {
3765                 set = tcg_target_available_regs[ts->type] & mask;
3766             }
3767             *pset = set;
3768         }
3769     }
3770 }
3771 
3772 /*
3773  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3774  * to TEMP_EBB, if possible.
3775  */
3776 static void __attribute__((noinline))
3777 liveness_pass_0(TCGContext *s)
3778 {
3779     void * const multiple_ebb = (void *)(uintptr_t)-1;
3780     int nb_temps = s->nb_temps;
3781     TCGOp *op, *ebb;
3782 
3783     for (int i = s->nb_globals; i < nb_temps; ++i) {
3784         s->temps[i].state_ptr = NULL;
3785     }
3786 
3787     /*
3788      * Represent each EBB by the op at which it begins.  In the case of
3789      * the first EBB, this is the first op, otherwise it is a label.
3790      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3791      * within a single EBB, else MULTIPLE_EBB.
3792      */
3793     ebb = QTAILQ_FIRST(&s->ops);
3794     QTAILQ_FOREACH(op, &s->ops, link) {
3795         const TCGOpDef *def;
3796         int nb_oargs, nb_iargs;
3797 
3798         switch (op->opc) {
3799         case INDEX_op_set_label:
3800             ebb = op;
3801             continue;
3802         case INDEX_op_discard:
3803             continue;
3804         case INDEX_op_call:
3805             nb_oargs = TCGOP_CALLO(op);
3806             nb_iargs = TCGOP_CALLI(op);
3807             break;
3808         default:
3809             def = &tcg_op_defs[op->opc];
3810             nb_oargs = def->nb_oargs;
3811             nb_iargs = def->nb_iargs;
3812             break;
3813         }
3814 
3815         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3816             TCGTemp *ts = arg_temp(op->args[i]);
3817 
3818             if (ts->kind != TEMP_TB) {
3819                 continue;
3820             }
3821             if (ts->state_ptr == NULL) {
3822                 ts->state_ptr = ebb;
3823             } else if (ts->state_ptr != ebb) {
3824                 ts->state_ptr = multiple_ebb;
3825             }
3826         }
3827     }
3828 
3829     /*
3830      * For TEMP_TB that turned out not to be used beyond one EBB,
3831      * reduce the liveness to TEMP_EBB.
3832      */
3833     for (int i = s->nb_globals; i < nb_temps; ++i) {
3834         TCGTemp *ts = &s->temps[i];
3835         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3836             ts->kind = TEMP_EBB;
3837         }
3838     }
3839 }
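/*
 * For example (illustrative): a TEMP_TB written and read only between
 * one label and the next branch lives within a single EBB, so the
 * loop above downgrades it to TEMP_EBB and it no longer needs to be
 * kept in memory across basic-block boundaries.
 */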
3840 
3841 /* Liveness analysis: update the opc_arg_life array to tell if a
3842    given input argument is dead. Instructions updating dead
3843    temporaries are removed. */
3844 static void __attribute__((noinline))
3845 liveness_pass_1(TCGContext *s)
3846 {
3847     int nb_globals = s->nb_globals;
3848     int nb_temps = s->nb_temps;
3849     TCGOp *op, *op_prev;
3850     TCGRegSet *prefs;
3851     int i;
3852 
3853     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3854     for (i = 0; i < nb_temps; ++i) {
3855         s->temps[i].state_ptr = prefs + i;
3856     }
3857 
3858     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3859     la_func_end(s, nb_globals, nb_temps);
3860 
3861     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3862         int nb_iargs, nb_oargs;
3863         TCGOpcode opc_new, opc_new2;
3864         TCGLifeData arg_life = 0;
3865         TCGTemp *ts;
3866         TCGOpcode opc = op->opc;
3867         const TCGOpDef *def = &tcg_op_defs[opc];
3868         const TCGArgConstraint *args_ct;
3869 
3870         switch (opc) {
3871         case INDEX_op_call:
3872             {
3873                 const TCGHelperInfo *info = tcg_call_info(op);
3874                 int call_flags = tcg_call_flags(op);
3875 
3876                 nb_oargs = TCGOP_CALLO(op);
3877                 nb_iargs = TCGOP_CALLI(op);
3878 
3879                 /* pure functions can be removed if their result is unused */
3880                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3881                     for (i = 0; i < nb_oargs; i++) {
3882                         ts = arg_temp(op->args[i]);
3883                         if (ts->state != TS_DEAD) {
3884                             goto do_not_remove_call;
3885                         }
3886                     }
3887                     goto do_remove;
3888                 }
3889             do_not_remove_call:
3890 
3891                 /* Output args are dead.  */
3892                 for (i = 0; i < nb_oargs; i++) {
3893                     ts = arg_temp(op->args[i]);
3894                     if (ts->state & TS_DEAD) {
3895                         arg_life |= DEAD_ARG << i;
3896                     }
3897                     if (ts->state & TS_MEM) {
3898                         arg_life |= SYNC_ARG << i;
3899                     }
3900                     ts->state = TS_DEAD;
3901                     la_reset_pref(ts);
3902                 }
3903 
3904                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3905                 memset(op->output_pref, 0, sizeof(op->output_pref));
3906 
3907                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3908                                     TCG_CALL_NO_READ_GLOBALS))) {
3909                     la_global_kill(s, nb_globals);
3910                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3911                     la_global_sync(s, nb_globals);
3912                 }
3913 
3914                 /* Record arguments that die in this helper.  */
3915                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3916                     ts = arg_temp(op->args[i]);
3917                     if (ts->state & TS_DEAD) {
3918                         arg_life |= DEAD_ARG << i;
3919                     }
3920                 }
3921 
3922                 /* For all live registers, remove call-clobbered prefs.  */
3923                 la_cross_call(s, nb_temps);
3924 
3925                 /*
3926                  * Input arguments are live for preceding opcodes.
3927                  *
3928                  * For those arguments that die, and will be allocated in
3929                  * registers, clear the register set for that arg, to be
3930                  * filled in below.  For args that will be on the stack,
3931                  * reset to any available reg.  Process arguments in reverse
3932                  * order so that if a temp is used more than once, the stack
3933                  * reset to max happens before the register reset to 0.
3934                  */
3935                 for (i = nb_iargs - 1; i >= 0; i--) {
3936                     const TCGCallArgumentLoc *loc = &info->in[i];
3937                     ts = arg_temp(op->args[nb_oargs + i]);
3938 
3939                     if (ts->state & TS_DEAD) {
3940                         switch (loc->kind) {
3941                         case TCG_CALL_ARG_NORMAL:
3942                         case TCG_CALL_ARG_EXTEND_U:
3943                         case TCG_CALL_ARG_EXTEND_S:
3944                             if (arg_slot_reg_p(loc->arg_slot)) {
3945                                 *la_temp_pref(ts) = 0;
3946                                 break;
3947                             }
3948                             /* fall through */
3949                         default:
3950                             *la_temp_pref(ts) =
3951                                 tcg_target_available_regs[ts->type];
3952                             break;
3953                         }
3954                         ts->state &= ~TS_DEAD;
3955                     }
3956                 }
3957 
3958                 /*
3959                  * For each input argument, add its input register to prefs.
3960                  * If a temp is used once, this produces a single set bit;
3961                  * if a temp is used multiple times, this produces a set.
3962                  */
3963                 for (i = 0; i < nb_iargs; i++) {
3964                     const TCGCallArgumentLoc *loc = &info->in[i];
3965                     ts = arg_temp(op->args[nb_oargs + i]);
3966 
3967                     switch (loc->kind) {
3968                     case TCG_CALL_ARG_NORMAL:
3969                     case TCG_CALL_ARG_EXTEND_U:
3970                     case TCG_CALL_ARG_EXTEND_S:
3971                         if (arg_slot_reg_p(loc->arg_slot)) {
3972                             tcg_regset_set_reg(*la_temp_pref(ts),
3973                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3974                         }
3975                         break;
3976                     default:
3977                         break;
3978                     }
3979                 }
3980             }
3981             break;
3982         case INDEX_op_insn_start:
3983             break;
3984         case INDEX_op_discard:
3985             /* mark the temporary as dead */
3986             ts = arg_temp(op->args[0]);
3987             ts->state = TS_DEAD;
3988             la_reset_pref(ts);
3989             break;
3990 
3991         case INDEX_op_add2_i32:
3992         case INDEX_op_add2_i64:
3993             opc_new = INDEX_op_add;
3994             goto do_addsub2;
3995         case INDEX_op_sub2_i32:
3996         case INDEX_op_sub2_i64:
3997             opc_new = INDEX_op_sub;
3998         do_addsub2:
3999             nb_iargs = 4;
4000             nb_oargs = 2;
4001             /* Test if the high part of the operation is dead, but not
4002                the low part.  The result can be optimized to a simple
4003                add or sub.  This happens often for an x86_64 guest when the
4004                CPU mode is set to 32 bit.  */
4005             if (arg_temp(op->args[1])->state == TS_DEAD) {
4006                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4007                     goto do_remove;
4008                 }
4009                 /* Replace the opcode and adjust the args in place,
4010                    leaving 3 unused args at the end.  */
4011                 op->opc = opc = opc_new;
4012                 op->args[1] = op->args[2];
4013                 op->args[2] = op->args[4];
4014                 /* Fall through and mark the single-word operation live.  */
4015                 nb_iargs = 2;
4016                 nb_oargs = 1;
4017             }
4018             goto do_not_remove;
4019 
4020         case INDEX_op_muls2_i32:
4021         case INDEX_op_muls2_i64:
4022             opc_new = INDEX_op_mul;
4023             opc_new2 = INDEX_op_mulsh;
4024             goto do_mul2;
4025         case INDEX_op_mulu2_i32:
4026         case INDEX_op_mulu2_i64:
4027             opc_new = INDEX_op_mul;
4028             opc_new2 = INDEX_op_muluh;
4029         do_mul2:
4030             nb_iargs = 2;
4031             nb_oargs = 2;
4032             if (arg_temp(op->args[1])->state == TS_DEAD) {
4033                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4034                     /* Both parts of the operation are dead.  */
4035                     goto do_remove;
4036                 }
4037                 /* The high part of the operation is dead; generate the low. */
4038                 op->opc = opc = opc_new;
4039                 op->args[1] = op->args[2];
4040                 op->args[2] = op->args[3];
4041             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4042                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4043                 /* The low part of the operation is dead; generate the high. */
4044                 op->opc = opc = opc_new2;
4045                 op->args[0] = op->args[1];
4046                 op->args[1] = op->args[2];
4047                 op->args[2] = op->args[3];
4048             } else {
4049                 goto do_not_remove;
4050             }
4051             /* Mark the single-word operation live.  */
4052             nb_oargs = 1;
4053             goto do_not_remove;
4054 
4055         default:
4056             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4057             nb_iargs = def->nb_iargs;
4058             nb_oargs = def->nb_oargs;
4059 
4060             /* Test if the operation can be removed because all
4061                its outputs are dead. We assume that nb_oargs == 0
4062                implies side effects.  */
4063             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4064                 for (i = 0; i < nb_oargs; i++) {
4065                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4066                         goto do_not_remove;
4067                     }
4068                 }
4069                 goto do_remove;
4070             }
4071             goto do_not_remove;
4072 
4073         do_remove:
4074             tcg_op_remove(s, op);
4075             break;
4076 
4077         do_not_remove:
4078             for (i = 0; i < nb_oargs; i++) {
4079                 ts = arg_temp(op->args[i]);
4080 
4081                 /* Remember the preference of the uses that followed.  */
4082                 if (i < ARRAY_SIZE(op->output_pref)) {
4083                     op->output_pref[i] = *la_temp_pref(ts);
4084                 }
4085 
4086                 /* Output args are dead.  */
4087                 if (ts->state & TS_DEAD) {
4088                     arg_life |= DEAD_ARG << i;
4089                 }
4090                 if (ts->state & TS_MEM) {
4091                     arg_life |= SYNC_ARG << i;
4092                 }
4093                 ts->state = TS_DEAD;
4094                 la_reset_pref(ts);
4095             }
4096 
4097             /* If end of basic block, update.  */
4098             if (def->flags & TCG_OPF_BB_EXIT) {
4099                 la_func_end(s, nb_globals, nb_temps);
4100             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4101                 la_bb_sync(s, nb_globals, nb_temps);
4102             } else if (def->flags & TCG_OPF_BB_END) {
4103                 la_bb_end(s, nb_globals, nb_temps);
4104             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4105                 la_global_sync(s, nb_globals);
4106                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4107                     la_cross_call(s, nb_temps);
4108                 }
4109             }
4110 
4111             /* Record arguments that die in this opcode.  */
4112             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4113                 ts = arg_temp(op->args[i]);
4114                 if (ts->state & TS_DEAD) {
4115                     arg_life |= DEAD_ARG << i;
4116                 }
4117             }
4118 
4119             /* Input arguments are live for preceding opcodes.  */
4120             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4121                 ts = arg_temp(op->args[i]);
4122                 if (ts->state & TS_DEAD) {
4123                     /* For operands that were dead, initially allow
4124                        all regs for the type.  */
4125                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4126                     ts->state &= ~TS_DEAD;
4127                 }
4128             }
4129 
4130             /* Incorporate constraints for this operand.  */
4131             switch (opc) {
4132             case INDEX_op_mov:
4133                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4134                    have proper constraints.  That said, special case
4135                    moves to propagate preferences backward.  */
4136                 if (IS_DEAD_ARG(1)) {
4137                     *la_temp_pref(arg_temp(op->args[0]))
4138                         = *la_temp_pref(arg_temp(op->args[1]));
4139                 }
4140                 break;
4141 
4142             default:
4143                 args_ct = opcode_args_ct(op);
4144                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4145                     const TCGArgConstraint *ct = &args_ct[i];
4146                     TCGRegSet set, *pset;
4147 
4148                     ts = arg_temp(op->args[i]);
4149                     pset = la_temp_pref(ts);
4150                     set = *pset;
4151 
4152                     set &= ct->regs;
4153                     if (ct->ialias) {
4154                         set &= output_pref(op, ct->alias_index);
4155                     }
4156                     /* If the combination is not possible, restart.  */
4157                     if (set == 0) {
4158                         set = ct->regs;
4159                     }
4160                     *pset = set;
4161                 }
4162                 break;
4163             }
4164             break;
4165         }
4166         op->life = arg_life;
4167     }
4168 }
4169 
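/*
 * Background (summary, not part of the original comments): an
 * "indirect" global is kept in memory and reached via loads/stores
 * from its mem_base pointer.  The pass below shadows each such global
 * with a TEMP_EBB temp, inserting explicit ld ops before uses and st
 * ops after definitions, so later stages see only direct temporaries.
 */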
4170 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4171 static bool __attribute__((noinline))
4172 liveness_pass_2(TCGContext *s)
4173 {
4174     int nb_globals = s->nb_globals;
4175     int nb_temps, i;
4176     bool changes = false;
4177     TCGOp *op, *op_next;
4178 
4179     /* Create a temporary for each indirect global.  */
4180     for (i = 0; i < nb_globals; ++i) {
4181         TCGTemp *its = &s->temps[i];
4182         if (its->indirect_reg) {
4183             TCGTemp *dts = tcg_temp_alloc(s);
4184             dts->type = its->type;
4185             dts->base_type = its->base_type;
4186             dts->temp_subindex = its->temp_subindex;
4187             dts->kind = TEMP_EBB;
4188             its->state_ptr = dts;
4189         } else {
4190             its->state_ptr = NULL;
4191         }
4192         /* All globals begin dead.  */
4193         its->state = TS_DEAD;
4194     }
4195     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4196         TCGTemp *its = &s->temps[i];
4197         its->state_ptr = NULL;
4198         its->state = TS_DEAD;
4199     }
4200 
4201     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4202         TCGOpcode opc = op->opc;
4203         const TCGOpDef *def = &tcg_op_defs[opc];
4204         TCGLifeData arg_life = op->life;
4205         int nb_iargs, nb_oargs, call_flags;
4206         TCGTemp *arg_ts, *dir_ts;
4207 
4208         if (opc == INDEX_op_call) {
4209             nb_oargs = TCGOP_CALLO(op);
4210             nb_iargs = TCGOP_CALLI(op);
4211             call_flags = tcg_call_flags(op);
4212         } else {
4213             nb_iargs = def->nb_iargs;
4214             nb_oargs = def->nb_oargs;
4215 
4216             /* Set flags similar to how calls require.  */
4217             if (def->flags & TCG_OPF_COND_BRANCH) {
4218                 /* Like reading globals: sync_globals */
4219                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4220             } else if (def->flags & TCG_OPF_BB_END) {
4221                 /* Like writing globals: save_globals */
4222                 call_flags = 0;
4223             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4224                 /* Like reading globals: sync_globals */
4225                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4226             } else {
4227                 /* No effect on globals.  */
4228                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4229                               TCG_CALL_NO_WRITE_GLOBALS);
4230             }
4231         }
4232 
4233         /* Make sure that input arguments are available.  */
4234         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4235             arg_ts = arg_temp(op->args[i]);
4236             dir_ts = arg_ts->state_ptr;
4237             if (dir_ts && arg_ts->state == TS_DEAD) {
4238                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4239                                   ? INDEX_op_ld_i32
4240                                   : INDEX_op_ld_i64);
4241                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4242                                                   arg_ts->type, 3);
4243 
4244                 lop->args[0] = temp_arg(dir_ts);
4245                 lop->args[1] = temp_arg(arg_ts->mem_base);
4246                 lop->args[2] = arg_ts->mem_offset;
4247 
4248                 /* Loaded, but synced with memory.  */
4249                 arg_ts->state = TS_MEM;
4250             }
4251         }
4252 
4253         /* Perform input replacement, and mark inputs that became dead.
4254            No action is required except keeping temp_state up to date
4255            so that we reload when needed.  */
4256         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4257             arg_ts = arg_temp(op->args[i]);
4258             dir_ts = arg_ts->state_ptr;
4259             if (dir_ts) {
4260                 op->args[i] = temp_arg(dir_ts);
4261                 changes = true;
4262                 if (IS_DEAD_ARG(i)) {
4263                     arg_ts->state = TS_DEAD;
4264                 }
4265             }
4266         }
4267 
4268         /* Liveness analysis should ensure that the following are
4269            all correct, for call sites and basic block end points.  */
4270         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4271             /* Nothing to do */
4272         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4273             for (i = 0; i < nb_globals; ++i) {
4274                 /* Liveness should see that globals are synced back,
4275                    that is, either TS_DEAD or TS_MEM.  */
4276                 arg_ts = &s->temps[i];
4277                 tcg_debug_assert(arg_ts->state_ptr == 0
4278                                  || arg_ts->state != 0);
4279             }
4280         } else {
4281             for (i = 0; i < nb_globals; ++i) {
4282                 /* Liveness should see that globals are saved back,
4283                    that is, TS_DEAD, waiting to be reloaded.  */
4284                 arg_ts = &s->temps[i];
4285                 tcg_debug_assert(arg_ts->state_ptr == 0
4286                                  || arg_ts->state == TS_DEAD);
4287             }
4288         }
4289 
4290         /* Outputs become available.  */
4291         if (opc == INDEX_op_mov) {
4292             arg_ts = arg_temp(op->args[0]);
4293             dir_ts = arg_ts->state_ptr;
4294             if (dir_ts) {
4295                 op->args[0] = temp_arg(dir_ts);
4296                 changes = true;
4297 
4298                 /* The output is now live and modified.  */
4299                 arg_ts->state = 0;
4300 
4301                 if (NEED_SYNC_ARG(0)) {
4302                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4303                                       ? INDEX_op_st_i32
4304                                       : INDEX_op_st_i64);
4305                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4306                                                      arg_ts->type, 3);
4307                     TCGTemp *out_ts = dir_ts;
4308 
4309                     if (IS_DEAD_ARG(0)) {
4310                         out_ts = arg_temp(op->args[1]);
4311                         arg_ts->state = TS_DEAD;
4312                         tcg_op_remove(s, op);
4313                     } else {
4314                         arg_ts->state = TS_MEM;
4315                     }
4316 
4317                     sop->args[0] = temp_arg(out_ts);
4318                     sop->args[1] = temp_arg(arg_ts->mem_base);
4319                     sop->args[2] = arg_ts->mem_offset;
4320                 } else {
4321                     tcg_debug_assert(!IS_DEAD_ARG(0));
4322                 }
4323             }
4324         } else {
4325             for (i = 0; i < nb_oargs; i++) {
4326                 arg_ts = arg_temp(op->args[i]);
4327                 dir_ts = arg_ts->state_ptr;
4328                 if (!dir_ts) {
4329                     continue;
4330                 }
4331                 op->args[i] = temp_arg(dir_ts);
4332                 changes = true;
4333 
4334                 /* The output is now live and modified.  */
4335                 arg_ts->state = 0;
4336 
4337                 /* Sync outputs upon their last write.  */
4338                 if (NEED_SYNC_ARG(i)) {
4339                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4340                                       ? INDEX_op_st_i32
4341                                       : INDEX_op_st_i64);
4342                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4343                                                      arg_ts->type, 3);
4344 
4345                     sop->args[0] = temp_arg(dir_ts);
4346                     sop->args[1] = temp_arg(arg_ts->mem_base);
4347                     sop->args[2] = arg_ts->mem_offset;
4348 
4349                     arg_ts->state = TS_MEM;
4350                 }
4351                 /* Drop outputs that are dead.  */
4352                 if (IS_DEAD_ARG(i)) {
4353                     arg_ts->state = TS_DEAD;
4354                 }
4355             }
4356         }
4357     }
4358 
4359     return changes;
4360 }
4361 
4362 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4363 {
4364     intptr_t off;
4365     int size, align;
4366 
4367     /* When allocating an object, look at the full type. */
4368     size = tcg_type_size(ts->base_type);
4369     switch (ts->base_type) {
4370     case TCG_TYPE_I32:
4371         align = 4;
4372         break;
4373     case TCG_TYPE_I64:
4374     case TCG_TYPE_V64:
4375         align = 8;
4376         break;
4377     case TCG_TYPE_I128:
4378     case TCG_TYPE_V128:
4379     case TCG_TYPE_V256:
4380         /*
4381          * Note that we do not require aligned storage for V256,
4382          * and that we provide alignment for I128 to match V128,
4383          * even if that's above what the host ABI requires.
4384          */
4385         align = 16;
4386         break;
4387     default:
4388         g_assert_not_reached();
4389     }
4390 
4391     /*
4392      * Assume the stack is sufficiently aligned.
4393      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4394      * and do not require 16 byte vector alignment.  This seems slightly
4395      * easier than fully parameterizing the above switch statement.
4396      */
4397     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4398     off = ROUND_UP(s->current_frame_offset, align);
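    /* Worked example (illustrative): with current_frame_offset == 40,
       a TCG_TYPE_I128 temp (size 16, align 16) is placed at offset 48;
       if TCG_TARGET_STACK_ALIGN were only 8, align is capped at 8 and
       the slot would start at offset 40. */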
4399 
4400     /* If we've exhausted the stack frame, restart with a smaller TB. */
4401     if (off + size > s->frame_end) {
4402         tcg_raise_tb_overflow(s);
4403     }
4404     s->current_frame_offset = off + size;
4405 #if defined(__sparc__)
4406     off += TCG_TARGET_STACK_BIAS;
4407 #endif
4408 
4409     /* If the object was subdivided, assign memory to all the parts. */
4410     if (ts->base_type != ts->type) {
4411         int part_size = tcg_type_size(ts->type);
4412         int part_count = size / part_size;
4413 
4414         /*
4415          * Each part is allocated sequentially in tcg_temp_new_internal.
4416          * Jump back to the first part by subtracting the current index.
4417          */
4418         ts -= ts->temp_subindex;
4419         for (int i = 0; i < part_count; ++i) {
4420             ts[i].mem_offset = off + i * part_size;
4421             ts[i].mem_base = s->frame_temp;
4422             ts[i].mem_allocated = 1;
4423         }
4424     } else {
4425         ts->mem_offset = off;
4426         ts->mem_base = s->frame_temp;
4427         ts->mem_allocated = 1;
4428     }
4429 }
4430 
4431 /* Assign @reg to @ts, and update reg_to_temp[]. */
4432 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4433 {
4434     if (ts->val_type == TEMP_VAL_REG) {
4435         TCGReg old = ts->reg;
4436         tcg_debug_assert(s->reg_to_temp[old] == ts);
4437         if (old == reg) {
4438             return;
4439         }
4440         s->reg_to_temp[old] = NULL;
4441     }
4442     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4443     s->reg_to_temp[reg] = ts;
4444     ts->val_type = TEMP_VAL_REG;
4445     ts->reg = reg;
4446 }
4447 
4448 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4449 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4450 {
4451     tcg_debug_assert(type != TEMP_VAL_REG);
4452     if (ts->val_type == TEMP_VAL_REG) {
4453         TCGReg reg = ts->reg;
4454         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4455         s->reg_to_temp[reg] = NULL;
4456     }
4457     ts->val_type = type;
4458 }
4459 
4460 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4461 
4462 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4463    mark it free; otherwise mark it dead.  */
4464 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4465 {
4466     TCGTempVal new_type;
4467 
4468     switch (ts->kind) {
4469     case TEMP_FIXED:
4470         return;
4471     case TEMP_GLOBAL:
4472     case TEMP_TB:
4473         new_type = TEMP_VAL_MEM;
4474         break;
4475     case TEMP_EBB:
4476         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4477         break;
4478     case TEMP_CONST:
4479         new_type = TEMP_VAL_CONST;
4480         break;
4481     default:
4482         g_assert_not_reached();
4483     }
4484     set_temp_val_nonreg(s, ts, new_type);
4485 }
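/*
 * Summary of the mapping above: fixed temps are never released;
 * globals and TB-scope temps fall back to their canonical memory
 * slot; EBB temps become TEMP_VAL_MEM when freed but TEMP_VAL_DEAD
 * when dead; constants revert to TEMP_VAL_CONST and are simply
 * rematerialized on next use.
 */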
4486 
4487 /* Mark a temporary as dead.  */
4488 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4489 {
4490     temp_free_or_dead(s, ts, 1);
4491 }
4492 
4493 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4494    register needs to be allocated to store a constant.  If 'free_or_dead'
4495    is non-zero, subsequently release the temporary; if it is positive, the
4496    temp is dead; if it is negative, the temp is free.  */
4497 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4498                       TCGRegSet preferred_regs, int free_or_dead)
4499 {
4500     if (!temp_readonly(ts) && !ts->mem_coherent) {
4501         if (!ts->mem_allocated) {
4502             temp_allocate_frame(s, ts);
4503         }
4504         switch (ts->val_type) {
4505         case TEMP_VAL_CONST:
4506             /* If we're going to free the temp immediately, then we won't
4507                require it later in a register, so attempt to store the
4508                constant to memory directly.  */
4509             if (free_or_dead
4510                 && tcg_out_sti(s, ts->type, ts->val,
4511                                ts->mem_base->reg, ts->mem_offset)) {
4512                 break;
4513             }
4514             temp_load(s, ts, tcg_target_available_regs[ts->type],
4515                       allocated_regs, preferred_regs);
4516             /* fallthrough */
4517 
4518         case TEMP_VAL_REG:
4519             tcg_out_st(s, ts->type, ts->reg,
4520                        ts->mem_base->reg, ts->mem_offset);
4521             break;
4522 
4523         case TEMP_VAL_MEM:
4524             break;
4525 
4526         case TEMP_VAL_DEAD:
4527         default:
4528             g_assert_not_reached();
4529         }
4530         ts->mem_coherent = 1;
4531     }
4532     if (free_or_dead) {
4533         temp_free_or_dead(s, ts, free_or_dead);
4534     }
4535 }
4536 
4537 /* free register 'reg' by spilling the corresponding temporary if necessary */
4538 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4539 {
4540     TCGTemp *ts = s->reg_to_temp[reg];
4541     if (ts != NULL) {
4542         temp_sync(s, ts, allocated_regs, 0, -1);
4543     }
4544 }
4545 
4546 /**
4547  * tcg_reg_alloc:
4548  * @required_regs: Set of registers in which we must allocate.
4549  * @allocated_regs: Set of registers which must be avoided.
4550  * @preferred_regs: Set of registers we should prefer.
4551  * @rev: True if we search the registers in "indirect" order.
4552  *
4553  * The allocated register must be in @required_regs & ~@allocated_regs,
4554  * but if we can put it in @preferred_regs we may save a move later.
4555  */
4556 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4557                             TCGRegSet allocated_regs,
4558                             TCGRegSet preferred_regs, bool rev)
4559 {
4560     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4561     TCGRegSet reg_ct[2];
4562     const int *order;
4563 
4564     reg_ct[1] = required_regs & ~allocated_regs;
4565     tcg_debug_assert(reg_ct[1] != 0);
4566     reg_ct[0] = reg_ct[1] & preferred_regs;
4567 
4568     /* Skip the preferred_regs option if it cannot be satisfied,
4569        or if the preference made no difference.  */
4570     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4571 
4572     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4573 
4574     /* Try free registers, preferences first.  */
4575     for (j = f; j < 2; j++) {
4576         TCGRegSet set = reg_ct[j];
4577 
4578         if (tcg_regset_single(set)) {
4579             /* One register in the set.  */
4580             TCGReg reg = tcg_regset_first(set);
4581             if (s->reg_to_temp[reg] == NULL) {
4582                 return reg;
4583             }
4584         } else {
4585             for (i = 0; i < n; i++) {
4586                 TCGReg reg = order[i];
4587                 if (s->reg_to_temp[reg] == NULL &&
4588                     tcg_regset_test_reg(set, reg)) {
4589                     return reg;
4590                 }
4591             }
4592         }
4593     }
4594 
4595     /* We must spill something.  */
4596     for (j = f; j < 2; j++) {
4597         TCGRegSet set = reg_ct[j];
4598 
4599         if (tcg_regset_single(set)) {
4600             /* One register in the set.  */
4601             TCGReg reg = tcg_regset_first(set);
4602             tcg_reg_free(s, reg, allocated_regs);
4603             return reg;
4604         } else {
4605             for (i = 0; i < n; i++) {
4606                 TCGReg reg = order[i];
4607                 if (tcg_regset_test_reg(set, reg)) {
4608                     tcg_reg_free(s, reg, allocated_regs);
4609                     return reg;
4610                 }
4611             }
4612         }
4613     }
4614 
4615     g_assert_not_reached();
4616 }
4617 
4618 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4619                                  TCGRegSet allocated_regs,
4620                                  TCGRegSet preferred_regs, bool rev)
4621 {
4622     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4623     TCGRegSet reg_ct[2];
4624     const int *order;
4625 
4626     /* Keep reg I as a candidate only if neither I nor I+1 is already allocated. */
4627     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4628     tcg_debug_assert(reg_ct[1] != 0);
4629     reg_ct[0] = reg_ct[1] & preferred_regs;
4630 
4631     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4632 
4633     /*
4634      * Skip the preferred_regs option if it cannot be satisfied,
4635      * or if the preference made no difference.
4636      */
4637     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4638 
4639     /*
4640      * Minimize the number of flushes by looking for 2 free registers first,
4641      * then a single flush, then two flushes.
4642      */
4643     for (fmin = 2; fmin >= 0; fmin--) {
4644         for (j = k; j < 2; j++) {
4645             TCGRegSet set = reg_ct[j];
4646 
4647             for (i = 0; i < n; i++) {
4648                 TCGReg reg = order[i];
4649 
4650                 if (tcg_regset_test_reg(set, reg)) {
4651                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4652                     if (f >= fmin) {
4653                         tcg_reg_free(s, reg, allocated_regs);
4654                         tcg_reg_free(s, reg + 1, allocated_regs);
4655                         return reg;
4656                     }
4657                 }
4658             }
4659         }
4660     }
4661     g_assert_not_reached();
4662 }
4663 
4664 /* Make sure the temporary is in a register.  If needed, allocate the register
4665    from DESIRED while avoiding ALLOCATED.  */
4666 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4667                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4668 {
4669     TCGReg reg;
4670 
4671     switch (ts->val_type) {
4672     case TEMP_VAL_REG:
4673         return;
4674     case TEMP_VAL_CONST:
4675         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4676                             preferred_regs, ts->indirect_base);
4677         if (ts->type <= TCG_TYPE_I64) {
4678             tcg_out_movi(s, ts->type, reg, ts->val);
4679         } else {
4680             uint64_t val = ts->val;
4681             MemOp vece = MO_64;
4682 
4683             /*
4684              * Find the minimal vector element that matches the constant.
4685              * The targets will, in general, have to do this search
4686              * anyway; do it generically here.
4687              */
4688             if (val == dup_const(MO_8, val)) {
4689                 vece = MO_8;
4690             } else if (val == dup_const(MO_16, val)) {
4691                 vece = MO_16;
4692             } else if (val == dup_const(MO_32, val)) {
4693                 vece = MO_32;
4694             }
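            /*
             * For example, val == 0x6767676767676767 already matches at
             * dup_const(MO_8, val) and selects vece = MO_8, while
             * val == 0x1234123412341234 first matches at MO_16.
             */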
4695 
4696             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4697         }
4698         ts->mem_coherent = 0;
4699         break;
4700     case TEMP_VAL_MEM:
4701         if (!ts->mem_allocated) {
4702             temp_allocate_frame(s, ts);
4703         }
4704         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4705                             preferred_regs, ts->indirect_base);
4706         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4707         ts->mem_coherent = 1;
4708         break;
4709     case TEMP_VAL_DEAD:
4710     default:
4711         g_assert_not_reached();
4712     }
4713     set_temp_val_reg(s, ts, reg);
4714 }
4715 
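/*
 * E.g. loading a TEMP_VAL_CONST i32 temp holding 42 allocates a register
 * via tcg_reg_alloc(), emits the constant with tcg_out_movi(), and clears
 * mem_coherent: the value now lives only in the register.
 */
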
4716 /* Save a temporary to memory. 'allocated_regs' is used in case a
4717    temporary register needs to be allocated to store a constant.  */
4718 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4719 {
4720     /* The liveness analysis already ensures that globals are back
4721        in memory. Keep a tcg_debug_assert for safety. */
4722     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4723 }
4724 
4725 /* save globals to their canonical location and assume they can be
4726    modified by the following code. 'allocated_regs' is used in case a
4727    temporary register needs to be allocated to store a constant. */
4728 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4729 {
4730     int i, n;
4731 
4732     for (i = 0, n = s->nb_globals; i < n; i++) {
4733         temp_save(s, &s->temps[i], allocated_regs);
4734     }
4735 }
4736 
4737 /* sync globals to their canonical location and assume they can be
4738    read by the following code. 'allocated_regs' is used in case a
4739    temporary register needs to be allocated to store a constant. */
4740 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4741 {
4742     int i, n;
4743 
4744     for (i = 0, n = s->nb_globals; i < n; i++) {
4745         TCGTemp *ts = &s->temps[i];
4746         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4747                          || ts->kind == TEMP_FIXED
4748                          || ts->mem_coherent);
4749     }
4750 }
4751 
4752 /* at the end of a basic block, we assume all temporaries are dead and
4753    all globals are stored at their canonical location. */
4754 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4755 {
4756     int i;
4757 
4758     for (i = s->nb_globals; i < s->nb_temps; i++) {
4759         TCGTemp *ts = &s->temps[i];
4760 
4761         switch (ts->kind) {
4762         case TEMP_TB:
4763             temp_save(s, ts, allocated_regs);
4764             break;
4765         case TEMP_EBB:
4766             /* The liveness analysis already ensures that temps are dead.
4767                Keep a tcg_debug_assert for safety. */
4768             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4769             break;
4770         case TEMP_CONST:
4771             /* Similarly, we should have freed any allocated register. */
4772             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4773             break;
4774         default:
4775             g_assert_not_reached();
4776         }
4777     }
4778 
4779     save_globals(s, allocated_regs);
4780 }
4781 
4782 /*
4783  * At a conditional branch, we assume all temporaries are dead unless
4784  * explicitly live-across-conditional-branch; all globals and local
4785  * temps are synced to their location.
4786  */
4787 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4788 {
4789     sync_globals(s, allocated_regs);
4790 
4791     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4792         TCGTemp *ts = &s->temps[i];
4793         /*
4794          * The liveness analysis already ensures that temps are dead.
4795          * Keep tcg_debug_asserts for safety.
4796          */
4797         switch (ts->kind) {
4798         case TEMP_TB:
4799             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4800             break;
4801         case TEMP_EBB:
4802         case TEMP_CONST:
4803             break;
4804         default:
4805             g_assert_not_reached();
4806         }
4807     }
4808 }
4809 
4810 /*
4811  * Specialized code generation for INDEX_op_mov_* with a constant.
4812  */
4813 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4814                                   tcg_target_ulong val, TCGLifeData arg_life,
4815                                   TCGRegSet preferred_regs)
4816 {
4817     /* ENV should not be modified.  */
4818     tcg_debug_assert(!temp_readonly(ots));
4819 
4820     /* The movi is not explicitly generated here.  */
4821     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4822     ots->val = val;
4823     ots->mem_coherent = 0;
4824     if (NEED_SYNC_ARG(0)) {
4825         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4826     } else if (IS_DEAD_ARG(0)) {
4827         temp_dead(s, ots);
4828     }
4829 }
4830 
4831 /*
4832  * Specialized code generation for INDEX_op_mov_*.
4833  */
4834 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4835 {
4836     const TCGLifeData arg_life = op->life;
4837     TCGRegSet allocated_regs, preferred_regs;
4838     TCGTemp *ts, *ots;
4839     TCGType otype, itype;
4840     TCGReg oreg, ireg;
4841 
4842     allocated_regs = s->reserved_regs;
4843     preferred_regs = output_pref(op, 0);
4844     ots = arg_temp(op->args[0]);
4845     ts = arg_temp(op->args[1]);
4846 
4847     /* ENV should not be modified.  */
4848     tcg_debug_assert(!temp_readonly(ots));
4849 
4850     /* Note that otype != itype for no-op truncation.  */
4851     otype = ots->type;
4852     itype = ts->type;
4853 
4854     if (ts->val_type == TEMP_VAL_CONST) {
4855         /* propagate constant or generate sti */
4856         tcg_target_ulong val = ts->val;
4857         if (IS_DEAD_ARG(1)) {
4858             temp_dead(s, ts);
4859         }
4860         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4861         return;
4862     }
4863 
4864     /* If the source value is in memory we're going to be forced
4865        to have it in a register in order to perform the copy.  Copy
4866        the SOURCE value into its own register first, that way we
4867        don't have to reload SOURCE the next time it is used. */
4868     if (ts->val_type == TEMP_VAL_MEM) {
4869         temp_load(s, ts, tcg_target_available_regs[itype],
4870                   allocated_regs, preferred_regs);
4871     }
4872     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4873     ireg = ts->reg;
4874 
4875     if (IS_DEAD_ARG(0)) {
4876         /* mov to a non-saved dead register makes no sense (even with
4877            liveness analysis disabled). */
4878         tcg_debug_assert(NEED_SYNC_ARG(0));
4879         if (!ots->mem_allocated) {
4880             temp_allocate_frame(s, ots);
4881         }
4882         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4883         if (IS_DEAD_ARG(1)) {
4884             temp_dead(s, ts);
4885         }
4886         temp_dead(s, ots);
4887         return;
4888     }
4889 
4890     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4891         /*
4892          * The mov can be suppressed.  Kill input first, so that it
4893          * is unlinked from reg_to_temp, then set the output to the
4894          * reg that we saved from the input.
4895          */
4896         temp_dead(s, ts);
4897         oreg = ireg;
4898     } else {
4899         if (ots->val_type == TEMP_VAL_REG) {
4900             oreg = ots->reg;
4901         } else {
4902             /* Make sure to not spill the input register during allocation. */
4903             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4904                                  allocated_regs | ((TCGRegSet)1 << ireg),
4905                                  preferred_regs, ots->indirect_base);
4906         }
4907         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4908             /*
4909              * Cross register class move not supported.
4910              * Store the source register into the destination slot
4911              * and leave the destination temp as TEMP_VAL_MEM.
4912              */
4913             assert(!temp_readonly(ots));
4914             if (!ots->mem_allocated) {
4915                 temp_allocate_frame(s, ots);
4916             }
4917             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4918             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4919             ots->mem_coherent = 1;
4920             return;
4921         }
4922     }
4923     set_temp_val_reg(s, ots, oreg);
4924     ots->mem_coherent = 0;
4925 
4926     if (NEED_SYNC_ARG(0)) {
4927         temp_sync(s, ots, allocated_regs, 0, 0);
4928     }
4929 }
4930 
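/*
 * E.g. for "mov t1, t0" where t0 dies at this opcode and is not
 * TEMP_FIXED, the copy is elided entirely: t0 is unlinked from its
 * register and t1 inherits that register directly.
 */
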
4931 /*
4932  * Specialized code generation for INDEX_op_dup_vec.
4933  */
4934 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4935 {
4936     const TCGLifeData arg_life = op->life;
4937     TCGRegSet dup_out_regs, dup_in_regs;
4938     const TCGArgConstraint *dup_args_ct;
4939     TCGTemp *its, *ots;
4940     TCGType itype, vtype;
4941     unsigned vece;
4942     int lowpart_ofs;
4943     bool ok;
4944 
4945     ots = arg_temp(op->args[0]);
4946     its = arg_temp(op->args[1]);
4947 
4948     /* ENV should not be modified.  */
4949     tcg_debug_assert(!temp_readonly(ots));
4950 
4951     itype = its->type;
4952     vece = TCGOP_VECE(op);
4953     vtype = TCGOP_TYPE(op);
4954 
4955     if (its->val_type == TEMP_VAL_CONST) {
4956         /* Propagate constant via movi -> dupi.  */
4957         tcg_target_ulong val = its->val;
4958         if (IS_DEAD_ARG(1)) {
4959             temp_dead(s, its);
4960         }
4961         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4962         return;
4963     }
4964 
4965     dup_args_ct = opcode_args_ct(op);
4966     dup_out_regs = dup_args_ct[0].regs;
4967     dup_in_regs = dup_args_ct[1].regs;
4968 
4969     /* Allocate the output register now.  */
4970     if (ots->val_type != TEMP_VAL_REG) {
4971         TCGRegSet allocated_regs = s->reserved_regs;
4972         TCGReg oreg;
4973 
4974         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4975             /* Make sure to not spill the input register. */
4976             tcg_regset_set_reg(allocated_regs, its->reg);
4977         }
4978         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4979                              output_pref(op, 0), ots->indirect_base);
4980         set_temp_val_reg(s, ots, oreg);
4981     }
4982 
4983     switch (its->val_type) {
4984     case TEMP_VAL_REG:
4985          * The dup constraints must be broad, covering all possible VECE.
4986          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4987          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4988          * to fail, indicating that extra moves are required for that case.
4989          */
4990         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4991             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4992                 goto done;
4993             }
4994             /* Try again from memory or a vector input register.  */
4995         }
4996         if (!its->mem_coherent) {
4997             /*
4998              * The input register is not synced, and so an extra store
4999              * would be required to use memory.  Attempt an integer-vector
5000              * register move first.  We do not have a TCGRegSet for this.
5001              */
5002             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5003                 break;
5004             }
5005             /* Sync the temp back to its slot and load from there.  */
5006             temp_sync(s, its, s->reserved_regs, 0, 0);
5007         }
5008         /* fall through */
5009 
5010     case TEMP_VAL_MEM:
5011         lowpart_ofs = 0;
5012         if (HOST_BIG_ENDIAN) {
5013             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5014         }
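        /*
         * E.g. on a big-endian host with itype == TCG_TYPE_I64 and
         * vece == MO_8, lowpart_ofs == 7: the least significant byte
         * sits at the highest address within the 8-byte slot.
         */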
5015         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5016                              its->mem_offset + lowpart_ofs)) {
5017             goto done;
5018         }
5019         /* Load the input into the destination vector register. */
5020         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5021         break;
5022 
5023     default:
5024         g_assert_not_reached();
5025     }
5026 
5027     /* We now have a vector input register, so dup must succeed. */
5028     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5029     tcg_debug_assert(ok);
5030 
5031  done:
5032     ots->mem_coherent = 0;
5033     if (IS_DEAD_ARG(1)) {
5034         temp_dead(s, its);
5035     }
5036     if (NEED_SYNC_ARG(0)) {
5037         temp_sync(s, ots, s->reserved_regs, 0, 0);
5038     }
5039     if (IS_DEAD_ARG(0)) {
5040         temp_dead(s, ots);
5041     }
5042 }
5043 
5044 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5045 {
5046     const TCGLifeData arg_life = op->life;
5047     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5048     TCGRegSet i_allocated_regs;
5049     TCGRegSet o_allocated_regs;
5050     int i, k, nb_iargs, nb_oargs;
5051     TCGReg reg;
5052     TCGArg arg;
5053     const TCGArgConstraint *args_ct;
5054     const TCGArgConstraint *arg_ct;
5055     TCGTemp *ts;
5056     TCGArg new_args[TCG_MAX_OP_ARGS];
5057     int const_args[TCG_MAX_OP_ARGS];
5058     TCGCond op_cond;
5059 
5060     nb_oargs = def->nb_oargs;
5061     nb_iargs = def->nb_iargs;
5062 
5063     /* copy constants */
5064     memcpy(new_args + nb_oargs + nb_iargs,
5065            op->args + nb_oargs + nb_iargs,
5066            sizeof(TCGArg) * def->nb_cargs);
5067 
5068     i_allocated_regs = s->reserved_regs;
5069     o_allocated_regs = s->reserved_regs;
5070 
5071     switch (op->opc) {
5072     case INDEX_op_brcond_i32:
5073     case INDEX_op_brcond_i64:
5074         op_cond = op->args[2];
5075         break;
5076     case INDEX_op_setcond_i32:
5077     case INDEX_op_setcond_i64:
5078     case INDEX_op_negsetcond_i32:
5079     case INDEX_op_negsetcond_i64:
5080     case INDEX_op_cmp_vec:
5081         op_cond = op->args[3];
5082         break;
5083     case INDEX_op_brcond2_i32:
5084         op_cond = op->args[4];
5085         break;
5086     case INDEX_op_movcond_i32:
5087     case INDEX_op_movcond_i64:
5088     case INDEX_op_setcond2_i32:
5089     case INDEX_op_cmpsel_vec:
5090         op_cond = op->args[5];
5091         break;
5092     default:
5093         /* No condition within opcode. */
5094         op_cond = TCG_COND_ALWAYS;
5095         break;
5096     }
5097 
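    /*
     * The extracted condition is passed to tcg_target_const_match()
     * below, allowing a backend to accept a constant operand for some
     * comparisons while rejecting it for others.
     */
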
5098     args_ct = opcode_args_ct(op);
5099 
5100     /* satisfy input constraints */
5101     for (k = 0; k < nb_iargs; k++) {
5102         TCGRegSet i_preferred_regs, i_required_regs;
5103         bool allocate_new_reg, copyto_new_reg;
5104         TCGTemp *ts2;
5105         int i1, i2;
5106 
5107         i = args_ct[nb_oargs + k].sort_index;
5108         arg = op->args[i];
5109         arg_ct = &args_ct[i];
5110         ts = arg_temp(arg);
5111 
5112         if (ts->val_type == TEMP_VAL_CONST) {
5113 #ifdef TCG_REG_ZERO
5114             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5115                 /* Hardware zero register: indicate register via non-const. */
5116                 const_args[i] = 0;
5117                 new_args[i] = TCG_REG_ZERO;
5118                 continue;
5119             }
5120 #endif
5121 
5122             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5123                                        op_cond, TCGOP_VECE(op))) {
5124                 /* constant is OK for instruction */
5125                 const_args[i] = 1;
5126                 new_args[i] = ts->val;
5127                 continue;
5128             }
5129         }
5130 
5131         reg = ts->reg;
5132         i_preferred_regs = 0;
5133         i_required_regs = arg_ct->regs;
5134         allocate_new_reg = false;
5135         copyto_new_reg = false;
5136 
5137         switch (arg_ct->pair) {
5138         case 0: /* not paired */
5139             if (arg_ct->ialias) {
5140                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5141 
5142                 /*
5143                  * If the input is readonly, then it cannot also be an
5144                  * output and aliased to itself.  If the input is not
5145                  * dead after the instruction, we must allocate a new
5146                  * register and move it.
5147                  */
5148                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5149                     || args_ct[arg_ct->alias_index].newreg) {
5150                     allocate_new_reg = true;
5151                 } else if (ts->val_type == TEMP_VAL_REG) {
5152                     /*
5153                      * Check if the current register has already been
5154                      * allocated for another input.
5155                      */
5156                     allocate_new_reg =
5157                         tcg_regset_test_reg(i_allocated_regs, reg);
5158                 }
5159             }
5160             if (!allocate_new_reg) {
5161                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5162                           i_preferred_regs);
5163                 reg = ts->reg;
5164                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5165             }
5166             if (allocate_new_reg) {
5167                 /*
5168                  * Allocate a new register matching the constraint
5169                  * and move the temporary register into it.
5170                  */
5171                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5172                           i_allocated_regs, 0);
5173                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5174                                     i_preferred_regs, ts->indirect_base);
5175                 copyto_new_reg = true;
5176             }
5177             break;
5178 
5179         case 1:
5180             /* First of an input pair; if i1 == i2, the second is an output. */
5181             i1 = i;
5182             i2 = arg_ct->pair_index;
5183             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5184 
5185             /*
5186              * It is easier to default to allocating a new pair
5187              * and to identify a few cases where it's not required.
5188              */
5189             if (arg_ct->ialias) {
5190                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5191                 if (IS_DEAD_ARG(i1) &&
5192                     IS_DEAD_ARG(i2) &&
5193                     !temp_readonly(ts) &&
5194                     ts->val_type == TEMP_VAL_REG &&
5195                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5196                     tcg_regset_test_reg(i_required_regs, reg) &&
5197                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5198                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5199                     (ts2
5200                      ? ts2->val_type == TEMP_VAL_REG &&
5201                        ts2->reg == reg + 1 &&
5202                        !temp_readonly(ts2)
5203                      : s->reg_to_temp[reg + 1] == NULL)) {
5204                     break;
5205                 }
5206             } else {
5207                 /* Without aliasing, the pair must also be an input. */
5208                 tcg_debug_assert(ts2);
5209                 if (ts->val_type == TEMP_VAL_REG &&
5210                     ts2->val_type == TEMP_VAL_REG &&
5211                     ts2->reg == reg + 1 &&
5212                     tcg_regset_test_reg(i_required_regs, reg)) {
5213                     break;
5214                 }
5215             }
5216             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5217                                      0, ts->indirect_base);
5218             goto do_pair;
5219 
5220         case 2: /* pair second */
5221             reg = new_args[arg_ct->pair_index] + 1;
5222             goto do_pair;
5223 
5224         case 3: /* ialias with second output, no first input */
5225             tcg_debug_assert(arg_ct->ialias);
5226             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5227 
5228             if (IS_DEAD_ARG(i) &&
5229                 !temp_readonly(ts) &&
5230                 ts->val_type == TEMP_VAL_REG &&
5231                 reg > 0 &&
5232                 s->reg_to_temp[reg - 1] == NULL &&
5233                 tcg_regset_test_reg(i_required_regs, reg) &&
5234                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5235                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5236                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5237                 break;
5238             }
5239             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5240                                      i_allocated_regs, 0,
5241                                      ts->indirect_base);
5242             tcg_regset_set_reg(i_allocated_regs, reg);
5243             reg += 1;
5244             goto do_pair;
5245 
5246         do_pair:
5247             /*
5248              * If an aliased input is not dead after the instruction,
5249              * we must allocate a new register and move it.
5250              */
5251             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5252                 TCGRegSet t_allocated_regs = i_allocated_regs;
5253 
5254                 /*
5255                  * Because of the alias, and the continued life, make sure
5256                  * that the temp is somewhere *other* than the reg pair,
5257                  * and we get a copy in reg.
5258                  */
5259                 tcg_regset_set_reg(t_allocated_regs, reg);
5260                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5261                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5262                     /* If ts was already in reg, copy it somewhere else. */
5263                     TCGReg nr;
5264                     bool ok;
5265 
5266                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5267                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5268                                        t_allocated_regs, 0, ts->indirect_base);
5269                     ok = tcg_out_mov(s, ts->type, nr, reg);
5270                     tcg_debug_assert(ok);
5271 
5272                     set_temp_val_reg(s, ts, nr);
5273                 } else {
5274                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5275                               t_allocated_regs, 0);
5276                     copyto_new_reg = true;
5277                 }
5278             } else {
5279                 /* Preferably allocate to reg, otherwise copy. */
5280                 i_required_regs = (TCGRegSet)1 << reg;
5281                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5282                           i_preferred_regs);
5283                 copyto_new_reg = ts->reg != reg;
5284             }
5285             break;
5286 
5287         default:
5288             g_assert_not_reached();
5289         }
5290 
5291         if (copyto_new_reg) {
5292             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5293                 /*
5294                  * Cross register class move not supported.  Sync the
5295                  * temp back to its slot and load from there.
5296                  */
5297                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5298                 tcg_out_ld(s, ts->type, reg,
5299                            ts->mem_base->reg, ts->mem_offset);
5300             }
5301         }
5302         new_args[i] = reg;
5303         const_args[i] = 0;
5304         tcg_regset_set_reg(i_allocated_regs, reg);
5305     }
5306 
5307     /* mark dead temporaries and free the associated registers */
5308     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5309         if (IS_DEAD_ARG(i)) {
5310             temp_dead(s, arg_temp(op->args[i]));
5311         }
5312     }
5313 
5314     if (def->flags & TCG_OPF_COND_BRANCH) {
5315         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5316     } else if (def->flags & TCG_OPF_BB_END) {
5317         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5318     } else {
5319         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5320             /* XXX: permit generic clobber register list ? */
5321             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5322                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5323                     tcg_reg_free(s, i, i_allocated_regs);
5324                 }
5325             }
5326         }
5327         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5328             /* sync globals if the op has side effects and might trigger
5329                an exception. */
5330             sync_globals(s, i_allocated_regs);
5331         }
5332 
5333         /* satisfy the output constraints */
5334         for (k = 0; k < nb_oargs; k++) {
5335             i = args_ct[k].sort_index;
5336             arg = op->args[i];
5337             arg_ct = &args_ct[i];
5338             ts = arg_temp(arg);
5339 
5340             /* ENV should not be modified.  */
5341             tcg_debug_assert(!temp_readonly(ts));
5342 
5343             switch (arg_ct->pair) {
5344             case 0: /* not paired */
5345                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5346                     reg = new_args[arg_ct->alias_index];
5347                 } else if (arg_ct->newreg) {
5348                     reg = tcg_reg_alloc(s, arg_ct->regs,
5349                                         i_allocated_regs | o_allocated_regs,
5350                                         output_pref(op, k), ts->indirect_base);
5351                 } else {
5352                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5353                                         output_pref(op, k), ts->indirect_base);
5354                 }
5355                 break;
5356 
5357             case 1: /* first of pair */
5358                 if (arg_ct->oalias) {
5359                     reg = new_args[arg_ct->alias_index];
5360                 } else if (arg_ct->newreg) {
5361                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5362                                              i_allocated_regs | o_allocated_regs,
5363                                              output_pref(op, k),
5364                                              ts->indirect_base);
5365                 } else {
5366                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5367                                              output_pref(op, k),
5368                                              ts->indirect_base);
5369                 }
5370                 break;
5371 
5372             case 2: /* second of pair */
5373                 if (arg_ct->oalias) {
5374                     reg = new_args[arg_ct->alias_index];
5375                 } else {
5376                     reg = new_args[arg_ct->pair_index] + 1;
5377                 }
5378                 break;
5379 
5380             case 3: /* first of pair, aliasing with a second input */
5381                 tcg_debug_assert(!arg_ct->newreg);
5382                 reg = new_args[arg_ct->pair_index] - 1;
5383                 break;
5384 
5385             default:
5386                 g_assert_not_reached();
5387             }
5388             tcg_regset_set_reg(o_allocated_regs, reg);
5389             set_temp_val_reg(s, ts, reg);
5390             ts->mem_coherent = 0;
5391             new_args[i] = reg;
5392         }
5393     }
5394 
5395     /* emit instruction */
5396     TCGType type = TCGOP_TYPE(op);
5397     switch (op->opc) {
5398     case INDEX_op_ext_i32_i64:
5399         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5400         break;
5401     case INDEX_op_extu_i32_i64:
5402         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5403         break;
5404     case INDEX_op_extrl_i64_i32:
5405         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5406         break;
5407 
5408     case INDEX_op_add:
5409     case INDEX_op_and:
5410     case INDEX_op_andc:
5411     case INDEX_op_divs:
5412     case INDEX_op_divu:
5413     case INDEX_op_eqv:
5414     case INDEX_op_mul:
5415     case INDEX_op_mulsh:
5416     case INDEX_op_muluh:
5417     case INDEX_op_nand:
5418     case INDEX_op_nor:
5419     case INDEX_op_or:
5420     case INDEX_op_orc:
5421     case INDEX_op_rems:
5422     case INDEX_op_remu:
5423     case INDEX_op_sar:
5424     case INDEX_op_shl:
5425     case INDEX_op_shr:
5426     case INDEX_op_xor:
5427         {
5428             const TCGOutOpBinary *out =
5429                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5430 
5431             /* Constants should never appear in the first source operand. */
5432             tcg_debug_assert(!const_args[1]);
5433             if (const_args[2]) {
5434                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5435             } else {
5436                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5437             }
5438         }
5439         break;
5440 
5441     case INDEX_op_sub:
5442         {
5443             const TCGOutOpSubtract *out = &outop_sub;
5444 
5445             /*
5446              * Constants should never appear in the second source operand.
5447              * These are folded to add with a negative constant.
5448              */
5449             tcg_debug_assert(!const_args[2]);
5450             if (const_args[1]) {
5451                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5452             } else {
5453                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5454             }
5455         }
5456         break;
5457 
5458     case INDEX_op_neg:
5459     case INDEX_op_not:
5460         {
5461             const TCGOutOpUnary *out =
5462                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5463 
5464             /* Constants should have been folded. */
5465             tcg_debug_assert(!const_args[1]);
5466             out->out_rr(s, type, new_args[0], new_args[1]);
5467         }
5468         break;
5469 
5470     case INDEX_op_divs2:
5471     case INDEX_op_divu2:
5472         {
5473             const TCGOutOpDivRem *out =
5474                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5475 
5476             /* Only used by x86 and s390x, which use matching constraints. */
5477             tcg_debug_assert(new_args[0] == new_args[2]);
5478             tcg_debug_assert(new_args[1] == new_args[3]);
5479             tcg_debug_assert(!const_args[4]);
5480             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5481         }
5482         break;
5483 
5484     default:
5485         if (def->flags & TCG_OPF_VECTOR) {
5486             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5487                            TCGOP_VECE(op), new_args, const_args);
5488         } else {
5489             tcg_out_op(s, op->opc, type, new_args, const_args);
5490         }
5491         break;
5492     }
5493 
5494     /* move the outputs into the correct registers if needed */
5495     for (i = 0; i < nb_oargs; i++) {
5496         ts = arg_temp(op->args[i]);
5497 
5498         /* ENV should not be modified.  */
5499         tcg_debug_assert(!temp_readonly(ts));
5500 
5501         if (NEED_SYNC_ARG(i)) {
5502             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5503         } else if (IS_DEAD_ARG(i)) {
5504             temp_dead(s, ts);
5505         }
5506     }
5507 }
5508 
5509 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5510 {
5511     const TCGLifeData arg_life = op->life;
5512     TCGTemp *ots, *itsl, *itsh;
5513     TCGType vtype = TCGOP_TYPE(op);
5514 
5515     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5516     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5517     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5518 
5519     ots = arg_temp(op->args[0]);
5520     itsl = arg_temp(op->args[1]);
5521     itsh = arg_temp(op->args[2]);
5522 
5523     /* ENV should not be modified.  */
5524     tcg_debug_assert(!temp_readonly(ots));
5525 
5526     /* Allocate the output register now.  */
5527     if (ots->val_type != TEMP_VAL_REG) {
5528         TCGRegSet allocated_regs = s->reserved_regs;
5529         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5530         TCGReg oreg;
5531 
5532         /* Make sure to not spill the input registers. */
5533         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5534             tcg_regset_set_reg(allocated_regs, itsl->reg);
5535         }
5536         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5537             tcg_regset_set_reg(allocated_regs, itsh->reg);
5538         }
5539 
5540         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5541                              output_pref(op, 0), ots->indirect_base);
5542         set_temp_val_reg(s, ots, oreg);
5543     }
5544 
5545     /* Promote dup2 of immediates to dupi_vec. */
5546     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5547         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5548         MemOp vece = MO_64;
5549 
5550         if (val == dup_const(MO_8, val)) {
5551             vece = MO_8;
5552         } else if (val == dup_const(MO_16, val)) {
5553             vece = MO_16;
5554         } else if (val == dup_const(MO_32, val)) {
5555             vece = MO_32;
5556         }
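        /*
         * E.g. itsl->val == itsh->val == 0xdeadbeef yields
         * val == 0xdeadbeefdeadbeef, which first matches at MO_32,
         * so a 32-bit element is broadcast rather than a 64-bit one.
         */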
5557 
5558         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5559         goto done;
5560     }
5561 
5562     /* If the two inputs form one 64-bit value, try dupm_vec. */
5563     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5564         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5565         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5566         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5567 
5568         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5569         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5570 
5571         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5572                              its->mem_base->reg, its->mem_offset)) {
5573             goto done;
5574         }
5575     }
5576 
5577     /* Fall back to generic expansion. */
5578     return false;
5579 
5580  done:
5581     ots->mem_coherent = 0;
5582     if (IS_DEAD_ARG(1)) {
5583         temp_dead(s, itsl);
5584     }
5585     if (IS_DEAD_ARG(2)) {
5586         temp_dead(s, itsh);
5587     }
5588     if (NEED_SYNC_ARG(0)) {
5589         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5590     } else if (IS_DEAD_ARG(0)) {
5591         temp_dead(s, ots);
5592     }
5593     return true;
5594 }
5595 
5596 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5597                          TCGRegSet allocated_regs)
5598 {
5599     if (ts->val_type == TEMP_VAL_REG) {
5600         if (ts->reg != reg) {
5601             tcg_reg_free(s, reg, allocated_regs);
5602             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5603                 /*
5604                  * Cross register class move not supported.  Sync the
5605                  * temp back to its slot and load from there.
5606                  */
5607                 temp_sync(s, ts, allocated_regs, 0, 0);
5608                 tcg_out_ld(s, ts->type, reg,
5609                            ts->mem_base->reg, ts->mem_offset);
5610             }
5611         }
5612     } else {
5613         TCGRegSet arg_set = 0;
5614 
5615         tcg_reg_free(s, reg, allocated_regs);
5616         tcg_regset_set_reg(arg_set, reg);
5617         temp_load(s, ts, arg_set, allocated_regs, 0);
5618     }
5619 }
5620 
5621 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5622                          TCGRegSet allocated_regs)
5623 {
5624     /*
5625      * When the destination is on the stack, load up the temp and store.
5626      * If there are many call-saved registers, the temp might live to
5627      * see another use; otherwise it'll be discarded.
5628      */
5629     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5630     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5631                arg_slot_stk_ofs(arg_slot));
5632 }
5633 
5634 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5635                             TCGTemp *ts, TCGRegSet *allocated_regs)
5636 {
5637     if (arg_slot_reg_p(l->arg_slot)) {
5638         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5639         load_arg_reg(s, reg, ts, *allocated_regs);
5640         tcg_regset_set_reg(*allocated_regs, reg);
5641     } else {
5642         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5643     }
5644 }
5645 
5646 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5647                          intptr_t ref_off, TCGRegSet *allocated_regs)
5648 {
5649     TCGReg reg;
5650 
5651     if (arg_slot_reg_p(arg_slot)) {
5652         reg = tcg_target_call_iarg_regs[arg_slot];
5653         tcg_reg_free(s, reg, *allocated_regs);
5654         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5655         tcg_regset_set_reg(*allocated_regs, reg);
5656     } else {
5657         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5658                             *allocated_regs, 0, false);
5659         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5660         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5661                    arg_slot_stk_ofs(arg_slot));
5662     }
5663 }
5664 
5665 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5666 {
5667     const int nb_oargs = TCGOP_CALLO(op);
5668     const int nb_iargs = TCGOP_CALLI(op);
5669     const TCGLifeData arg_life = op->life;
5670     const TCGHelperInfo *info = tcg_call_info(op);
5671     TCGRegSet allocated_regs = s->reserved_regs;
5672     int i;
5673 
5674     /*
5675      * Move inputs into place in reverse order,
5676      * so that we place stacked arguments first.
5677      */
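    /*
     * Placing the stacked arguments first leaves the argument registers
     * free for temp_load() to use as scratch; the register arguments
     * that follow then pin their registers via allocated_regs.
     */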
5678     for (i = nb_iargs - 1; i >= 0; --i) {
5679         const TCGCallArgumentLoc *loc = &info->in[i];
5680         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5681 
5682         switch (loc->kind) {
5683         case TCG_CALL_ARG_NORMAL:
5684         case TCG_CALL_ARG_EXTEND_U:
5685         case TCG_CALL_ARG_EXTEND_S:
5686             load_arg_normal(s, loc, ts, &allocated_regs);
5687             break;
5688         case TCG_CALL_ARG_BY_REF:
5689             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5690             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5691                          arg_slot_stk_ofs(loc->ref_slot),
5692                          &allocated_regs);
5693             break;
5694         case TCG_CALL_ARG_BY_REF_N:
5695             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5696             break;
5697         default:
5698             g_assert_not_reached();
5699         }
5700     }
5701 
5702     /* Mark dead temporaries and free the associated registers.  */
5703     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5704         if (IS_DEAD_ARG(i)) {
5705             temp_dead(s, arg_temp(op->args[i]));
5706         }
5707     }
5708 
5709     /* Clobber call registers.  */
5710     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5711         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5712             tcg_reg_free(s, i, allocated_regs);
5713         }
5714     }
5715 
5716     /*
5717      * Save globals if they might be written by the helper,
5718      * sync them if they might be read.
5719      */
5720     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5721         /* Nothing to do */
5722     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5723         sync_globals(s, allocated_regs);
5724     } else {
5725         save_globals(s, allocated_regs);
5726     }
5727 
5728     /*
5729      * If the ABI passes a pointer to the returned struct as the first
5730      * argument, load that now.  Pass a pointer to the output home slot.
5731      */
5732     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5733         TCGTemp *ts = arg_temp(op->args[0]);
5734 
5735         if (!ts->mem_allocated) {
5736             temp_allocate_frame(s, ts);
5737         }
5738         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5739     }
5740 
5741     tcg_out_call(s, tcg_call_func(op), info);
5742 
5743     /* Assign output registers and emit moves if needed.  */
5744     switch (info->out_kind) {
5745     case TCG_CALL_RET_NORMAL:
5746         for (i = 0; i < nb_oargs; i++) {
5747             TCGTemp *ts = arg_temp(op->args[i]);
5748             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5749 
5750             /* ENV should not be modified.  */
5751             tcg_debug_assert(!temp_readonly(ts));
5752 
5753             set_temp_val_reg(s, ts, reg);
5754             ts->mem_coherent = 0;
5755         }
5756         break;
5757 
5758     case TCG_CALL_RET_BY_VEC:
5759         {
5760             TCGTemp *ts = arg_temp(op->args[0]);
5761 
5762             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5763             tcg_debug_assert(ts->temp_subindex == 0);
5764             if (!ts->mem_allocated) {
5765                 temp_allocate_frame(s, ts);
5766             }
5767             tcg_out_st(s, TCG_TYPE_V128,
5768                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5769                        ts->mem_base->reg, ts->mem_offset);
5770         }
5771         /* fall through to mark all parts in memory */
5772 
5773     case TCG_CALL_RET_BY_REF:
5774         /* The callee has performed a write through the reference. */
5775         for (i = 0; i < nb_oargs; i++) {
5776             TCGTemp *ts = arg_temp(op->args[i]);
5777             ts->val_type = TEMP_VAL_MEM;
5778         }
5779         break;
5780 
5781     default:
5782         g_assert_not_reached();
5783     }
5784 
5785     /* Flush or discard output registers as needed. */
5786     for (i = 0; i < nb_oargs; i++) {
5787         TCGTemp *ts = arg_temp(op->args[i]);
5788         if (NEED_SYNC_ARG(i)) {
5789             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5790         } else if (IS_DEAD_ARG(i)) {
5791             temp_dead(s, ts);
5792         }
5793     }
5794 }
5795 
5796 /**
5797  * atom_and_align_for_opc:
5798  * @s: tcg context
5799  * @opc: memory operation code
5800  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5801  * @allow_two_ops: true if we are prepared to issue two operations
5802  *
5803  * Return the alignment and atomicity to use for the inline fast path
5804  * for the given memory operation.  The alignment may be larger than
5805  * that specified in @opc, and the correct alignment will be diagnosed
5806  * by the slow path helper.
5807  *
5808  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5809  * and issue two loads or stores for subalignment.
5810  */
5811 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5812                                            MemOp host_atom, bool allow_two_ops)
5813 {
5814     MemOp align = memop_alignment_bits(opc);
5815     MemOp size = opc & MO_SIZE;
5816     MemOp half = size ? size - 1 : 0;
5817     MemOp atom = opc & MO_ATOM_MASK;
5818     MemOp atmax;
5819 
5820     switch (atom) {
5821     case MO_ATOM_NONE:
5822         /* The operation requires no specific atomicity. */
5823         atmax = MO_8;
5824         break;
5825 
5826     case MO_ATOM_IFALIGN:
5827         atmax = size;
5828         break;
5829 
5830     case MO_ATOM_IFALIGN_PAIR:
5831         atmax = half;
5832         break;
5833 
5834     case MO_ATOM_WITHIN16:
5835         atmax = size;
5836         if (size == MO_128) {
5837             /* Misalignment implies !within16, and therefore no atomicity. */
5838         } else if (host_atom != MO_ATOM_WITHIN16) {
5839             /* The host does not implement within16, so require alignment. */
5840             align = MAX(align, size);
5841         }
5842         break;
5843 
5844     case MO_ATOM_WITHIN16_PAIR:
5845         atmax = size;
5846         /*
5847          * Misalignment implies !within16, and therefore half atomicity.
5848          * Any host prepared for two operations can implement this with
5849          * half alignment.
5850          */
5851         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5852             align = MAX(align, half);
5853         }
5854         break;
5855 
5856     case MO_ATOM_SUBALIGN:
5857         atmax = size;
5858         if (host_atom != MO_ATOM_SUBALIGN) {
5859             /* If unaligned but not odd, there are subobjects up to half. */
5860             if (allow_two_ops) {
5861                 align = MAX(align, half);
5862             } else {
5863                 align = MAX(align, size);
5864             }
5865         }
5866         break;
5867 
5868     default:
5869         g_assert_not_reached();
5870     }
5871 
5872     return (TCGAtomAlign){ .atom = atmax, .align = align };
5873 }
5874 
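/*
 * For example, a 16-byte load with MO_ATOM_WITHIN16 on a host providing
 * only MO_ATOM_IFALIGN keeps its alignment unchanged (misalignment
 * already implies no atomicity), whereas an 8-byte MO_ATOM_WITHIN16
 * load on the same host has its alignment raised to MO_64.
 */
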
5875 /*
5876  * Similarly for qemu_ld/st slow path helpers.
5877  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5878  * using only the provided backend tcg_out_* functions.
5879  */
5880 
5881 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5882 {
5883     int ofs = arg_slot_stk_ofs(slot);
5884 
5885     /*
5886      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5887      * require extension to uint64_t, adjust the address for uint32_t.
5888      */
5889     if (HOST_BIG_ENDIAN &&
5890         TCG_TARGET_REG_BITS == 64 &&
5891         type == TCG_TYPE_I32) {
5892         ofs += 4;
5893     }
5894     return ofs;
5895 }
5896 
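/*
 * E.g. on a 64-bit big-endian host, the 32 significant bits of a
 * TCG_TYPE_I32 argument occupy the second half of the 8-byte slot,
 * so the returned offset is advanced by 4.
 */
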
5897 static void tcg_out_helper_load_slots(TCGContext *s,
5898                                       unsigned nmov, TCGMovExtend *mov,
5899                                       const TCGLdstHelperParam *parm)
5900 {
5901     unsigned i;
5902     TCGReg dst3;
5903 
5904     /*
5905      * Start from the end, storing to the stack first.
5906      * This frees those registers, so we need not consider overlap.
5907      */
5908     for (i = nmov; i-- > 0; ) {
5909         unsigned slot = mov[i].dst;
5910 
5911         if (arg_slot_reg_p(slot)) {
5912             goto found_reg;
5913         }
5914 
5915         TCGReg src = mov[i].src;
5916         TCGType dst_type = mov[i].dst_type;
5917         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5918 
5919         /* The argument is going onto the stack; extend into scratch. */
5920         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5921             tcg_debug_assert(parm->ntmp != 0);
5922             mov[i].dst = src = parm->tmp[0];
5923             tcg_out_movext1(s, &mov[i]);
5924         }
5925 
5926         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5927                    tcg_out_helper_stk_ofs(dst_type, slot));
5928     }
5929     return;
5930 
5931  found_reg:
5932     /*
5933      * The remaining arguments are in registers.
5934      * Convert slot numbers to argument registers.
5935      */
5936     nmov = i + 1;
5937     for (i = 0; i < nmov; ++i) {
5938         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5939     }
5940 
5941     switch (nmov) {
5942     case 4:
5943         /* The backend must have provided enough temps for the worst case. */
5944         tcg_debug_assert(parm->ntmp >= 2);
5945 
5946         dst3 = mov[3].dst;
5947         for (unsigned j = 0; j < 3; ++j) {
5948             if (dst3 == mov[j].src) {
5949                 /*
5950                  * Conflict. Copy the source to a temporary, perform the
5951                  * remaining moves, then the extension from our scratch
5952                  * on the way out.
5953                  */
5954                 TCGReg scratch = parm->tmp[1];
5955 
5956                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5957                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5958                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5959                 return;
5960             }
5961         }
5962 
5963         /* No conflicts: perform this move and continue. */
5964         tcg_out_movext1(s, &mov[3]);
5965         /* fall through */
5966 
5967     case 3:
5968         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5969                         parm->ntmp ? parm->tmp[0] : -1);
5970         break;
5971     case 2:
5972         tcg_out_movext2(s, mov, mov + 1,
5973                         parm->ntmp ? parm->tmp[0] : -1);
5974         break;
5975     case 1:
5976         tcg_out_movext1(s, mov);
5977         break;
5978     default:
5979         g_assert_not_reached();
5980     }
5981 }
5982 
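/*
 * The case 4 conflict check above handles mov[3].dst doubling as the
 * source of one of the first three moves: mov[3] must then be emitted
 * last, and its own source is preserved in a scratch register in case
 * the earlier moves overwrite it.
 */
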
5983 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5984                                     TCGType type, tcg_target_long imm,
5985                                     const TCGLdstHelperParam *parm)
5986 {
5987     if (arg_slot_reg_p(slot)) {
5988         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5989     } else {
5990         int ofs = tcg_out_helper_stk_ofs(type, slot);
5991         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5992             tcg_debug_assert(parm->ntmp != 0);
5993             tcg_out_movi(s, type, parm->tmp[0], imm);
5994             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5995         }
5996     }
5997 }
5998 
5999 static void tcg_out_helper_load_common_args(TCGContext *s,
6000                                             const TCGLabelQemuLdst *ldst,
6001                                             const TCGLdstHelperParam *parm,
6002                                             const TCGHelperInfo *info,
6003                                             unsigned next_arg)
6004 {
6005     TCGMovExtend ptr_mov = {
6006         .dst_type = TCG_TYPE_PTR,
6007         .src_type = TCG_TYPE_PTR,
6008         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6009     };
6010     const TCGCallArgumentLoc *loc = &info->in[0];
6011     TCGType type;
6012     unsigned slot;
6013     tcg_target_ulong imm;
6014 
6015     /*
6016      * Handle env, which is always first.
6017      */
6018     ptr_mov.dst = loc->arg_slot;
6019     ptr_mov.src = TCG_AREG0;
6020     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6021 
6022     /*
6023      * Handle oi.
6024      */
6025     imm = ldst->oi;
6026     loc = &info->in[next_arg];
6027     type = TCG_TYPE_I32;
6028     switch (loc->kind) {
6029     case TCG_CALL_ARG_NORMAL:
6030         break;
6031     case TCG_CALL_ARG_EXTEND_U:
6032     case TCG_CALL_ARG_EXTEND_S:
6033         /* No extension required for MemOpIdx. */
6034         tcg_debug_assert(imm <= INT32_MAX);
6035         type = TCG_TYPE_REG;
6036         break;
6037     default:
6038         g_assert_not_reached();
6039     }
6040     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6041     next_arg++;
6042 
6043     /*
6044      * Handle ra.
6045      */
6046     loc = &info->in[next_arg];
6047     slot = loc->arg_slot;
6048     if (parm->ra_gen) {
6049         int arg_reg = -1;
6050         TCGReg ra_reg;
6051 
6052         if (arg_slot_reg_p(slot)) {
6053             arg_reg = tcg_target_call_iarg_regs[slot];
6054         }
6055         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6056 
6057         ptr_mov.dst = slot;
6058         ptr_mov.src = ra_reg;
6059         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6060     } else {
6061         imm = (uintptr_t)ldst->raddr;
6062         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6063     }
6064 }
6065 
6066 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6067                                        const TCGCallArgumentLoc *loc,
6068                                        TCGType dst_type, TCGType src_type,
6069                                        TCGReg lo, TCGReg hi)
6070 {
6071     MemOp reg_mo;
6072 
6073     if (dst_type <= TCG_TYPE_REG) {
6074         MemOp src_ext;
6075 
6076         switch (loc->kind) {
6077         case TCG_CALL_ARG_NORMAL:
6078             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6079             break;
6080         case TCG_CALL_ARG_EXTEND_U:
6081             dst_type = TCG_TYPE_REG;
6082             src_ext = MO_UL;
6083             break;
6084         case TCG_CALL_ARG_EXTEND_S:
6085             dst_type = TCG_TYPE_REG;
6086             src_ext = MO_SL;
6087             break;
6088         default:
6089             g_assert_not_reached();
6090         }
6091 
6092         mov[0].dst = loc->arg_slot;
6093         mov[0].dst_type = dst_type;
6094         mov[0].src = lo;
6095         mov[0].src_type = src_type;
6096         mov[0].src_ext = src_ext;
6097         return 1;
6098     }
6099 
6100     if (TCG_TARGET_REG_BITS == 32) {
6101         assert(dst_type == TCG_TYPE_I64);
6102         reg_mo = MO_32;
6103     } else {
6104         assert(dst_type == TCG_TYPE_I128);
6105         reg_mo = MO_64;
6106     }
6107 
6108     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6109     mov[0].src = lo;
6110     mov[0].dst_type = TCG_TYPE_REG;
6111     mov[0].src_type = TCG_TYPE_REG;
6112     mov[0].src_ext = reg_mo;
6113 
6114     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6115     mov[1].src = hi;
6116     mov[1].dst_type = TCG_TYPE_REG;
6117     mov[1].src_type = TCG_TYPE_REG;
6118     mov[1].src_ext = reg_mo;
6119 
6120     return 2;
6121 }
6122 
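/*
 * E.g. a TCG_TYPE_I64 argument on a 32-bit host expands to two MO_32
 * register moves: lo fills loc[0] and hi fills loc[1] on a little-endian
 * host, with the two slots swapped on a big-endian host.
 */
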
6123 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6124                                    const TCGLdstHelperParam *parm)
6125 {
6126     const TCGHelperInfo *info;
6127     const TCGCallArgumentLoc *loc;
6128     TCGMovExtend mov[2];
6129     unsigned next_arg, nmov;
6130     MemOp mop = get_memop(ldst->oi);
6131 
6132     switch (mop & MO_SIZE) {
6133     case MO_8:
6134     case MO_16:
6135     case MO_32:
6136         info = &info_helper_ld32_mmu;
6137         break;
6138     case MO_64:
6139         info = &info_helper_ld64_mmu;
6140         break;
6141     case MO_128:
6142         info = &info_helper_ld128_mmu;
6143         break;
6144     default:
6145         g_assert_not_reached();
6146     }
6147 
6148     /* Defer env argument. */
6149     next_arg = 1;
6150 
6151     loc = &info->in[next_arg];
6152     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6153         /*
6154          * 32-bit host with 32-bit guest: zero-extend the guest address
6155          * to 64 bits for the helper by storing the low part, then
6156          * load a zero for the high part.
6157          */
6158         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6159                                TCG_TYPE_I32, TCG_TYPE_I32,
6160                                ldst->addr_reg, -1);
6161         tcg_out_helper_load_slots(s, 1, mov, parm);
6162 
6163         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6164                                 TCG_TYPE_I32, 0, parm);
6165         next_arg += 2;
6166     } else {
6167         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6168                                       ldst->addr_reg, -1);
6169         tcg_out_helper_load_slots(s, nmov, mov, parm);
6170         next_arg += nmov;
6171     }
6172 
6173     switch (info->out_kind) {
6174     case TCG_CALL_RET_NORMAL:
6175     case TCG_CALL_RET_BY_VEC:
6176         break;
6177     case TCG_CALL_RET_BY_REF:
6178         /*
6179          * The return reference is in the first argument slot.
6180          * We need memory into which to return: reuse the top of the stack.
6181          */
6182         {
6183             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6184 
6185             if (arg_slot_reg_p(0)) {
6186                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6187                                  TCG_REG_CALL_STACK, ofs_slot0);
6188             } else {
6189                 tcg_debug_assert(parm->ntmp != 0);
6190                 tcg_out_addi_ptr(s, parm->tmp[0],
6191                                  TCG_REG_CALL_STACK, ofs_slot0);
6192                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6193                            TCG_REG_CALL_STACK, ofs_slot0);
6194             }
6195         }
6196         break;
6197     default:
6198         g_assert_not_reached();
6199     }
6200 
6201     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6202 }
6203 
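     /*
      * Emit code to move the qemu_ld helper's return value into the
      * data register(s), applying whatever sign- or zero-extension
      * the helper itself did not perform.
      */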
6204 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6205                                   bool load_sign,
6206                                   const TCGLdstHelperParam *parm)
6207 {
6208     MemOp mop = get_memop(ldst->oi);
6209     TCGMovExtend mov[2];
6210     int ofs_slot0;
6211 
6212     switch (ldst->type) {
6213     case TCG_TYPE_I64:
6214         if (TCG_TARGET_REG_BITS == 32) {
6215             break;
6216         }
6217         /* fall through */
6218 
6219     case TCG_TYPE_I32:
6220         mov[0].dst = ldst->datalo_reg;
6221         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6222         mov[0].dst_type = ldst->type;
6223         mov[0].src_type = TCG_TYPE_REG;
6224 
6225         /*
6226          * If load_sign, then we allowed the helper to perform the
6227          * appropriate sign extension to tcg_target_ulong, and all
6228          * we need now is a plain move.
6229          *
6230          * If not, then we expect the relevant extension
6231          * instruction to be no more expensive than a move, and
6232          * we thus save icache etc. by using only one of the two
6233          * helper functions.
6234          */
6235         if (load_sign || !(mop & MO_SIGN)) {
6236             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6237                 mov[0].src_ext = MO_32;
6238             } else {
6239                 mov[0].src_ext = MO_64;
6240             }
6241         } else {
6242             mov[0].src_ext = mop & MO_SSIZE;
6243         }
6244         tcg_out_movext1(s, mov);
6245         return;
6246 
6247     case TCG_TYPE_I128:
6248         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6249         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6250         switch (TCG_TARGET_CALL_RET_I128) {
6251         case TCG_CALL_RET_NORMAL:
6252             break;
6253         case TCG_CALL_RET_BY_VEC:
6254             tcg_out_st(s, TCG_TYPE_V128,
6255                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6256                        TCG_REG_CALL_STACK, ofs_slot0);
6257             /* fall through */
6258         case TCG_CALL_RET_BY_REF:
6259             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6260                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6261             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6262                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6263             return;
6264         default:
6265             g_assert_not_reached();
6266         }
6267         break;
6268 
6269     default:
6270         g_assert_not_reached();
6271     }
6272 
6273     mov[0].dst = ldst->datalo_reg;
6274     mov[0].src =
6275         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6276     mov[0].dst_type = TCG_TYPE_REG;
6277     mov[0].src_type = TCG_TYPE_REG;
6278     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6279 
6280     mov[1].dst = ldst->datahi_reg;
6281     mov[1].src =
6282         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6283     mov[1].dst_type = TCG_TYPE_REG;
6284     mov[1].src_type = TCG_TYPE_REG;
6285     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6286 
6287     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6288 }
6289 
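     /*
      * Emit code to load the arguments for a call to an out-of-line
      * qemu_st helper: the guest address, the data to be stored
      * (passed by reference for I128 when required), and the common
      * oi/retaddr arguments.
      */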
6290 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6291                                    const TCGLdstHelperParam *parm)
6292 {
6293     const TCGHelperInfo *info;
6294     const TCGCallArgumentLoc *loc;
6295     TCGMovExtend mov[4];
6296     TCGType data_type;
6297     unsigned next_arg, nmov, n;
6298     MemOp mop = get_memop(ldst->oi);
6299 
6300     switch (mop & MO_SIZE) {
6301     case MO_8:
6302     case MO_16:
6303     case MO_32:
6304         info = &info_helper_st32_mmu;
6305         data_type = TCG_TYPE_I32;
6306         break;
6307     case MO_64:
6308         info = &info_helper_st64_mmu;
6309         data_type = TCG_TYPE_I64;
6310         break;
6311     case MO_128:
6312         info = &info_helper_st128_mmu;
6313         data_type = TCG_TYPE_I128;
6314         break;
6315     default:
6316         g_assert_not_reached();
6317     }
6318 
6319     /* Defer env argument. */
6320     next_arg = 1;
6321     nmov = 0;
6322 
6323     /* Handle addr argument. */
6324     loc = &info->in[next_arg];
6325     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6326     if (TCG_TARGET_REG_BITS == 32) {
6327         /*
6328          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6329          * to 64-bits for the helper by storing the low part.  Later,
6330          * to 64 bits for the helper by storing the low part.  Later,
6331          * zero for the high part.
6332          */
6333         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6334                                TCG_TYPE_I32, TCG_TYPE_I32,
6335                                ldst->addr_reg, -1);
6336         next_arg += 2;
6337         nmov += 1;
6338     } else {
6339         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6340                                    ldst->addr_reg, -1);
6341         next_arg += n;
6342         nmov += n;
6343     }
6344 
6345     /* Handle data argument. */
6346     loc = &info->in[next_arg];
6347     switch (loc->kind) {
6348     case TCG_CALL_ARG_NORMAL:
6349     case TCG_CALL_ARG_EXTEND_U:
6350     case TCG_CALL_ARG_EXTEND_S:
6351         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6352                                    ldst->datalo_reg, ldst->datahi_reg);
6353         next_arg += n;
6354         nmov += n;
6355         tcg_out_helper_load_slots(s, nmov, mov, parm);
6356         break;
6357 
6358     case TCG_CALL_ARG_BY_REF:
6359         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6360         tcg_debug_assert(data_type == TCG_TYPE_I128);
6361         tcg_out_st(s, TCG_TYPE_I64,
6362                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6363                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6364         tcg_out_st(s, TCG_TYPE_I64,
6365                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6366                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6367 
6368         tcg_out_helper_load_slots(s, nmov, mov, parm);
6369 
6370         if (arg_slot_reg_p(loc->arg_slot)) {
6371             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6372                              TCG_REG_CALL_STACK,
6373                              arg_slot_stk_ofs(loc->ref_slot));
6374         } else {
6375             tcg_debug_assert(parm->ntmp != 0);
6376             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6377                              arg_slot_stk_ofs(loc->ref_slot));
6378             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6379                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6380         }
6381         next_arg += 2;
6382         break;
6383 
6384     default:
6385         g_assert_not_reached();
6386     }
6387 
6388     if (TCG_TARGET_REG_BITS == 32) {
6389         /* Zero extend the address by loading a zero for the high part. */
6390         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6391         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6392     }
6393 
6394     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6395 }
6396 
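     /*
      * Generate host code for the TB into tb->tc.ptr.  Returns the
      * number of bytes emitted on success; returns -1 if the code
      * buffer high-water mark was crossed, and -2 (or the error from
      * a finalize hook) if the TB outgrew what gen_insn_end_off or
      * the relocations can represent, so that the caller may restart.
      */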
6397 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6398 {
6399     int i, start_words, num_insns;
6400     TCGOp *op;
6401 
6402     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6403                  && qemu_log_in_addr_range(pc_start))) {
6404         FILE *logfile = qemu_log_trylock();
6405         if (logfile) {
6406             fprintf(logfile, "OP:\n");
6407             tcg_dump_ops(s, logfile, false);
6408             fprintf(logfile, "\n");
6409             qemu_log_unlock(logfile);
6410         }
6411     }
6412 
6413 #ifdef CONFIG_DEBUG_TCG
6414     /* Ensure all labels referenced have been emitted.  */
6415     {
6416         TCGLabel *l;
6417         bool error = false;
6418 
6419         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6420             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6421                 qemu_log_mask(CPU_LOG_TB_OP,
6422                               "$L%d referenced but not present.\n", l->id);
6423                 error = true;
6424             }
6425         }
6426         assert(!error);
6427     }
6428 #endif
6429 
6430     /* Do not reuse any EBB that may be allocated within the TB. */
6431     tcg_temp_ebb_reset_freed(s);
6432 
6433     tcg_optimize(s);
6434 
6435     reachable_code_pass(s);
6436     liveness_pass_0(s);
6437     liveness_pass_1(s);
6438 
6439     if (s->nb_indirects > 0) {
6440         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6441                      && qemu_log_in_addr_range(pc_start))) {
6442             FILE *logfile = qemu_log_trylock();
6443             if (logfile) {
6444                 fprintf(logfile, "OP before indirect lowering:\n");
6445                 tcg_dump_ops(s, logfile, false);
6446                 fprintf(logfile, "\n");
6447                 qemu_log_unlock(logfile);
6448             }
6449         }
6450 
6451         /* Replace indirect temps with direct temps.  */
6452         if (liveness_pass_2(s)) {
6453             /* If changes were made, re-run liveness.  */
6454             liveness_pass_1(s);
6455         }
6456     }
6457 
6458     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6459                  && qemu_log_in_addr_range(pc_start))) {
6460         FILE *logfile = qemu_log_trylock();
6461         if (logfile) {
6462             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6463             tcg_dump_ops(s, logfile, true);
6464             fprintf(logfile, "\n");
6465             qemu_log_unlock(logfile);
6466         }
6467     }
6468 
6469     /* Initialize goto_tb jump offsets. */
6470     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6471     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6472     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6473     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6474 
6475     tcg_reg_alloc_start(s);
6476 
6477     /*
6478      * Reset the buffer pointers when restarting after overflow.
6479      * TODO: Move this into translate-all.c with the rest of the
6480      * buffer management.  Having only this done here is confusing.
6481      */
6482     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6483     s->code_ptr = s->code_buf;
6484     s->data_gen_ptr = NULL;
6485 
6486     QSIMPLEQ_INIT(&s->ldst_labels);
6487     s->pool_labels = NULL;
6488 
6489     start_words = s->insn_start_words;
6490     s->gen_insn_data =
6491         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6492 
6493     tcg_out_tb_start(s);
6494 
6495     num_insns = -1;
6496     QTAILQ_FOREACH(op, &s->ops, link) {
6497         TCGOpcode opc = op->opc;
6498 
6499         switch (opc) {
6500         case INDEX_op_mov:
6501         case INDEX_op_mov_vec:
6502             tcg_reg_alloc_mov(s, op);
6503             break;
6504         case INDEX_op_dup_vec:
6505             tcg_reg_alloc_dup(s, op);
6506             break;
6507         case INDEX_op_insn_start:
6508             if (num_insns >= 0) {
6509                 size_t off = tcg_current_code_size(s);
6510                 s->gen_insn_end_off[num_insns] = off;
6511                 /* Assert that we do not overflow our stored offset.  */
6512                 assert(s->gen_insn_end_off[num_insns] == off);
6513             }
6514             num_insns++;
6515             for (i = 0; i < start_words; ++i) {
6516                 s->gen_insn_data[num_insns * start_words + i] =
6517                     tcg_get_insn_start_param(op, i);
6518             }
6519             break;
6520         case INDEX_op_discard:
6521             temp_dead(s, arg_temp(op->args[0]));
6522             break;
6523         case INDEX_op_set_label:
6524             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6525             tcg_out_label(s, arg_label(op->args[0]));
6526             break;
6527         case INDEX_op_call:
6528             tcg_reg_alloc_call(s, op);
6529             break;
6530         case INDEX_op_exit_tb:
6531             tcg_out_exit_tb(s, op->args[0]);
6532             break;
6533         case INDEX_op_goto_tb:
6534             tcg_out_goto_tb(s, op->args[0]);
6535             break;
6536         case INDEX_op_dup2_vec:
6537             if (tcg_reg_alloc_dup2(s, op)) {
6538                 break;
6539             }
6540             /* fall through */
6541         default:
6542             /* Sanity check that we've not introduced any unhandled opcodes. */
6543             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6544                                               TCGOP_FLAGS(op)));
6545             /* Note: it would be faster to have specialized register
6546                allocator functions for some common argument
6547                patterns.  */
6548             tcg_reg_alloc_op(s, op);
6549             break;
6550         }
6551         /* Test for (pending) buffer overflow.  The assumption is that any
6552            one operation beginning below the high water mark cannot overrun
6553            the buffer completely.  Thus we can test for overflow after
6554            generating code without having to check during generation.  */
6555         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6556             return -1;
6557         }
6558         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6559         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6560             return -2;
6561         }
6562     }
6563     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6564     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6565 
6566     /* Generate TB finalization at the end of block */
6567     i = tcg_out_ldst_finalize(s);
6568     if (i < 0) {
6569         return i;
6570     }
6571     i = tcg_out_pool_finalize(s);
6572     if (i < 0) {
6573         return i;
6574     }
6575     if (!tcg_resolve_relocs(s)) {
6576         return -2;
6577     }
6578 
6579 #ifndef CONFIG_TCG_INTERPRETER
6580     /* flush instruction cache */
6581     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6582                         (uintptr_t)s->code_buf,
6583                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6584 #endif
6585 
6586     return tcg_current_code_size(s);
6587 }
6588 
6589 #ifdef ELF_HOST_MACHINE
6590 /* In order to use this feature, the backend needs to do three things:
6591 
6592    (1) Define ELF_HOST_MACHINE to indicate both what value to
6593    (1) Define ELF_HOST_MACHINE, both to supply the value placed
6594        in the ELF image and to indicate support for the feature.
6595    (2) Define tcg_register_jit.  This should create a buffer containing
6596        the contents of a .debug_frame section that describes the post-
6597        prologue unwind info for the tcg machine.
6598 
6599    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6600 */
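     /*
      * As a rough sketch only (the DebugFrame layout, the CFA rule
      * bytes and the return column below are hypothetical, not any
      * particular backend's actual values), steps (2) and (3)
      * typically look like:
      *
      *   typedef struct {
      *       DebugFrameHeader h;
      *       uint8_t fde_def_cfa[4];   -- DW_CFA_def_cfa encoding
      *       uint8_t fde_reg_ofs[4];   -- callee-saved reg offsets
      *   } DebugFrame;
      *
      *   static const DebugFrame debug_frame = {
      *       .h.cie.len = sizeof(DebugFrameCIE) - 4,  -- omit .len
      *       .h.cie.id = -1,
      *       .h.cie.version = 1,
      *       .h.cie.code_align = 1,
      *       .h.cie.data_align = 0x78,                -- sleb128 -8
      *       .h.cie.return_column = 16,               -- hypothetical
      *       .h.fde.len = sizeof(DebugFrame)
      *                  - offsetof(DebugFrame, h.fde.cie_offset),
      *       ...
      *   };
      *
      *   void tcg_register_jit(const void *buf, size_t buf_size)
      *   {
      *       tcg_register_jit_int(buf, buf_size,
      *                            &debug_frame, sizeof(debug_frame));
      *   }
      *
      * tcg_register_jit_int below patches fde.func_start/func_len,
      * so those fields may be left zero in the template.
      */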
6601 
6602 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6603 typedef enum {
6604     JIT_NOACTION = 0,
6605     JIT_REGISTER_FN,
6606     JIT_UNREGISTER_FN
6607 } jit_actions_t;
6608 
6609 struct jit_code_entry {
6610     struct jit_code_entry *next_entry;
6611     struct jit_code_entry *prev_entry;
6612     const void *symfile_addr;
6613     uint64_t symfile_size;
6614 };
6615 
6616 struct jit_descriptor {
6617     uint32_t version;
6618     uint32_t action_flag;
6619     struct jit_code_entry *relevant_entry;
6620     struct jit_code_entry *first_entry;
6621 };
6622 
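     /* GDB puts a breakpoint in this function; the noinline attribute
        and the empty asm keep the call from being optimized away.  */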
6623 void __jit_debug_register_code(void) __attribute__((noinline));
6624 void __jit_debug_register_code(void)
6625 {
6626     asm("");
6627 }
6628 
6629 /* Must statically initialize the version, because GDB may check
6630    the version before we can set it.  */
6631 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6632 
6633 /* End GDB interface.  */
6634 
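     /* Return the offset of @str within @strtab.  The string must be
        present: there is no terminating bounds check.  */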
6635 static int find_string(const char *strtab, const char *str)
6636 {
6637     const char *p = strtab + 1;
6638 
6639     while (1) {
6640         if (strcmp(p, str) == 0) {
6641             return p - strtab;
6642         }
6643         p += strlen(p) + 1;
6644     }
6645 }
6646 
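     /*
      * Build a minimal in-memory ELF image that describes the
      * code_gen_buffer as a function, append the backend-provided
      * .debug_frame, and hand the result to GDB via the JIT
      * interface above.
      */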
6647 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6648                                  const void *debug_frame,
6649                                  size_t debug_frame_size)
6650 {
6651     struct __attribute__((packed)) DebugInfo {
6652         uint32_t  len;
6653         uint16_t  version;
6654         uint32_t  abbrev;
6655         uint8_t   ptr_size;
6656         uint8_t   cu_die;
6657         uint16_t  cu_lang;
6658         uintptr_t cu_low_pc;
6659         uintptr_t cu_high_pc;
6660         uint8_t   fn_die;
6661         char      fn_name[16];
6662         uintptr_t fn_low_pc;
6663         uintptr_t fn_high_pc;
6664         uint8_t   cu_eoc;
6665     };
6666 
6667     struct ElfImage {
6668         ElfW(Ehdr) ehdr;
6669         ElfW(Phdr) phdr;
6670         ElfW(Shdr) shdr[7];
6671         ElfW(Sym)  sym[2];
6672         struct DebugInfo di;
6673         uint8_t    da[24];
6674         char       str[80];
6675     };
6676 
6677     struct ElfImage *img;
6678 
6679     static const struct ElfImage img_template = {
6680         .ehdr = {
6681             .e_ident[EI_MAG0] = ELFMAG0,
6682             .e_ident[EI_MAG1] = ELFMAG1,
6683             .e_ident[EI_MAG2] = ELFMAG2,
6684             .e_ident[EI_MAG3] = ELFMAG3,
6685             .e_ident[EI_CLASS] = ELF_CLASS,
6686             .e_ident[EI_DATA] = ELF_DATA,
6687             .e_ident[EI_VERSION] = EV_CURRENT,
6688             .e_type = ET_EXEC,
6689             .e_machine = ELF_HOST_MACHINE,
6690             .e_version = EV_CURRENT,
6691             .e_phoff = offsetof(struct ElfImage, phdr),
6692             .e_shoff = offsetof(struct ElfImage, shdr),
6693             .e_ehsize = sizeof(ElfW(Ehdr)),
6694             .e_phentsize = sizeof(ElfW(Phdr)),
6695             .e_phnum = 1,
6696             .e_shentsize = sizeof(ElfW(Shdr)),
6697             .e_shnum = ARRAY_SIZE(img->shdr),
6698             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6699 #ifdef ELF_HOST_FLAGS
6700             .e_flags = ELF_HOST_FLAGS,
6701 #endif
6702 #ifdef ELF_OSABI
6703             .e_ident[EI_OSABI] = ELF_OSABI,
6704 #endif
6705         },
6706         .phdr = {
6707             .p_type = PT_LOAD,
6708             .p_flags = PF_X,
6709         },
6710         .shdr = {
6711             [0] = { .sh_type = SHT_NULL },
6712             /* Trick: The contents of code_gen_buffer are not present in
6713                this fake ELF file; that got allocated elsewhere.  Therefore
6714                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6715                will not look for contents.  We can record any address.  */
6716             [1] = { /* .text */
6717                 .sh_type = SHT_NOBITS,
6718                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6719             },
6720             [2] = { /* .debug_info */
6721                 .sh_type = SHT_PROGBITS,
6722                 .sh_offset = offsetof(struct ElfImage, di),
6723                 .sh_size = sizeof(struct DebugInfo),
6724             },
6725             [3] = { /* .debug_abbrev */
6726                 .sh_type = SHT_PROGBITS,
6727                 .sh_offset = offsetof(struct ElfImage, da),
6728                 .sh_size = sizeof(img->da),
6729             },
6730             [4] = { /* .debug_frame */
6731                 .sh_type = SHT_PROGBITS,
6732                 .sh_offset = sizeof(struct ElfImage),
6733             },
6734             [5] = { /* .symtab */
6735                 .sh_type = SHT_SYMTAB,
6736                 .sh_offset = offsetof(struct ElfImage, sym),
6737                 .sh_size = sizeof(img->sym),
6738                 .sh_info = 1,
6739                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6740                 .sh_entsize = sizeof(ElfW(Sym)),
6741             },
6742             [6] = { /* .strtab */
6743                 .sh_type = SHT_STRTAB,
6744                 .sh_offset = offsetof(struct ElfImage, str),
6745                 .sh_size = sizeof(img->str),
6746             }
6747         },
6748         .sym = {
6749             [1] = { /* code_gen_buffer */
6750                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6751                 .st_shndx = 1,
6752             }
6753         },
6754         .di = {
6755             .len = sizeof(struct DebugInfo) - 4,
6756             .version = 2,
6757             .ptr_size = sizeof(void *),
6758             .cu_die = 1,
6759             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6760             .fn_die = 2,
6761             .fn_name = "code_gen_buffer"
6762         },
6763         .da = {
6764             1,          /* abbrev number (the cu) */
6765             0x11, 1,    /* DW_TAG_compile_unit, has children */
6766             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6767             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6768             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6769             0, 0,       /* end of abbrev */
6770             2,          /* abbrev number (the fn) */
6771             0x2e, 0,    /* DW_TAG_subprogram, no children */
6772             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6773             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6774             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6775             0, 0,       /* end of abbrev */
6776             0           /* no more abbrev */
6777         },
6778         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6779                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6780     };
6781 
6782     /* We only need a single jit entry; statically allocate it.  */
6783     static struct jit_code_entry one_entry;
6784 
6785     uintptr_t buf = (uintptr_t)buf_ptr;
6786     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6787     DebugFrameHeader *dfh;
6788 
6789     img = g_malloc(img_size);
6790     *img = img_template;
6791 
6792     img->phdr.p_vaddr = buf;
6793     img->phdr.p_paddr = buf;
6794     img->phdr.p_memsz = buf_size;
6795 
6796     img->shdr[1].sh_name = find_string(img->str, ".text");
6797     img->shdr[1].sh_addr = buf;
6798     img->shdr[1].sh_size = buf_size;
6799 
6800     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6801     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6802 
6803     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6804     img->shdr[4].sh_size = debug_frame_size;
6805 
6806     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6807     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6808 
6809     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6810     img->sym[1].st_value = buf;
6811     img->sym[1].st_size = buf_size;
6812 
6813     img->di.cu_low_pc = buf;
6814     img->di.cu_high_pc = buf + buf_size;
6815     img->di.fn_low_pc = buf;
6816     img->di.fn_high_pc = buf + buf_size;
6817 
6818     dfh = (DebugFrameHeader *)(img + 1);
6819     memcpy(dfh, debug_frame, debug_frame_size);
6820     dfh->fde.func_start = buf;
6821     dfh->fde.func_len = buf_size;
6822 
6823 #ifdef DEBUG_JIT
6824     /* Enable this block to debug the creation of the ELF image file.
6825        The result can be inspected with readelf, objdump, etc.  */
6826     {
6827         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6828         FILE *f = fopen(jit, "w+b");
6829         if (f) {
6830             if (fwrite(img, img_size, 1, f) != 1) {
6831                 /* Silence the unused return value warning for fwrite.  */
6832             }
6833             fclose(f);
6834         }
6835     }
6836 #endif
6837 
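         /* Publish the symfile and notify the attached debugger.  */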
6838     one_entry.symfile_addr = img;
6839     one_entry.symfile_size = img_size;
6840 
6841     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6842     __jit_debug_descriptor.relevant_entry = &one_entry;
6843     __jit_debug_descriptor.first_entry = &one_entry;
6844     __jit_debug_register_code();
6845 }
6846 #else
6847 /* No support for the feature.  Provide the entry point expected by exec.c,
6848    and implement the internal function we declared earlier.  */
6849 
6850 static void tcg_register_jit_int(const void *buf, size_t size,
6851                                  const void *debug_frame,
6852                                  size_t debug_frame_size)
6853 {
6854 }
6855 
6856 void tcg_register_jit(const void *buf, size_t buf_size)
6857 {
6858 }
6859 #endif /* ELF_HOST_MACHINE */
6860 
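     /*
      * Without backend vector support the expander is still declared
      * by common code; any call to it indicates a bug, hence the
      * unconditional assert in the stub below.
      */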
6861 #if !TCG_TARGET_MAYBE_vec
6862 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6863 {
6864     g_assert_not_reached();
6865 }
6866 #endif
6867