xref: /openbmc/qemu/tcg/tcg.c (revision 60f34f55f1a708c071774bd7f837163d6b686867)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 #include "exec/translation-block.h"
38 #include "exec/tlb-common.h"
39 #include "tcg/startup.h"
40 #include "tcg/tcg-op-common.h"
41 
42 #if UINTPTR_MAX == UINT32_MAX
43 # define ELF_CLASS  ELFCLASS32
44 #else
45 # define ELF_CLASS  ELFCLASS64
46 #endif
47 #if HOST_BIG_ENDIAN
48 # define ELF_DATA   ELFDATA2MSB
49 #else
50 # define ELF_DATA   ELFDATA2LSB
51 #endif
52 
53 #include "elf.h"
54 #include "exec/log.h"
55 #include "tcg/tcg-ldst.h"
56 #include "tcg/tcg-temp-internal.h"
57 #include "tcg-internal.h"
58 #include "tcg/perf.h"
59 #include "tcg-has.h"
60 #ifdef CONFIG_USER_ONLY
61 #include "user/guest-base.h"
62 #endif
63 
64 /* Forward declarations for functions declared in tcg-target.c.inc and
65    used here. */
/* Backend initialization and prologue generation. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
/*
 * Apply one relocation of kind @type at @code_ptr.  Callers in this file
 * treat a false return as "could not be applied".
 */
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
/* Fill @count insn units starting at @p (used here to pad before the pool). */
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

/*
 * Slow-path emitters for guest loads and stores.  Callers in this file
 * treat a false return as failure.
 */
typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE header of the debug frame data.
 * NOTE(review): the field names suggest the DWARF call-frame CIE layout;
 * confirm against the DWARF specification before changing anything here.
 */
typedef struct {
    /* Forced to pointer-size alignment by the aligned attribute. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/*
 * FDE header of the debug frame data.  The structure is packed, but @len
 * is still forced to pointer-size alignment.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    /* Start and length of the described code, as host-pointer-sized values. */
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
/* One CIE immediately followed by one FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
/*
 * Description of one pending qemu_ld/qemu_st slow path.  Entries are
 * allocated and queued on s->ldst_labels by new_ldst_label() and are
 * consumed by tcg_out_ldst_finalize().
 */
struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;  /* link in s->ldst_labels */
};
110 
/*
 * Forward declaration.  The unused attribute suppresses the warning when
 * no caller is compiled in.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
115 
116 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
/* Register load, register move and immediate move. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
/* Sign/zero extensions and width conversions used by tcg_out_movext(). */
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
/*
 * Exchange two registers.  tcg_out_movext2/3 fall back to a scratch
 * register when this returns false.
 */
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
/*
 * Vector emitters.  When TCG_TARGET_MAYBE_vec is set these are only
 * declared here; otherwise stubs are provided that assert if ever
 * reached, and tcg_can_emit_vec_op() always returns 0.
 */
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
/* Without vector support no vector opcode can be emitted. */
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
/* Register store and store of a value described by a TCGArg. */
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
/* Helper call emission and calling-convention queries. */
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);
187 
/*
 * Outside user-only builds every use of guest_base expands to
 * qemu_build_not_reached() followed by a zero value.
 * NOTE(review): presumably this turns any use that is not optimized away
 * into a build failure -- confirm against qemu_build_not_reached().
 */
#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
191 
/*
 * Parameter block passed to tcg_out_ld_helper_args(),
 * tcg_out_ld_helper_ret() and tcg_out_st_helper_args().
 */
typedef struct TCGLdstHelperParam {
    /* Callback producing a TCGReg for the given label and argument register. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    /* NOTE(review): presumably the number of valid entries in tmp[] -- confirm. */
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;
197 
/*
 * Forward declarations of the load/store helper argument and return
 * marshalling routines.  The unused attribute suppresses warnings when
 * no caller is compiled in.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
207 
/*
 * Load helper entry points, indexed by the MO_SSIZE bits of a MemOp.
 * The MO_SL and MO_128 slots are only populated when
 * TCG_TARGET_REG_BITS == 64; slots without an initializer are NULL.
 */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
220 
/*
 * Store helper entry points, indexed by the MO_SIZE bits of a MemOp.
 * The MO_128 slot is only populated when TCG_TARGET_REG_BITS == 64.
 */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};
230 
/* Result pair returned by atom_and_align_for_opc(). */
typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;
235 
/*
 * Forward declaration.  The unused attribute suppresses the warning when
 * no caller is compiled in.
 */
static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));
239 
/* Only defined as a variable in user-only builds. */
#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

/*
 * NOTE(review): presumably the initial/template context from which the
 * per-thread contexts are derived -- confirm against the init code.
 */
TCGContext tcg_init_ctx;
/* Thread-local pointer to the context used by the current thread. */
__thread TCGContext *tcg_ctx;

/*
 * NOTE(review): presumably the array of all contexts together with its
 * current and maximum element counts -- confirm against the init code.
 */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

/* Not present when building the TCG interpreter. */
#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

/* Per-type available register sets and the call-clobbered register set. */
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;
260 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Store the byte @v at the current output position and step past it. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    tcg_insn_unit *dest = s->code_ptr;

    *dest = v;
    s->code_ptr = dest + 1;
}

/* Overwrite the insn unit at @p with the byte @v. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    p[0] = v;
}
#endif
273 
274 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
275 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
278         *s->code_ptr++ = v;
279     } else {
280         tcg_insn_unit *p = s->code_ptr;
281         memcpy(p, &v, sizeof(v));
282         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
283     }
284 }
285 
286 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
287                                                        uint16_t v)
288 {
289     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
290         *p = v;
291     } else {
292         memcpy(p, &v, sizeof(v));
293     }
294 }
295 #endif
296 
297 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
298 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
299 {
300     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
301         *s->code_ptr++ = v;
302     } else {
303         tcg_insn_unit *p = s->code_ptr;
304         memcpy(p, &v, sizeof(v));
305         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
306     }
307 }
308 
309 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
310                                                        uint32_t v)
311 {
312     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
313         *p = v;
314     } else {
315         memcpy(p, &v, sizeof(v));
316     }
317 }
318 #endif
319 
320 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
321 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
322 {
323     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
324         *s->code_ptr++ = v;
325     } else {
326         tcg_insn_unit *p = s->code_ptr;
327         memcpy(p, &v, sizeof(v));
328         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
329     }
330 }
331 
332 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
333                                                        uint64_t v)
334 {
335     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
336         *p = v;
337     } else {
338         memcpy(p, &v, sizeof(v));
339     }
340 }
341 #endif
342 
343 /* label relocation processing */
344 
345 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
346                           TCGLabel *l, intptr_t addend)
347 {
348     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
349 
350     r->type = type;
351     r->ptr = code_ptr;
352     r->addend = addend;
353     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
354 }
355 
356 static void tcg_out_label(TCGContext *s, TCGLabel *l)
357 {
358     tcg_debug_assert(!l->has_value);
359     l->has_value = 1;
360     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
361 }
362 
363 TCGLabel *gen_new_label(void)
364 {
365     TCGContext *s = tcg_ctx;
366     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
367 
368     memset(l, 0, sizeof(TCGLabel));
369     l->id = s->nb_labels++;
370     QSIMPLEQ_INIT(&l->branches);
371     QSIMPLEQ_INIT(&l->relocs);
372 
373     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
374 
375     return l;
376 }
377 
378 static bool tcg_resolve_relocs(TCGContext *s)
379 {
380     TCGLabel *l;
381 
382     QSIMPLEQ_FOREACH(l, &s->labels, next) {
383         TCGRelocation *r;
384         uintptr_t value = l->u.value;
385 
386         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
387             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
388                 return false;
389             }
390         }
391     }
392     return true;
393 }
394 
395 static void set_jmp_reset_offset(TCGContext *s, int which)
396 {
397     /*
398      * We will check for overflow at the end of the opcode loop in
399      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
400      */
401     s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
402 }
403 
404 static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
405 {
406     /*
407      * We will check for overflow at the end of the opcode loop in
408      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
409      */
410     s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
411 }
412 
413 static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
414 {
415     /*
416      * Return the read-execute version of the pointer, for the benefit
417      * of any pc-relative addressing mode.
418      */
419     return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
420 }
421 
422 static int __attribute__((unused))
423 tlb_mask_table_ofs(TCGContext *s, int which)
424 {
425     return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
426             sizeof(CPUNegativeOffsetState));
427 }
428 
429 /* Signal overflow, starting over with fewer guest insns. */
430 static G_NORETURN
431 void tcg_raise_tb_overflow(TCGContext *s)
432 {
433     siglongjmp(s->jmp_trans, -2);
434 }
435 
/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;       /* destination register (or slot number, see above) */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
452 
453 /**
454  * tcg_out_movext -- move and extend
455  * @s: tcg context
456  * @dst_type: integral type for destination
457  * @dst: destination register
458  * @src_type: integral type for source
459  * @src_ext: extension to apply to source
460  * @src: source register
461  *
462  * Move or extend @src into @dst, depending on @src_ext and the types.
463  */
464 static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
465                            TCGType src_type, MemOp src_ext, TCGReg src)
466 {
467     switch (src_ext) {
468     case MO_UB:
469         tcg_out_ext8u(s, dst, src);
470         break;
471     case MO_SB:
472         tcg_out_ext8s(s, dst_type, dst, src);
473         break;
474     case MO_UW:
475         tcg_out_ext16u(s, dst, src);
476         break;
477     case MO_SW:
478         tcg_out_ext16s(s, dst_type, dst, src);
479         break;
480     case MO_UL:
481     case MO_SL:
482         if (dst_type == TCG_TYPE_I32) {
483             if (src_type == TCG_TYPE_I32) {
484                 tcg_out_mov(s, TCG_TYPE_I32, dst, src);
485             } else {
486                 tcg_out_extrl_i64_i32(s, dst, src);
487             }
488         } else if (src_type == TCG_TYPE_I32) {
489             if (src_ext & MO_SIGN) {
490                 tcg_out_exts_i32_i64(s, dst, src);
491             } else {
492                 tcg_out_extu_i32_i64(s, dst, src);
493             }
494         } else {
495             if (src_ext & MO_SIGN) {
496                 tcg_out_ext32s(s, dst, src);
497             } else {
498                 tcg_out_ext32u(s, dst, src);
499             }
500         }
501         break;
502     case MO_UQ:
503         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
504         if (dst_type == TCG_TYPE_I32) {
505             tcg_out_extrl_i64_i32(s, dst, src);
506         } else {
507             tcg_out_mov(s, TCG_TYPE_I64, dst, src);
508         }
509         break;
510     default:
511         g_assert_not_reached();
512     }
513 }
514 
515 /* Minor variations on a theme, using a structure. */
516 static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
517                                     TCGReg src)
518 {
519     tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
520 }
521 
522 static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
523 {
524     tcg_out_movext1_new_src(s, i, i->src);
525 }
526 
527 /**
528  * tcg_out_movext2 -- move and extend two pair
529  * @s: tcg context
530  * @i1: first move description
531  * @i2: second move description
532  * @scratch: temporary register, or -1 for none
533  *
534  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
535  * between the sources and destinations.
536  */
537 
538 static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
539                             const TCGMovExtend *i2, int scratch)
540 {
541     TCGReg src1 = i1->src;
542     TCGReg src2 = i2->src;
543 
544     if (i1->dst != src2) {
545         tcg_out_movext1(s, i1);
546         tcg_out_movext1(s, i2);
547         return;
548     }
549     if (i2->dst == src1) {
550         TCGType src1_type = i1->src_type;
551         TCGType src2_type = i2->src_type;
552 
553         if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
554             /* The data is now in the correct registers, now extend. */
555             src1 = i2->src;
556             src2 = i1->src;
557         } else {
558             tcg_debug_assert(scratch >= 0);
559             tcg_out_mov(s, src1_type, scratch, src1);
560             src1 = scratch;
561         }
562     }
563     tcg_out_movext1_new_src(s, i2, src2);
564     tcg_out_movext1_new_src(s, i1, src1);
565 }
566 
567 /**
568  * tcg_out_movext3 -- move and extend three pair
569  * @s: tcg context
570  * @i1: first move description
571  * @i2: second move description
572  * @i3: third move description
573  * @scratch: temporary register, or -1 for none
574  *
575  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
576  * between the sources and destinations.
577  */
578 
579 static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
580                             const TCGMovExtend *i2, const TCGMovExtend *i3,
581                             int scratch)
582 {
583     TCGReg src1 = i1->src;
584     TCGReg src2 = i2->src;
585     TCGReg src3 = i3->src;
586 
587     if (i1->dst != src2 && i1->dst != src3) {
588         tcg_out_movext1(s, i1);
589         tcg_out_movext2(s, i2, i3, scratch);
590         return;
591     }
592     if (i2->dst != src1 && i2->dst != src3) {
593         tcg_out_movext1(s, i2);
594         tcg_out_movext2(s, i1, i3, scratch);
595         return;
596     }
597     if (i3->dst != src1 && i3->dst != src2) {
598         tcg_out_movext1(s, i3);
599         tcg_out_movext2(s, i1, i2, scratch);
600         return;
601     }
602 
603     /*
604      * There is a cycle.  Since there are only 3 nodes, the cycle is
605      * either "clockwise" or "anti-clockwise", and can be solved with
606      * a single scratch or two xchg.
607      */
608     if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
609         /* "Clockwise" */
610         if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
611             tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
612             /* The data is now in the correct registers, now extend. */
613             tcg_out_movext1_new_src(s, i1, i1->dst);
614             tcg_out_movext1_new_src(s, i2, i2->dst);
615             tcg_out_movext1_new_src(s, i3, i3->dst);
616         } else {
617             tcg_debug_assert(scratch >= 0);
618             tcg_out_mov(s, i1->src_type, scratch, src1);
619             tcg_out_movext1(s, i3);
620             tcg_out_movext1(s, i2);
621             tcg_out_movext1_new_src(s, i1, scratch);
622         }
623     } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
624         /* "Anti-clockwise" */
625         if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
626             tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
627             /* The data is now in the correct registers, now extend. */
628             tcg_out_movext1_new_src(s, i1, i1->dst);
629             tcg_out_movext1_new_src(s, i2, i2->dst);
630             tcg_out_movext1_new_src(s, i3, i3->dst);
631         } else {
632             tcg_debug_assert(scratch >= 0);
633             tcg_out_mov(s, i1->src_type, scratch, src1);
634             tcg_out_movext1(s, i2);
635             tcg_out_movext1(s, i3);
636             tcg_out_movext1_new_src(s, i1, scratch);
637         }
638     } else {
639         g_assert_not_reached();
640     }
641 }
642 
643 /*
644  * Allocate a new TCGLabelQemuLdst entry.
645  */
646 
647 __attribute__((unused))
648 static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
649 {
650     TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
651 
652     memset(l, 0, sizeof(*l));
653     QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
654 
655     return l;
656 }
657 
658 /*
659  * Allocate new constant pool entries.
660  */
661 
/*
 * One constant-pool reference: the relocation to patch and the constant
 * it refers to.  Entries form a singly linked list headed by
 * s->pool_labels.
 */
typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;       /* location passed to patch_reloc() */
    intptr_t addend;            /* addend passed to patch_reloc() */
    int rtype;                  /* relocation type passed to patch_reloc() */
    unsigned nlong;             /* number of elements in data[] */
    tcg_target_ulong data[];    /* the constant itself */
} TCGLabelPoolData;
670 
671 static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
672                                         tcg_insn_unit *label, intptr_t addend)
673 {
674     TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
675                                      + sizeof(tcg_target_ulong) * nlong);
676 
677     n->label = label;
678     n->addend = addend;
679     n->rtype = rtype;
680     n->nlong = nlong;
681     return n;
682 }
683 
684 static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
685 {
686     TCGLabelPoolData *i, **pp;
687     int nlong = n->nlong;
688 
689     /* Insertion sort on the pool.  */
690     for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
691         if (nlong > i->nlong) {
692             break;
693         }
694         if (nlong < i->nlong) {
695             continue;
696         }
697         if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
698             break;
699         }
700     }
701     n->next = *pp;
702     *pp = n;
703 }
704 
705 /* The "usual" for generic integer code.  */
706 __attribute__((unused))
707 static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
708                            tcg_insn_unit *label, intptr_t addend)
709 {
710     TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
711     n->data[0] = d;
712     new_pool_insert(s, n);
713 }
714 
715 /* For v64 or v128, depending on the host.  */
716 __attribute__((unused))
717 static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
718                         intptr_t addend, tcg_target_ulong d0,
719                         tcg_target_ulong d1)
720 {
721     TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
722     n->data[0] = d0;
723     n->data[1] = d1;
724     new_pool_insert(s, n);
725 }
726 
727 /* For v128 or v256, depending on the host.  */
728 __attribute__((unused))
729 static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
730                         intptr_t addend, tcg_target_ulong d0,
731                         tcg_target_ulong d1, tcg_target_ulong d2,
732                         tcg_target_ulong d3)
733 {
734     TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
735     n->data[0] = d0;
736     n->data[1] = d1;
737     n->data[2] = d2;
738     n->data[3] = d3;
739     new_pool_insert(s, n);
740 }
741 
742 /* For v256, for 32-bit host.  */
743 __attribute__((unused))
744 static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
745                         intptr_t addend, tcg_target_ulong d0,
746                         tcg_target_ulong d1, tcg_target_ulong d2,
747                         tcg_target_ulong d3, tcg_target_ulong d4,
748                         tcg_target_ulong d5, tcg_target_ulong d6,
749                         tcg_target_ulong d7)
750 {
751     TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
752     n->data[0] = d0;
753     n->data[1] = d1;
754     n->data[2] = d2;
755     n->data[3] = d3;
756     n->data[4] = d4;
757     n->data[5] = d5;
758     n->data[6] = d6;
759     n->data[7] = d7;
760     new_pool_insert(s, n);
761 }
762 
763 /*
764  * Generate TB finalization at the end of block
765  */
766 
767 static int tcg_out_ldst_finalize(TCGContext *s)
768 {
769     TCGLabelQemuLdst *lb;
770 
771     /* qemu_ld/st slow paths */
772     QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
773         if (lb->is_ld
774             ? !tcg_out_qemu_ld_slow_path(s, lb)
775             : !tcg_out_qemu_st_slow_path(s, lb)) {
776             return -2;
777         }
778 
779         /*
780          * Test for (pending) buffer overflow.  The assumption is that any
781          * one operation beginning below the high water mark cannot overrun
782          * the buffer completely.  Thus we can test for overflow after
783          * generating code without having to check during generation.
784          */
785         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
786             return -1;
787         }
788     }
789     return 0;
790 }
791 
792 static int tcg_out_pool_finalize(TCGContext *s)
793 {
794     TCGLabelPoolData *p = s->pool_labels;
795     TCGLabelPoolData *l = NULL;
796     void *a;
797 
798     if (p == NULL) {
799         return 0;
800     }
801 
802     /*
803      * ??? Round up to qemu_icache_linesize, but then do not round
804      * again when allocating the next TranslationBlock structure.
805      */
806     a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
807                          sizeof(tcg_target_ulong) * p->nlong);
808     tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
809     s->data_gen_ptr = a;
810 
811     for (; p != NULL; p = p->next) {
812         size_t size = sizeof(tcg_target_ulong) * p->nlong;
813         uintptr_t value;
814 
815         if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
816             if (unlikely(a > s->code_gen_highwater)) {
817                 return -1;
818             }
819             memcpy(a, p->data, size);
820             a += size;
821             l = p;
822         }
823 
824         value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
825         if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
826             return -2;
827         }
828     }
829 
830     s->code_ptr = a;
831     return 0;
832 }
833 
/*
 * Token-pasting helpers: glue the prefix P and one to six constraint
 * names into a single identifier, with underscores between the names.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
840 
/*
 * Define an enumeration for the various combinations.
 *
 * In this first expansion each C_* entry of tcg-target-con-set.h becomes
 * one enumerator name followed by a comma.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),
862 
/*
 * Index of a constraint set.  The two negative values are special
 * markers; the enumerators generated from tcg-target-con-set.h follow
 * C_NotImplemented and therefore start at 0.
 */
typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
868 
/* Forward declaration: returns a TCGConstraintSetIndex for an opcode. */
static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);
870 
/* Drop the enumerator-generating definitions so the names can be reused. */
#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4
887 
888 /* Put all of the constraint sets into an array, indexed by the enum. */
889 
/*
 * One constraint set: the number of output and input arguments, and one
 * constraint string per argument, outputs first.
 */
typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;
894 
895 #define C_O0_I1(I1)                     { 0, 1, { #I1 } },
896 #define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
897 #define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
898 #define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },
899 
900 #define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
901 #define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
902 #define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
903 #define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },
904 
905 #define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
906 #define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
907 #define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },
908 
909 #define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
910 #define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
911 #define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
912 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
913 #define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },
914 
915 static const TCGConstraintSet constraint_sets[] = {
916 #include "tcg-target-con-set.h"
917 };
918 
919 #undef C_O0_I1
920 #undef C_O0_I2
921 #undef C_O0_I3
922 #undef C_O0_I4
923 #undef C_O1_I1
924 #undef C_O1_I2
925 #undef C_O1_I3
926 #undef C_O1_I4
927 #undef C_N1_I2
928 #undef C_N1O1_I1
929 #undef C_N2_I1
930 #undef C_O2_I1
931 #undef C_O2_I2
932 #undef C_O2_I3
933 #undef C_O2_I4
934 #undef C_N1_O1_I4
935 
936 /* Expand the enumerator to be returned from tcg_target_op_def(). */
937 
/*
 * Third flavour of the C_* macros: no trailing comma, so that an
 * invocation is an expression naming the corresponding enumerator.
 * These definitions are left in place for the code that follows.
 */
#define C_O0_I1(i1)                         C_PFX1(c_o0_i1_, i1)
#define C_O0_I2(i1, i2)                     C_PFX2(c_o0_i2_, i1, i2)
#define C_O0_I3(i1, i2, i3)                 C_PFX3(c_o0_i3_, i1, i2, i3)
#define C_O0_I4(i1, i2, i3, i4)             C_PFX4(c_o0_i4_, i1, i2, i3, i4)

#define C_O1_I1(o1, i1)                     C_PFX2(c_o1_i1_, o1, i1)
#define C_O1_I2(o1, i1, i2)                 C_PFX3(c_o1_i2_, o1, i1, i2)
#define C_O1_I3(o1, i1, i2, i3)             C_PFX4(c_o1_i3_, o1, i1, i2, i3)
#define C_O1_I4(o1, i1, i2, i3, i4)         C_PFX5(c_o1_i4_, o1, i1, i2, i3, i4)

#define C_N1_I2(o1, i1, i2)                 C_PFX3(c_n1_i2_, o1, i1, i2)
#define C_N1O1_I1(o1, o2, i1)               C_PFX3(c_n1o1_i1_, o1, o2, i1)
#define C_N2_I1(o1, o2, i1)                 C_PFX3(c_n2_i1_, o1, o2, i1)

#define C_O2_I1(o1, o2, i1)                 C_PFX3(c_o2_i1_, o1, o2, i1)
#define C_O2_I2(o1, o2, i1, i2)             C_PFX4(c_o2_i2_, o1, o2, i1, i2)
#define C_O2_I3(o1, o2, i1, i2, i3)         C_PFX5(c_o2_i3_, o1, o2, i1, i2, i3)
#define C_O2_I4(o1, o2, i1, i2, i3, i4) \
    C_PFX6(c_o2_i4_, o1, o2, i1, i2, i3, i4)
#define C_N1_O1_I4(o1, o2, i1, i2, i3, i4) \
    C_PFX6(c_n1_o1_i4_, o1, o2, i1, i2, i3, i4)
957 
958 /*
959  * TCGOutOp is the base class for a set of structures that describe how
960  * to generate code for a given TCGOpcode.
961  *
962  * @static_constraint:
963  *   C_NotImplemented: The TCGOpcode is not supported by the backend.
964  *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
965  *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
967  *
968  * Subclasses of TCGOutOp will define a set of output routines that may
969  * be used.  Such routines will often be selected by the set of registers
970  * and constants that come out of register allocation.  The set of
971  * routines that are provided will guide the set of constraints that are
972  * legal.  In particular, assume that tcg_optimize() has done its job in
973  * swapping commutative operands and folding operations for which all
974  * operands are constant.
975  */
976 typedef struct TCGOutOp {
977     TCGConstraintSetIndex static_constraint;
978     TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
979 } TCGOutOp;
980 
981 typedef struct TCGOutOpBinary {
982     TCGOutOp base;
983     void (*out_rrr)(TCGContext *s, TCGType type,
984                     TCGReg a0, TCGReg a1, TCGReg a2);
985     void (*out_rri)(TCGContext *s, TCGType type,
986                     TCGReg a0, TCGReg a1, tcg_target_long a2);
987 } TCGOutOpBinary;
988 
989 typedef struct TCGOutOpSubtract {
990     TCGOutOp base;
991     void (*out_rrr)(TCGContext *s, TCGType type,
992                     TCGReg a0, TCGReg a1, TCGReg a2);
993     void (*out_rir)(TCGContext *s, TCGType type,
994                     TCGReg a0, tcg_target_long a1, TCGReg a2);
995 } TCGOutOpSubtract;
996 
997 #include "tcg-target.c.inc"
998 
999 #ifndef CONFIG_TCG_INTERPRETER
1000 /* Validate CPUTLBDescFast placement. */
1001 QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
1002                         sizeof(CPUNegativeOffsetState))
1003                   < MIN_TLB_MASK_TABLE_OFS);
1004 #endif
1005 
1006 /*
1007  * Register V as the TCGOutOp for O.
1008  * This verifies that V is of type T, otherwise give a nice compiler error.
1009  * This prevents trivial mistakes within each arch/tcg-target.c.inc.
1010  */
1011 #define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
1012 
1013 /* Register allocation descriptions for every TCGOpcode. */
1014 static const TCGOutOp * const all_outop[NB_OPS] = {
1015     OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
1016     OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
1017     OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
1018     OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
1019     OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
1020     OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
1021     OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
1022     OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
1023     OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
1024     OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
1025 };
1026 
1027 #undef OUTOP
1028 
1029 /*
1030  * All TCG threads except the parent (i.e. the one that called tcg_context_init
1031  * and registered the target's TCG globals) must register with this function
1032  * before initiating translation.
1033  *
1034  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
1035  * of tcg_region_init() for the reasoning behind this.
1036  *
1037  * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
1039  * is not used anymore for translation once this function is called.
1040  *
1041  * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
1043  * modes.
1044  */
1045 #ifdef CONFIG_USER_ONLY
1046 void tcg_register_thread(void)
1047 {
1048     tcg_ctx = &tcg_init_ctx;
1049 }
1050 #else
1051 void tcg_register_thread(void)
1052 {
1053     TCGContext *s = g_malloc(sizeof(*s));
1054     unsigned int i, n;
1055 
1056     *s = tcg_init_ctx;
1057 
1058     /* Relink mem_base.  */
1059     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
1060         if (tcg_init_ctx.temps[i].mem_base) {
1061             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
1062             tcg_debug_assert(b >= 0 && b < n);
1063             s->temps[i].mem_base = &s->temps[b];
1064         }
1065     }
1066 
1067     /* Claim an entry in tcg_ctxs */
1068     n = qatomic_fetch_inc(&tcg_cur_ctxs);
1069     g_assert(n < tcg_max_ctxs);
1070     qatomic_set(&tcg_ctxs[n], s);
1071 
1072     if (n > 0) {
1073         tcg_region_initial_alloc(s);
1074     }
1075 
1076     tcg_ctx = s;
1077 }
1078 #endif /* !CONFIG_USER_ONLY */
1079 
1080 /* pool based memory allocation */
1081 void *tcg_malloc_internal(TCGContext *s, int size)
1082 {
1083     TCGPool *p;
1084     int pool_size;
1085 
1086     if (size > TCG_POOL_CHUNK_SIZE) {
1087         /* big malloc: insert a new pool (XXX: could optimize) */
1088         p = g_malloc(sizeof(TCGPool) + size);
1089         p->size = size;
1090         p->next = s->pool_first_large;
1091         s->pool_first_large = p;
1092         return p->data;
1093     } else {
1094         p = s->pool_current;
1095         if (!p) {
1096             p = s->pool_first;
1097             if (!p)
1098                 goto new_pool;
1099         } else {
1100             if (!p->next) {
1101             new_pool:
1102                 pool_size = TCG_POOL_CHUNK_SIZE;
1103                 p = g_malloc(sizeof(TCGPool) + pool_size);
1104                 p->size = pool_size;
1105                 p->next = NULL;
1106                 if (s->pool_current) {
1107                     s->pool_current->next = p;
1108                 } else {
1109                     s->pool_first = p;
1110                 }
1111             } else {
1112                 p = p->next;
1113             }
1114         }
1115     }
1116     s->pool_current = p;
1117     s->pool_cur = p->data + size;
1118     s->pool_end = p->data + p->size;
1119     return p->data;
1120 }
1121 
1122 void tcg_pool_reset(TCGContext *s)
1123 {
1124     TCGPool *p, *t;
1125     for (p = s->pool_first_large; p; p = t) {
1126         t = p->next;
1127         g_free(p);
1128     }
1129     s->pool_first_large = NULL;
1130     s->pool_cur = s->pool_end = NULL;
1131     s->pool_current = NULL;
1132 }
1133 
1134 /*
1135  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
1136  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
1137  * We only use these for layout in tcg_out_ld_helper_ret and
1138  * tcg_out_st_helper_args, and share them between several of
1139  * the helpers, with the end result that it's easier to build manually.
1140  */
1141 
/* Typecode alias sized like a host register ("ttl"). */
#if TCG_TARGET_REG_BITS != 32
# define dh_typecode_ttl  dh_typecode_i64
#else
# define dh_typecode_ttl  dh_typecode_i32
#endif
1147 
1148 static TCGHelperInfo info_helper_ld32_mmu = {
1149     .flags = TCG_CALL_NO_WG,
1150     .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
1151               | dh_typemask(env, 1)
1152               | dh_typemask(i64, 2)  /* uint64_t addr */
1153               | dh_typemask(i32, 3)  /* unsigned oi */
1154               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1155 };
1156 
1157 static TCGHelperInfo info_helper_ld64_mmu = {
1158     .flags = TCG_CALL_NO_WG,
1159     .typemask = dh_typemask(i64, 0)  /* return uint64_t */
1160               | dh_typemask(env, 1)
1161               | dh_typemask(i64, 2)  /* uint64_t addr */
1162               | dh_typemask(i32, 3)  /* unsigned oi */
1163               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1164 };
1165 
1166 static TCGHelperInfo info_helper_ld128_mmu = {
1167     .flags = TCG_CALL_NO_WG,
1168     .typemask = dh_typemask(i128, 0) /* return Int128 */
1169               | dh_typemask(env, 1)
1170               | dh_typemask(i64, 2)  /* uint64_t addr */
1171               | dh_typemask(i32, 3)  /* unsigned oi */
1172               | dh_typemask(ptr, 4)  /* uintptr_t ra */
1173 };
1174 
1175 static TCGHelperInfo info_helper_st32_mmu = {
1176     .flags = TCG_CALL_NO_WG,
1177     .typemask = dh_typemask(void, 0)
1178               | dh_typemask(env, 1)
1179               | dh_typemask(i64, 2)  /* uint64_t addr */
1180               | dh_typemask(i32, 3)  /* uint32_t data */
1181               | dh_typemask(i32, 4)  /* unsigned oi */
1182               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1183 };
1184 
1185 static TCGHelperInfo info_helper_st64_mmu = {
1186     .flags = TCG_CALL_NO_WG,
1187     .typemask = dh_typemask(void, 0)
1188               | dh_typemask(env, 1)
1189               | dh_typemask(i64, 2)  /* uint64_t addr */
1190               | dh_typemask(i64, 3)  /* uint64_t data */
1191               | dh_typemask(i32, 4)  /* unsigned oi */
1192               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1193 };
1194 
1195 static TCGHelperInfo info_helper_st128_mmu = {
1196     .flags = TCG_CALL_NO_WG,
1197     .typemask = dh_typemask(void, 0)
1198               | dh_typemask(env, 1)
1199               | dh_typemask(i64, 2)  /* uint64_t addr */
1200               | dh_typemask(i128, 3) /* Int128 data */
1201               | dh_typemask(i32, 4)  /* unsigned oi */
1202               | dh_typemask(ptr, 5)  /* uintptr_t ra */
1203 };
1204 
1205 #ifdef CONFIG_TCG_INTERPRETER
1206 static ffi_type *typecode_to_ffi(int argmask)
1207 {
1208     /*
1209      * libffi does not support __int128_t, so we have forced Int128
1210      * to use the structure definition instead of the builtin type.
1211      */
1212     static ffi_type *ffi_type_i128_elements[3] = {
1213         &ffi_type_uint64,
1214         &ffi_type_uint64,
1215         NULL
1216     };
1217     static ffi_type ffi_type_i128 = {
1218         .size = 16,
1219         .alignment = __alignof__(Int128),
1220         .type = FFI_TYPE_STRUCT,
1221         .elements = ffi_type_i128_elements,
1222     };
1223 
1224     switch (argmask) {
1225     case dh_typecode_void:
1226         return &ffi_type_void;
1227     case dh_typecode_i32:
1228         return &ffi_type_uint32;
1229     case dh_typecode_s32:
1230         return &ffi_type_sint32;
1231     case dh_typecode_i64:
1232         return &ffi_type_uint64;
1233     case dh_typecode_s64:
1234         return &ffi_type_sint64;
1235     case dh_typecode_ptr:
1236         return &ffi_type_pointer;
1237     case dh_typecode_i128:
1238         return &ffi_type_i128;
1239     }
1240     g_assert_not_reached();
1241 }
1242 
1243 static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
1244 {
1245     unsigned typemask = info->typemask;
1246     struct {
1247         ffi_cif cif;
1248         ffi_type *args[];
1249     } *ca;
1250     ffi_status status;
1251     int nargs;
1252 
1253     /* Ignoring the return type, find the last non-zero field. */
1254     nargs = 32 - clz32(typemask >> 3);
1255     nargs = DIV_ROUND_UP(nargs, 3);
1256     assert(nargs <= MAX_CALL_IARGS);
1257 
1258     ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
1259     ca->cif.rtype = typecode_to_ffi(typemask & 7);
1260     ca->cif.nargs = nargs;
1261 
1262     if (nargs != 0) {
1263         ca->cif.arg_types = ca->args;
1264         for (int j = 0; j < nargs; ++j) {
1265             int typecode = extract32(typemask, (j + 1) * 3, 3);
1266             ca->args[j] = typecode_to_ffi(typecode);
1267         }
1268     }
1269 
1270     status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
1271                           ca->cif.rtype, ca->cif.arg_types);
1272     assert(status == FFI_OK);
1273 
1274     return &ca->cif;
1275 }
1276 
1277 #define HELPER_INFO_INIT(I)      (&(I)->cif)
1278 #define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
1279 #else
1280 #define HELPER_INFO_INIT(I)      (&(I)->init)
1281 #define HELPER_INFO_INIT_VAL(I)  1
1282 #endif /* CONFIG_TCG_INTERPRETER */
1283 
1284 static inline bool arg_slot_reg_p(unsigned arg_slot)
1285 {
1286     /*
1287      * Split the sizeof away from the comparison to avoid Werror from
1288      * "unsigned < 0 is always false", when iarg_regs is empty.
1289      */
1290     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1291     return arg_slot < nreg;
1292 }
1293 
1294 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1295 {
1296     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1297     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1298 
1299     tcg_debug_assert(stk_slot < max);
1300     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1301 }
1302 
typedef struct TCGCumulativeArgs TCGCumulativeArgs;

/* Running cursors used while laying out one helper's arguments. */
struct TCGCumulativeArgs {
    /* Index into tcg_gen_callN args[]. */
    int arg_idx;
    /* Index into TCGHelperInfo in[]. */
    int info_in_idx;
    /* Next register-or-stack argument slot. */
    int arg_slot;
    /* Next stack slot reserved for by-reference copies. */
    int ref_slot;
};
1309 
1310 static void layout_arg_even(TCGCumulativeArgs *cum)
1311 {
1312     cum->arg_slot += cum->arg_slot & 1;
1313 }
1314 
1315 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1316                          TCGCallArgumentKind kind)
1317 {
1318     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1319 
1320     *loc = (TCGCallArgumentLoc){
1321         .kind = kind,
1322         .arg_idx = cum->arg_idx,
1323         .arg_slot = cum->arg_slot,
1324     };
1325     cum->info_in_idx++;
1326     cum->arg_slot++;
1327 }
1328 
1329 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1330                                 TCGHelperInfo *info, int n)
1331 {
1332     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1333 
1334     for (int i = 0; i < n; ++i) {
1335         /* Layout all using the same arg_idx, adjusting the subindex. */
1336         loc[i] = (TCGCallArgumentLoc){
1337             .kind = TCG_CALL_ARG_NORMAL,
1338             .arg_idx = cum->arg_idx,
1339             .tmp_subindex = i,
1340             .arg_slot = cum->arg_slot + i,
1341         };
1342     }
1343     cum->info_in_idx += n;
1344     cum->arg_slot += n;
1345 }
1346 
1347 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1348 {
1349     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1350     int n = 128 / TCG_TARGET_REG_BITS;
1351 
1352     /* The first subindex carries the pointer. */
1353     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1354 
1355     /*
1356      * The callee is allowed to clobber memory associated with
1357      * structure pass by-reference.  Therefore we must make copies.
1358      * Allocate space from "ref_slot", which will be adjusted to
1359      * follow the parameters on the stack.
1360      */
1361     loc[0].ref_slot = cum->ref_slot;
1362 
1363     /*
1364      * Subsequent words also go into the reference slot, but
1365      * do not accumulate into the regular arguments.
1366      */
1367     for (int i = 1; i < n; ++i) {
1368         loc[i] = (TCGCallArgumentLoc){
1369             .kind = TCG_CALL_ARG_BY_REF_N,
1370             .arg_idx = cum->arg_idx,
1371             .tmp_subindex = i,
1372             .ref_slot = cum->ref_slot + i,
1373         };
1374     }
1375     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1376     cum->ref_slot += n;
1377 }
1378 
1379 static void init_call_layout(TCGHelperInfo *info)
1380 {
1381     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1382     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1383     unsigned typemask = info->typemask;
1384     unsigned typecode;
1385     TCGCumulativeArgs cum = { };
1386 
1387     /*
1388      * Parse and place any function return value.
1389      */
1390     typecode = typemask & 7;
1391     switch (typecode) {
1392     case dh_typecode_void:
1393         info->nr_out = 0;
1394         break;
1395     case dh_typecode_i32:
1396     case dh_typecode_s32:
1397     case dh_typecode_ptr:
1398         info->nr_out = 1;
1399         info->out_kind = TCG_CALL_RET_NORMAL;
1400         break;
1401     case dh_typecode_i64:
1402     case dh_typecode_s64:
1403         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1404         info->out_kind = TCG_CALL_RET_NORMAL;
1405         /* Query the last register now to trigger any assert early. */
1406         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1407         break;
1408     case dh_typecode_i128:
1409         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1410         info->out_kind = TCG_TARGET_CALL_RET_I128;
1411         switch (TCG_TARGET_CALL_RET_I128) {
1412         case TCG_CALL_RET_NORMAL:
1413             /* Query the last register now to trigger any assert early. */
1414             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1415             break;
1416         case TCG_CALL_RET_BY_VEC:
1417             /* Query the single register now to trigger any assert early. */
1418             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1419             break;
1420         case TCG_CALL_RET_BY_REF:
1421             /*
1422              * Allocate the first argument to the output.
1423              * We don't need to store this anywhere, just make it
1424              * unavailable for use in the input loop below.
1425              */
1426             cum.arg_slot = 1;
1427             break;
1428         default:
1429             qemu_build_not_reached();
1430         }
1431         break;
1432     default:
1433         g_assert_not_reached();
1434     }
1435 
1436     /*
1437      * Parse and place function arguments.
1438      */
1439     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1440         TCGCallArgumentKind kind;
1441         TCGType type;
1442 
1443         typecode = typemask & 7;
1444         switch (typecode) {
1445         case dh_typecode_i32:
1446         case dh_typecode_s32:
1447             type = TCG_TYPE_I32;
1448             break;
1449         case dh_typecode_i64:
1450         case dh_typecode_s64:
1451             type = TCG_TYPE_I64;
1452             break;
1453         case dh_typecode_ptr:
1454             type = TCG_TYPE_PTR;
1455             break;
1456         case dh_typecode_i128:
1457             type = TCG_TYPE_I128;
1458             break;
1459         default:
1460             g_assert_not_reached();
1461         }
1462 
1463         switch (type) {
1464         case TCG_TYPE_I32:
1465             switch (TCG_TARGET_CALL_ARG_I32) {
1466             case TCG_CALL_ARG_EVEN:
1467                 layout_arg_even(&cum);
1468                 /* fall through */
1469             case TCG_CALL_ARG_NORMAL:
1470                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1471                 break;
1472             case TCG_CALL_ARG_EXTEND:
1473                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1474                 layout_arg_1(&cum, info, kind);
1475                 break;
1476             default:
1477                 qemu_build_not_reached();
1478             }
1479             break;
1480 
1481         case TCG_TYPE_I64:
1482             switch (TCG_TARGET_CALL_ARG_I64) {
1483             case TCG_CALL_ARG_EVEN:
1484                 layout_arg_even(&cum);
1485                 /* fall through */
1486             case TCG_CALL_ARG_NORMAL:
1487                 if (TCG_TARGET_REG_BITS == 32) {
1488                     layout_arg_normal_n(&cum, info, 2);
1489                 } else {
1490                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1491                 }
1492                 break;
1493             default:
1494                 qemu_build_not_reached();
1495             }
1496             break;
1497 
1498         case TCG_TYPE_I128:
1499             switch (TCG_TARGET_CALL_ARG_I128) {
1500             case TCG_CALL_ARG_EVEN:
1501                 layout_arg_even(&cum);
1502                 /* fall through */
1503             case TCG_CALL_ARG_NORMAL:
1504                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1505                 break;
1506             case TCG_CALL_ARG_BY_REF:
1507                 layout_arg_by_ref(&cum, info);
1508                 break;
1509             default:
1510                 qemu_build_not_reached();
1511             }
1512             break;
1513 
1514         default:
1515             g_assert_not_reached();
1516         }
1517     }
1518     info->nr_in = cum.info_in_idx;
1519 
1520     /* Validate that we didn't overrun the input array. */
1521     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1522     /* Validate the backend has enough argument space. */
1523     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1524 
1525     /*
1526      * Relocate the "ref_slot" area to the end of the parameters.
1527      * Minimizing this stack offset helps code size for x86,
1528      * which has a signed 8-bit offset encoding.
1529      */
1530     if (cum.ref_slot != 0) {
1531         int ref_base = 0;
1532 
1533         if (cum.arg_slot > max_reg_slots) {
1534             int align = __alignof(Int128) / sizeof(tcg_target_long);
1535 
1536             ref_base = cum.arg_slot - max_reg_slots;
1537             if (align > 1) {
1538                 ref_base = ROUND_UP(ref_base, align);
1539             }
1540         }
1541         assert(ref_base + cum.ref_slot <= max_stk_slots);
1542         ref_base += max_reg_slots;
1543 
1544         if (ref_base != 0) {
1545             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1546                 TCGCallArgumentLoc *loc = &info->in[i];
1547                 switch (loc->kind) {
1548                 case TCG_CALL_ARG_BY_REF:
1549                 case TCG_CALL_ARG_BY_REF_N:
1550                     loc->ref_slot += ref_base;
1551                     break;
1552                 default:
1553                     break;
1554                 }
1555             }
1556         }
1557     }
1558 }
1559 
1560 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1561 static void process_constraint_sets(void);
1562 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1563                                             TCGReg reg, const char *name);
1564 
1565 static void tcg_context_init(unsigned max_threads)
1566 {
1567     TCGContext *s = &tcg_init_ctx;
1568     int n, i;
1569     TCGTemp *ts;
1570 
1571     memset(s, 0, sizeof(*s));
1572     s->nb_globals = 0;
1573 
1574     init_call_layout(&info_helper_ld32_mmu);
1575     init_call_layout(&info_helper_ld64_mmu);
1576     init_call_layout(&info_helper_ld128_mmu);
1577     init_call_layout(&info_helper_st32_mmu);
1578     init_call_layout(&info_helper_st64_mmu);
1579     init_call_layout(&info_helper_st128_mmu);
1580 
1581     tcg_target_init(s);
1582     process_constraint_sets();
1583 
1584     /* Reverse the order of the saved registers, assuming they're all at
1585        the start of tcg_target_reg_alloc_order.  */
1586     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1587         int r = tcg_target_reg_alloc_order[n];
1588         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1589             break;
1590         }
1591     }
1592     for (i = 0; i < n; ++i) {
1593         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1594     }
1595     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1596         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1597     }
1598 
1599     tcg_ctx = s;
1600     /*
1601      * In user-mode we simply share the init context among threads, since we
1602      * use a single region. See the documentation tcg_region_init() for the
1603      * reasoning behind this.
1604      * In system-mode we will have at most max_threads TCG threads.
1605      */
1606 #ifdef CONFIG_USER_ONLY
1607     tcg_ctxs = &tcg_ctx;
1608     tcg_cur_ctxs = 1;
1609     tcg_max_ctxs = 1;
1610 #else
1611     tcg_max_ctxs = max_threads;
1612     tcg_ctxs = g_new0(TCGContext *, max_threads);
1613 #endif
1614 
1615     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1616     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1617     tcg_env = temp_tcgv_ptr(ts);
1618 }
1619 
/*
 * Initialize TCG: set up the initial context first, then the code
 * regions, forwarding @tb_size, @splitwx and @max_threads unchanged.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
{
    /* The context has to exist before regions are set up. */
    tcg_context_init(max_threads);

    tcg_region_init(tb_size, splitwx, max_threads);
}
1625 
1626 /*
1627  * Allocate TBs right before their corresponding translated code, making
1628  * sure that TBs and code are on different cache lines.
1629  */
1630 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1631 {
1632     uintptr_t align = qemu_icache_linesize;
1633     TranslationBlock *tb;
1634     void *next;
1635 
1636  retry:
1637     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1638     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1639 
1640     if (unlikely(next > s->code_gen_highwater)) {
1641         if (tcg_region_alloc(s)) {
1642             return NULL;
1643         }
1644         goto retry;
1645     }
1646     qatomic_set(&s->code_gen_ptr, next);
1647     return tb;
1648 }
1649 
1650 void tcg_prologue_init(void)
1651 {
1652     TCGContext *s = tcg_ctx;
1653     size_t prologue_size;
1654 
1655     s->code_ptr = s->code_gen_ptr;
1656     s->code_buf = s->code_gen_ptr;
1657     s->data_gen_ptr = NULL;
1658 
1659 #ifndef CONFIG_TCG_INTERPRETER
1660     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1661 #endif
1662 
1663     s->pool_labels = NULL;
1664 
1665     qemu_thread_jit_write();
1666     /* Generate the prologue.  */
1667     tcg_target_qemu_prologue(s);
1668 
1669     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1670     {
1671         int result = tcg_out_pool_finalize(s);
1672         tcg_debug_assert(result == 0);
1673     }
1674 
1675     prologue_size = tcg_current_code_size(s);
1676     perf_report_prologue(s->code_gen_ptr, prologue_size);
1677 
1678 #ifndef CONFIG_TCG_INTERPRETER
1679     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1680                         (uintptr_t)s->code_buf, prologue_size);
1681 #endif
1682 
1683     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1684         FILE *logfile = qemu_log_trylock();
1685         if (logfile) {
1686             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1687             if (s->data_gen_ptr) {
1688                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1689                 size_t data_size = prologue_size - code_size;
1690                 size_t i;
1691 
1692                 disas(logfile, s->code_gen_ptr, code_size);
1693 
1694                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1695                     if (sizeof(tcg_target_ulong) == 8) {
1696                         fprintf(logfile,
1697                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1698                                 (uintptr_t)s->data_gen_ptr + i,
1699                                 *(uint64_t *)(s->data_gen_ptr + i));
1700                     } else {
1701                         fprintf(logfile,
1702                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1703                                 (uintptr_t)s->data_gen_ptr + i,
1704                                 *(uint32_t *)(s->data_gen_ptr + i));
1705                     }
1706                 }
1707             } else {
1708                 disas(logfile, s->code_gen_ptr, prologue_size);
1709             }
1710             fprintf(logfile, "\n");
1711             qemu_log_unlock(logfile);
1712         }
1713     }
1714 
1715 #ifndef CONFIG_TCG_INTERPRETER
1716     /*
1717      * Assert that goto_ptr is implemented completely, setting an epilogue.
1718      * For tci, we use NULL as the signal to return from the interpreter,
1719      * so skip this check.
1720      */
1721     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1722 #endif
1723 
1724     tcg_region_prologue_set(s);
1725 }
1726 
1727 void tcg_func_start(TCGContext *s)
1728 {
1729     tcg_pool_reset(s);
1730     s->nb_temps = s->nb_globals;
1731 
1732     /* No temps have been previously allocated for size or locality.  */
1733     tcg_temp_ebb_reset_freed(s);
1734 
1735     /* No constant temps have been previously allocated. */
1736     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1737         if (s->const_table[i]) {
1738             g_hash_table_remove_all(s->const_table[i]);
1739         }
1740     }
1741 
1742     s->nb_ops = 0;
1743     s->nb_labels = 0;
1744     s->current_frame_offset = s->frame_start;
1745 
1746 #ifdef CONFIG_DEBUG_TCG
1747     s->goto_tb_issue_mask = 0;
1748 #endif
1749 
1750     QTAILQ_INIT(&s->ops);
1751     QTAILQ_INIT(&s->free_ops);
1752     s->emit_before_op = NULL;
1753     QSIMPLEQ_INIT(&s->labels);
1754 
1755     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1756     tcg_debug_assert(s->insn_start_words > 0);
1757 }
1758 
1759 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1760 {
1761     int n = s->nb_temps++;
1762 
1763     if (n >= TCG_MAX_TEMPS) {
1764         tcg_raise_tb_overflow(s);
1765     }
1766     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1767 }
1768 
1769 static TCGTemp *tcg_global_alloc(TCGContext *s)
1770 {
1771     TCGTemp *ts;
1772 
1773     tcg_debug_assert(s->nb_globals == s->nb_temps);
1774     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1775     s->nb_globals++;
1776     ts = tcg_temp_alloc(s);
1777     ts->kind = TEMP_GLOBAL;
1778 
1779     return ts;
1780 }
1781 
1782 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1783                                             TCGReg reg, const char *name)
1784 {
1785     TCGTemp *ts;
1786 
1787     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1788 
1789     ts = tcg_global_alloc(s);
1790     ts->base_type = type;
1791     ts->type = type;
1792     ts->kind = TEMP_FIXED;
1793     ts->reg = reg;
1794     ts->name = name;
1795     tcg_regset_set_reg(s->reserved_regs, reg);
1796 
1797     return ts;
1798 }
1799 
1800 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1801 {
1802     s->frame_start = start;
1803     s->frame_end = start + size;
1804     s->frame_temp
1805         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1806 }
1807 
1808 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1809                                             const char *name, TCGType type)
1810 {
1811     TCGContext *s = tcg_ctx;
1812     TCGTemp *base_ts = tcgv_ptr_temp(base);
1813     TCGTemp *ts = tcg_global_alloc(s);
1814     int indirect_reg = 0;
1815 
1816     switch (base_ts->kind) {
1817     case TEMP_FIXED:
1818         break;
1819     case TEMP_GLOBAL:
1820         /* We do not support double-indirect registers.  */
1821         tcg_debug_assert(!base_ts->indirect_reg);
1822         base_ts->indirect_base = 1;
1823         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1824                             ? 2 : 1);
1825         indirect_reg = 1;
1826         break;
1827     default:
1828         g_assert_not_reached();
1829     }
1830 
1831     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1832         TCGTemp *ts2 = tcg_global_alloc(s);
1833         char buf[64];
1834 
1835         ts->base_type = TCG_TYPE_I64;
1836         ts->type = TCG_TYPE_I32;
1837         ts->indirect_reg = indirect_reg;
1838         ts->mem_allocated = 1;
1839         ts->mem_base = base_ts;
1840         ts->mem_offset = offset;
1841         pstrcpy(buf, sizeof(buf), name);
1842         pstrcat(buf, sizeof(buf), "_0");
1843         ts->name = strdup(buf);
1844 
1845         tcg_debug_assert(ts2 == ts + 1);
1846         ts2->base_type = TCG_TYPE_I64;
1847         ts2->type = TCG_TYPE_I32;
1848         ts2->indirect_reg = indirect_reg;
1849         ts2->mem_allocated = 1;
1850         ts2->mem_base = base_ts;
1851         ts2->mem_offset = offset + 4;
1852         ts2->temp_subindex = 1;
1853         pstrcpy(buf, sizeof(buf), name);
1854         pstrcat(buf, sizeof(buf), "_1");
1855         ts2->name = strdup(buf);
1856     } else {
1857         ts->base_type = type;
1858         ts->type = type;
1859         ts->indirect_reg = indirect_reg;
1860         ts->mem_allocated = 1;
1861         ts->mem_base = base_ts;
1862         ts->mem_offset = offset;
1863         ts->name = name;
1864     }
1865     return ts;
1866 }
1867 
1868 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1869 {
1870     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1871     return temp_tcgv_i32(ts);
1872 }
1873 
1874 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1875 {
1876     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1877     return temp_tcgv_i64(ts);
1878 }
1879 
1880 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1881 {
1882     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1883     return temp_tcgv_ptr(ts);
1884 }
1885 
1886 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1887 {
1888     TCGContext *s = tcg_ctx;
1889     TCGTemp *ts;
1890     int n;
1891 
1892     if (kind == TEMP_EBB) {
1893         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1894 
1895         if (idx < TCG_MAX_TEMPS) {
1896             /* There is already an available temp with the right type.  */
1897             clear_bit(idx, s->free_temps[type].l);
1898 
1899             ts = &s->temps[idx];
1900             ts->temp_allocated = 1;
1901             tcg_debug_assert(ts->base_type == type);
1902             tcg_debug_assert(ts->kind == kind);
1903             return ts;
1904         }
1905     } else {
1906         tcg_debug_assert(kind == TEMP_TB);
1907     }
1908 
1909     switch (type) {
1910     case TCG_TYPE_I32:
1911     case TCG_TYPE_V64:
1912     case TCG_TYPE_V128:
1913     case TCG_TYPE_V256:
1914         n = 1;
1915         break;
1916     case TCG_TYPE_I64:
1917         n = 64 / TCG_TARGET_REG_BITS;
1918         break;
1919     case TCG_TYPE_I128:
1920         n = 128 / TCG_TARGET_REG_BITS;
1921         break;
1922     default:
1923         g_assert_not_reached();
1924     }
1925 
1926     ts = tcg_temp_alloc(s);
1927     ts->base_type = type;
1928     ts->temp_allocated = 1;
1929     ts->kind = kind;
1930 
1931     if (n == 1) {
1932         ts->type = type;
1933     } else {
1934         ts->type = TCG_TYPE_REG;
1935 
1936         for (int i = 1; i < n; ++i) {
1937             TCGTemp *ts2 = tcg_temp_alloc(s);
1938 
1939             tcg_debug_assert(ts2 == ts + i);
1940             ts2->base_type = type;
1941             ts2->type = TCG_TYPE_REG;
1942             ts2->temp_allocated = 1;
1943             ts2->temp_subindex = i;
1944             ts2->kind = kind;
1945         }
1946     }
1947     return ts;
1948 }
1949 
1950 TCGv_i32 tcg_temp_new_i32(void)
1951 {
1952     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1953 }
1954 
1955 TCGv_i32 tcg_temp_ebb_new_i32(void)
1956 {
1957     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1958 }
1959 
1960 TCGv_i64 tcg_temp_new_i64(void)
1961 {
1962     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1963 }
1964 
1965 TCGv_i64 tcg_temp_ebb_new_i64(void)
1966 {
1967     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1968 }
1969 
1970 TCGv_ptr tcg_temp_new_ptr(void)
1971 {
1972     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1973 }
1974 
1975 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1976 {
1977     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1978 }
1979 
1980 TCGv_i128 tcg_temp_new_i128(void)
1981 {
1982     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1983 }
1984 
1985 TCGv_i128 tcg_temp_ebb_new_i128(void)
1986 {
1987     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1988 }
1989 
1990 TCGv_vec tcg_temp_new_vec(TCGType type)
1991 {
1992     TCGTemp *t;
1993 
1994 #ifdef CONFIG_DEBUG_TCG
1995     switch (type) {
1996     case TCG_TYPE_V64:
1997         assert(TCG_TARGET_HAS_v64);
1998         break;
1999     case TCG_TYPE_V128:
2000         assert(TCG_TARGET_HAS_v128);
2001         break;
2002     case TCG_TYPE_V256:
2003         assert(TCG_TARGET_HAS_v256);
2004         break;
2005     default:
2006         g_assert_not_reached();
2007     }
2008 #endif
2009 
2010     t = tcg_temp_new_internal(type, TEMP_EBB);
2011     return temp_tcgv_vec(t);
2012 }
2013 
2014 /* Create a new temp of the same type as an existing temp.  */
2015 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2016 {
2017     TCGTemp *t = tcgv_vec_temp(match);
2018 
2019     tcg_debug_assert(t->temp_allocated != 0);
2020 
2021     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2022     return temp_tcgv_vec(t);
2023 }
2024 
2025 void tcg_temp_free_internal(TCGTemp *ts)
2026 {
2027     TCGContext *s = tcg_ctx;
2028 
2029     switch (ts->kind) {
2030     case TEMP_CONST:
2031     case TEMP_TB:
2032         /* Silently ignore free. */
2033         break;
2034     case TEMP_EBB:
2035         tcg_debug_assert(ts->temp_allocated != 0);
2036         ts->temp_allocated = 0;
2037         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2038         break;
2039     default:
2040         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2041         g_assert_not_reached();
2042     }
2043 }
2044 
2045 void tcg_temp_free_i32(TCGv_i32 arg)
2046 {
2047     tcg_temp_free_internal(tcgv_i32_temp(arg));
2048 }
2049 
2050 void tcg_temp_free_i64(TCGv_i64 arg)
2051 {
2052     tcg_temp_free_internal(tcgv_i64_temp(arg));
2053 }
2054 
2055 void tcg_temp_free_i128(TCGv_i128 arg)
2056 {
2057     tcg_temp_free_internal(tcgv_i128_temp(arg));
2058 }
2059 
2060 void tcg_temp_free_ptr(TCGv_ptr arg)
2061 {
2062     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2063 }
2064 
2065 void tcg_temp_free_vec(TCGv_vec arg)
2066 {
2067     tcg_temp_free_internal(tcgv_vec_temp(arg));
2068 }
2069 
2070 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2071 {
2072     TCGContext *s = tcg_ctx;
2073     GHashTable *h = s->const_table[type];
2074     TCGTemp *ts;
2075 
2076     if (h == NULL) {
2077         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2078         s->const_table[type] = h;
2079     }
2080 
2081     ts = g_hash_table_lookup(h, &val);
2082     if (ts == NULL) {
2083         int64_t *val_ptr;
2084 
2085         ts = tcg_temp_alloc(s);
2086 
2087         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2088             TCGTemp *ts2 = tcg_temp_alloc(s);
2089 
2090             tcg_debug_assert(ts2 == ts + 1);
2091 
2092             ts->base_type = TCG_TYPE_I64;
2093             ts->type = TCG_TYPE_I32;
2094             ts->kind = TEMP_CONST;
2095             ts->temp_allocated = 1;
2096 
2097             ts2->base_type = TCG_TYPE_I64;
2098             ts2->type = TCG_TYPE_I32;
2099             ts2->kind = TEMP_CONST;
2100             ts2->temp_allocated = 1;
2101             ts2->temp_subindex = 1;
2102 
2103             /*
2104              * Retain the full value of the 64-bit constant in the low
2105              * part, so that the hash table works.  Actual uses will
2106              * truncate the value to the low part.
2107              */
2108             ts[HOST_BIG_ENDIAN].val = val;
2109             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2110             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2111         } else {
2112             ts->base_type = type;
2113             ts->type = type;
2114             ts->kind = TEMP_CONST;
2115             ts->temp_allocated = 1;
2116             ts->val = val;
2117             val_ptr = &ts->val;
2118         }
2119         g_hash_table_insert(h, val_ptr, ts);
2120     }
2121 
2122     return ts;
2123 }
2124 
2125 TCGv_i32 tcg_constant_i32(int32_t val)
2126 {
2127     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2128 }
2129 
2130 TCGv_i64 tcg_constant_i64(int64_t val)
2131 {
2132     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2133 }
2134 
2135 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2136 {
2137     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2138 }
2139 
2140 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2141 {
2142     val = dup_const(vece, val);
2143     return temp_tcgv_vec(tcg_constant_internal(type, val));
2144 }
2145 
2146 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2147 {
2148     TCGTemp *t = tcgv_vec_temp(match);
2149 
2150     tcg_debug_assert(t->temp_allocated != 0);
2151     return tcg_constant_vec(t->base_type, vece, val);
2152 }
2153 
2154 #ifdef CONFIG_DEBUG_TCG
2155 size_t temp_idx(TCGTemp *ts)
2156 {
2157     ptrdiff_t n = ts - tcg_ctx->temps;
2158     assert(n >= 0 && n < tcg_ctx->nb_temps);
2159     return n;
2160 }
2161 
2162 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2163 {
2164     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2165 
2166     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2167     assert(o % sizeof(TCGTemp) == 0);
2168 
2169     return (void *)tcg_ctx + (uintptr_t)v;
2170 }
2171 #endif /* CONFIG_DEBUG_TCG */
2172 
/*
 * Return true if OP may appear in the opcode stream with TYPE.
 * Test the runtime variable that controls each opcode.
 *
 * TYPE only matters for the opcodes that are not tied to a width by
 * their name: the unsuffixed integer ops, the *_vec ops and the generic
 * ops handled in the default case.  FLAGS is only forwarded to the
 * dynamic_constraint hook of a generic op.
 */
bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
{
    bool has_type;

    /*
     * Is TYPE usable at all on this host?  I32 always is, I64 needs
     * 64-bit host registers, and each vector width has its own backend
     * flag.  Any other type counts as unsupported.
     */
    switch (type) {
    case TCG_TYPE_I32:
        has_type = true;
        break;
    case TCG_TYPE_I64:
        has_type = TCG_TARGET_REG_BITS == 64;
        break;
    case TCG_TYPE_V64:
        has_type = TCG_TARGET_HAS_v64;
        break;
    case TCG_TYPE_V128:
        has_type = TCG_TARGET_HAS_v128;
        break;
    case TCG_TYPE_V256:
        has_type = TCG_TARGET_HAS_v256;
        break;
    default:
        has_type = false;
        break;
    }

    switch (op) {
    /* Opcodes reported as supported regardless of TYPE. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_i128:
    case INDEX_op_qemu_st_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Unsuffixed integer ops: supported whenever TYPE itself is. */
    case INDEX_op_add:
    case INDEX_op_and:
    case INDEX_op_mov:
    case INDEX_op_or:
    case INDEX_op_xor:
        return has_type;

    /* 32-bit ops reported as supported unconditionally. */
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_movcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_neg_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
    case INDEX_op_extract_i32:
    case INDEX_op_sextract_i32:
    case INDEX_op_deposit_i32:
        return true;

    /* Optional 32-bit ops: each is gated by its own backend flag. */
    case INDEX_op_negsetcond_i32:
        return TCG_TARGET_HAS_negsetcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons are only reported on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit ops that only require 64-bit host registers. */
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_movcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_neg_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i64:
    case INDEX_op_deposit_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops: each is gated by its own backend flag. */
    case INDEX_op_negsetcond_i64:
        return TCG_TARGET_HAS_negsetcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extr_i64_i32;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops available whenever the vector TYPE is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return has_type;
    /* Remaining vector ops also need a host/backend capability. */
    case INDEX_op_dup2_vec:
        return has_type && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return has_type && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return has_type && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return has_type && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return has_type && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return has_type && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return has_type && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return has_type && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return has_type && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return has_type && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return has_type && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return has_type && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return has_type && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return has_type && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return has_type && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return has_type && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return has_type && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return has_type && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return has_type && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return has_type && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Opcodes below INDEX_op_last_generic are described by the
         * all_outop[] table: the op is supported when TYPE is usable
         * and its (static or dynamically computed) constraint set is
         * a real one rather than C_NotImplemented.
         */
        if (op < INDEX_op_last_generic) {
            const TCGOutOp *outop;
            TCGConstraintSetIndex con_set;

            if (!has_type) {
                return false;
            }

            outop = all_outop[op];
            tcg_debug_assert(outop != NULL);

            con_set = outop->static_constraint;
            if (con_set == C_Dynamic) {
                con_set = outop->dynamic_constraint(type, flags);
            }
            if (con_set >= 0) {
                return true;
            }
            tcg_debug_assert(con_set == C_NotImplemented);
            return false;
        }
        /* Any other valid opcode not listed above is reported supported. */
        tcg_debug_assert(op < NB_OPS);
        return true;

    /* The end-of-generic marker is not a real opcode. */
    case INDEX_op_last_generic:
        g_assert_not_reached();
    }
}
2465 
2466 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2467 {
2468     unsigned width;
2469 
2470     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2471     width = (type == TCG_TYPE_I32 ? 32 : 64);
2472 
2473     tcg_debug_assert(ofs < width);
2474     tcg_debug_assert(len > 0);
2475     tcg_debug_assert(len <= width - ofs);
2476 
2477     return TCG_TARGET_deposit_valid(type, ofs, len);
2478 }
2479 
2480 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2481 
2482 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2483                           TCGTemp *ret, TCGTemp **args)
2484 {
2485     TCGv_i64 extend_free[MAX_CALL_IARGS];
2486     int n_extend = 0;
2487     TCGOp *op;
2488     int i, n, pi = 0, total_args;
2489 
2490     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2491         init_call_layout(info);
2492         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2493     }
2494 
2495     total_args = info->nr_out + info->nr_in + 2;
2496     op = tcg_op_alloc(INDEX_op_call, total_args);
2497 
2498 #ifdef CONFIG_PLUGIN
2499     /* Flag helpers that may affect guest state */
2500     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2501         tcg_ctx->plugin_insn->calls_helpers = true;
2502     }
2503 #endif
2504 
2505     TCGOP_CALLO(op) = n = info->nr_out;
2506     switch (n) {
2507     case 0:
2508         tcg_debug_assert(ret == NULL);
2509         break;
2510     case 1:
2511         tcg_debug_assert(ret != NULL);
2512         op->args[pi++] = temp_arg(ret);
2513         break;
2514     case 2:
2515     case 4:
2516         tcg_debug_assert(ret != NULL);
2517         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2518         tcg_debug_assert(ret->temp_subindex == 0);
2519         for (i = 0; i < n; ++i) {
2520             op->args[pi++] = temp_arg(ret + i);
2521         }
2522         break;
2523     default:
2524         g_assert_not_reached();
2525     }
2526 
2527     TCGOP_CALLI(op) = n = info->nr_in;
2528     for (i = 0; i < n; i++) {
2529         const TCGCallArgumentLoc *loc = &info->in[i];
2530         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2531 
2532         switch (loc->kind) {
2533         case TCG_CALL_ARG_NORMAL:
2534         case TCG_CALL_ARG_BY_REF:
2535         case TCG_CALL_ARG_BY_REF_N:
2536             op->args[pi++] = temp_arg(ts);
2537             break;
2538 
2539         case TCG_CALL_ARG_EXTEND_U:
2540         case TCG_CALL_ARG_EXTEND_S:
2541             {
2542                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2543                 TCGv_i32 orig = temp_tcgv_i32(ts);
2544 
2545                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2546                     tcg_gen_ext_i32_i64(temp, orig);
2547                 } else {
2548                     tcg_gen_extu_i32_i64(temp, orig);
2549                 }
2550                 op->args[pi++] = tcgv_i64_arg(temp);
2551                 extend_free[n_extend++] = temp;
2552             }
2553             break;
2554 
2555         default:
2556             g_assert_not_reached();
2557         }
2558     }
2559     op->args[pi++] = (uintptr_t)func;
2560     op->args[pi++] = (uintptr_t)info;
2561     tcg_debug_assert(pi == total_args);
2562 
2563     if (tcg_ctx->emit_before_op) {
2564         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2565     } else {
2566         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2567     }
2568 
2569     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2570     for (i = 0; i < n_extend; ++i) {
2571         tcg_temp_free_i64(extend_free[i]);
2572     }
2573 }
2574 
2575 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2576 {
2577     tcg_gen_callN(func, info, ret, NULL);
2578 }
2579 
2580 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2581 {
2582     tcg_gen_callN(func, info, ret, &t1);
2583 }
2584 
2585 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2586                    TCGTemp *t1, TCGTemp *t2)
2587 {
2588     TCGTemp *args[2] = { t1, t2 };
2589     tcg_gen_callN(func, info, ret, args);
2590 }
2591 
2592 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2593                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2594 {
2595     TCGTemp *args[3] = { t1, t2, t3 };
2596     tcg_gen_callN(func, info, ret, args);
2597 }
2598 
2599 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2600                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2601 {
2602     TCGTemp *args[4] = { t1, t2, t3, t4 };
2603     tcg_gen_callN(func, info, ret, args);
2604 }
2605 
2606 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2607                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2608 {
2609     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2610     tcg_gen_callN(func, info, ret, args);
2611 }
2612 
2613 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2614                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2615                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2616 {
2617     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2618     tcg_gen_callN(func, info, ret, args);
2619 }
2620 
2621 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2622                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2623                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2624 {
2625     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2626     tcg_gen_callN(func, info, ret, args);
2627 }
2628 
2629 static void tcg_reg_alloc_start(TCGContext *s)
2630 {
2631     int i, n;
2632 
2633     for (i = 0, n = s->nb_temps; i < n; i++) {
2634         TCGTemp *ts = &s->temps[i];
2635         TCGTempVal val = TEMP_VAL_MEM;
2636 
2637         switch (ts->kind) {
2638         case TEMP_CONST:
2639             val = TEMP_VAL_CONST;
2640             break;
2641         case TEMP_FIXED:
2642             val = TEMP_VAL_REG;
2643             break;
2644         case TEMP_GLOBAL:
2645             break;
2646         case TEMP_EBB:
2647             val = TEMP_VAL_DEAD;
2648             /* fall through */
2649         case TEMP_TB:
2650             ts->mem_allocated = 0;
2651             break;
2652         default:
2653             g_assert_not_reached();
2654         }
2655         ts->val_type = val;
2656     }
2657 
2658     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2659 }
2660 
2661 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2662                                  TCGTemp *ts)
2663 {
2664     int idx = temp_idx(ts);
2665 
2666     switch (ts->kind) {
2667     case TEMP_FIXED:
2668     case TEMP_GLOBAL:
2669         pstrcpy(buf, buf_size, ts->name);
2670         break;
2671     case TEMP_TB:
2672         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2673         break;
2674     case TEMP_EBB:
2675         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2676         break;
2677     case TEMP_CONST:
2678         switch (ts->type) {
2679         case TCG_TYPE_I32:
2680             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2681             break;
2682 #if TCG_TARGET_REG_BITS > 32
2683         case TCG_TYPE_I64:
2684             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2685             break;
2686 #endif
2687         case TCG_TYPE_V64:
2688         case TCG_TYPE_V128:
2689         case TCG_TYPE_V256:
2690             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2691                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2692             break;
2693         default:
2694             g_assert_not_reached();
2695         }
2696         break;
2697     }
2698     return buf;
2699 }
2700 
2701 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2702                              int buf_size, TCGArg arg)
2703 {
2704     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2705 }
2706 
2707 static const char * const cond_name[] =
2708 {
2709     [TCG_COND_NEVER] = "never",
2710     [TCG_COND_ALWAYS] = "always",
2711     [TCG_COND_EQ] = "eq",
2712     [TCG_COND_NE] = "ne",
2713     [TCG_COND_LT] = "lt",
2714     [TCG_COND_GE] = "ge",
2715     [TCG_COND_LE] = "le",
2716     [TCG_COND_GT] = "gt",
2717     [TCG_COND_LTU] = "ltu",
2718     [TCG_COND_GEU] = "geu",
2719     [TCG_COND_LEU] = "leu",
2720     [TCG_COND_GTU] = "gtu",
2721     [TCG_COND_TSTEQ] = "tsteq",
2722     [TCG_COND_TSTNE] = "tstne",
2723 };
2724 
2725 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2726 {
2727     [MO_UB]   = "ub",
2728     [MO_SB]   = "sb",
2729     [MO_LEUW] = "leuw",
2730     [MO_LESW] = "lesw",
2731     [MO_LEUL] = "leul",
2732     [MO_LESL] = "lesl",
2733     [MO_LEUQ] = "leq",
2734     [MO_BEUW] = "beuw",
2735     [MO_BESW] = "besw",
2736     [MO_BEUL] = "beul",
2737     [MO_BESL] = "besl",
2738     [MO_BEUQ] = "beq",
2739     [MO_128 + MO_BE] = "beo",
2740     [MO_128 + MO_LE] = "leo",
2741 };
2742 
2743 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2744     [MO_UNALN >> MO_ASHIFT]    = "un+",
2745     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2746     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2747     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2748     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2749     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2750     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2751     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2752 };
2753 
2754 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2755     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2756     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2757     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2758     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2759     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2760     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2761 };
2762 
2763 static const char bswap_flag_name[][6] = {
2764     [TCG_BSWAP_IZ] = "iz",
2765     [TCG_BSWAP_OZ] = "oz",
2766     [TCG_BSWAP_OS] = "os",
2767     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2768     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2769 };
2770 
#ifdef CONFIG_PLUGIN
/*
 * Names for the first argument of a plugin_cb op, indexed by its value.
 * Only built when plugin support is configured.
 */
static const char * const plugin_from_name[] = {
    [0] = "from-tb",
    [1] = "from-insn",
    [2] = "after-insn",
    [3] = "after-tb",
};
#endif
2779 
2780 static inline bool tcg_regset_single(TCGRegSet d)
2781 {
2782     return (d & (d - 1)) == 0;
2783 }
2784 
2785 static inline TCGReg tcg_regset_first(TCGRegSet d)
2786 {
2787     if (TCG_TARGET_NB_REGS <= 32) {
2788         return ctz32(d);
2789     } else {
2790         return ctz64(d);
2791     }
2792 }
2793 
/*
 * "No error" fprintf: evaluates to the number of characters written, or
 * to 0 when fprintf() reports failure, so the result can always be added
 * to a running column count.
 */
#define ne_fprintf(...)                         \
    ({                                          \
        int nout_ = fprintf(__VA_ARGS__);       \
        nout_ < 0 ? 0 : nout_;                  \
    })
2797 
/*
 * Write a textual listing of every op queued in @s->ops to @f, one op
 * per line.
 *
 * For each op the opcode name is printed, followed by its output, input
 * and constant arguments.  Several opcodes get their constant arguments
 * decoded symbolically (conditions, memory operations, bswap flags,
 * plugin origin, labels, memory barriers); anything left over is printed
 * as a hexadecimal immediate.
 *
 * When the op carries liveness data (op->life), the "sync:" and "dead:"
 * argument lists are appended.  When @have_prefs is true, the register
 * preference of each output argument is appended as well.
 *
 * Output errors on @f are ignored.
 */
2798 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2799 {
     /* Scratch buffer reused for every formatted temporary name. */
2800     char buf[128];
2801     TCGOp *op;
2802 
2803     QTAILQ_FOREACH(op, &s->ops, link) {
2804         int i, k, nb_oargs, nb_iargs, nb_cargs;
2805         const TCGOpDef *def;
2806         TCGOpcode c;
         /* Number of characters emitted so far on the current line. */
2807         int col = 0;
2808 
2809         c = op->opc;
2810         def = &tcg_op_defs[c];
2811 
2812         if (c == INDEX_op_insn_start) {
             /* Marker op: print a separator and the insn_start words. */
2813             nb_oargs = 0;
2814             col += ne_fprintf(f, "\n ----");
2815 
2816             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2817                 col += ne_fprintf(f, " %016" PRIx64,
2818                                   tcg_get_insn_start_param(op, i));
2819             }
2820         } else if (c == INDEX_op_call) {
2821             const TCGHelperInfo *info = tcg_call_info(op);
2822             void *func = tcg_call_func(op);
2823 
2824             /* variable number of arguments */
2825             nb_oargs = TCGOP_CALLO(op);
2826             nb_iargs = TCGOP_CALLI(op);
2827             nb_cargs = def->nb_cargs;
2828 
2829             col += ne_fprintf(f, " %s ", def->name);
2830 
2831             /*
2832              * Print the function name from TCGHelperInfo, if available.
2833              * Note that plugins have a template function for the info,
2834              * but the actual function pointer comes from the plugin.
2835              */
2836             if (func == info->func) {
2837                 col += ne_fprintf(f, "%s", info->name);
2838             } else {
2839                 col += ne_fprintf(f, "plugin(%p)", func);
2840             }
2841 
             /* Call flags and output count, then outputs, then inputs. */
2842             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2843             for (i = 0; i < nb_oargs; i++) {
2844                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2845                                                             op->args[i]));
2846             }
2847             for (i = 0; i < nb_iargs; i++) {
2848                 TCGArg arg = op->args[nb_oargs + i];
2849                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2850                 col += ne_fprintf(f, ",%s", t);
2851             }
2852         } else {
             /* Opcode name, decorated with the type for int/vector ops. */
2853             if (def->flags & TCG_OPF_INT) {
2854                 col += ne_fprintf(f, " %s_i%d ",
2855                                   def->name,
2856                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2857             } else if (def->flags & TCG_OPF_VECTOR) {
2858                 col += ne_fprintf(f, "%s v%d,e%d,",
2859                                   def->name,
2860                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2861                                   8 << TCGOP_VECE(op));
2862             } else {
2863                 col += ne_fprintf(f, " %s ", def->name);
2864             }
2865 
2866             nb_oargs = def->nb_oargs;
2867             nb_iargs = def->nb_iargs;
2868             nb_cargs = def->nb_cargs;
2869 
             /*
              * k indexes op->args[] across outputs, inputs and constants;
              * a comma separator is emitted before every argument but the
              * first.
              */
2870             k = 0;
2871             for (i = 0; i < nb_oargs; i++) {
2872                 const char *sep =  k ? "," : "";
2873                 col += ne_fprintf(f, "%s%s", sep,
2874                                   tcg_get_arg_str(s, buf, sizeof(buf),
2875                                                   op->args[k++]));
2876             }
2877             for (i = 0; i < nb_iargs; i++) {
2878                 const char *sep =  k ? "," : "";
2879                 col += ne_fprintf(f, "%s%s", sep,
2880                                   tcg_get_arg_str(s, buf, sizeof(buf),
2881                                                   op->args[k++]));
2882             }
             /*
              * Symbolic decoding of the first constant argument for the
              * opcodes that have one with a known meaning.  On exit, i is
              * the number of constant arguments already printed.
              */
2883             switch (c) {
2884             case INDEX_op_brcond_i32:
2885             case INDEX_op_setcond_i32:
2886             case INDEX_op_negsetcond_i32:
2887             case INDEX_op_movcond_i32:
2888             case INDEX_op_brcond2_i32:
2889             case INDEX_op_setcond2_i32:
2890             case INDEX_op_brcond_i64:
2891             case INDEX_op_setcond_i64:
2892             case INDEX_op_negsetcond_i64:
2893             case INDEX_op_movcond_i64:
2894             case INDEX_op_cmp_vec:
2895             case INDEX_op_cmpsel_vec:
                 /* Comparison condition: by name if known, else in hex. */
2896                 if (op->args[k] < ARRAY_SIZE(cond_name)
2897                     && cond_name[op->args[k]]) {
2898                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2899                 } else {
2900                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2901                 }
2902                 i = 1;
2903                 break;
2904             case INDEX_op_qemu_ld_i32:
2905             case INDEX_op_qemu_st_i32:
2906             case INDEX_op_qemu_st8_i32:
2907             case INDEX_op_qemu_ld_i64:
2908             case INDEX_op_qemu_st_i64:
2909             case INDEX_op_qemu_ld_i128:
2910             case INDEX_op_qemu_st_i128:
2911                 {
                     /* MemOpIdx: split into the MemOp and the mmu index. */
2912                     const char *s_al, *s_op, *s_at;
2913                     MemOpIdx oi = op->args[k++];
2914                     MemOp mop = get_memop(oi);
2915                     unsigned ix = get_mmuidx(oi);
2916 
2917                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2918                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2919                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                     /* Whatever remains set is a field with no name table. */
2920                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2921 
2922                     /* If all fields are accounted for, print symbolically. */
2923                     if (!mop && s_al && s_op && s_at) {
2924                         col += ne_fprintf(f, ",%s%s%s,%u",
2925                                           s_at, s_al, s_op, ix);
2926                     } else {
2927                         mop = get_memop(oi);
2928                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2929                     }
2930                     i = 1;
2931                 }
2932                 break;
2933             case INDEX_op_bswap16_i32:
2934             case INDEX_op_bswap16_i64:
2935             case INDEX_op_bswap32_i32:
2936             case INDEX_op_bswap32_i64:
2937             case INDEX_op_bswap64_i64:
2938                 {
2939                     TCGArg flags = op->args[k];
2940                     const char *name = NULL;
2941 
                     /*
                      * bswap_flag_name[] holds char arrays, so an in-range
                      * lookup never yields NULL; an unnamed in-range value
                      * gives an empty string.
                      */
2942                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2943                         name = bswap_flag_name[flags];
2944                     }
2945                     if (name) {
2946                         col += ne_fprintf(f, ",%s", name);
2947                     } else {
2948                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2949                     }
                     /*
                      * NOTE(review): k is assigned 1 here instead of being
                      * advanced; presumably harmless because these ops have
                      * a single constant argument, so nothing reads
                      * op->args[k] afterwards -- confirm.
                      */
2950                     i = k = 1;
2951                 }
2952                 break;
2953 #ifdef CONFIG_PLUGIN
2954             case INDEX_op_plugin_cb:
2955                 {
                     /* Origin of the plugin callback: by name or in hex. */
2956                     TCGArg from = op->args[k++];
2957                     const char *name = NULL;
2958 
2959                     if (from < ARRAY_SIZE(plugin_from_name)) {
2960                         name = plugin_from_name[from];
2961                     }
2962                     if (name) {
2963                         col += ne_fprintf(f, "%s", name);
2964                     } else {
2965                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2966                     }
2967                     i = 1;
2968                 }
2969                 break;
2970 #endif
2971             default:
2972                 i = 0;
2973                 break;
2974             }
             /*
              * Second pass over the opcode: the next constant argument of
              * branch-like ops is a label, and the argument of mb is a
              * barrier description.
              */
2975             switch (c) {
2976             case INDEX_op_set_label:
2977             case INDEX_op_br:
2978             case INDEX_op_brcond_i32:
2979             case INDEX_op_brcond_i64:
2980             case INDEX_op_brcond2_i32:
2981                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2982                                   arg_label(op->args[k])->id);
2983                 i++, k++;
2984                 break;
2985             case INDEX_op_mb:
2986                 {
2987                     TCGBar membar = op->args[k];
2988                     const char *b_op, *m_op;
2989 
                     /* Barrier kind, taken from the TCG_BAR_SC bits. */
2990                     switch (membar & TCG_BAR_SC) {
2991                     case 0:
2992                         b_op = "none";
2993                         break;
2994                     case TCG_BAR_LDAQ:
2995                         b_op = "acq";
2996                         break;
2997                     case TCG_BAR_STRL:
2998                         b_op = "rel";
2999                         break;
3000                     case TCG_BAR_SC:
3001                         b_op = "seq";
3002                         break;
3003                     default:
3004                         g_assert_not_reached();
3005                     }
3006 
                     /* Ordered access pairs, taken from the TCG_MO_ALL bits. */
3007                     switch (membar & TCG_MO_ALL) {
3008                     case 0:
3009                         m_op = "none";
3010                         break;
3011                     case TCG_MO_LD_LD:
3012                         m_op = "rr";
3013                         break;
3014                     case TCG_MO_LD_ST:
3015                         m_op = "rw";
3016                         break;
3017                     case TCG_MO_ST_LD:
3018                         m_op = "wr";
3019                         break;
3020                     case TCG_MO_ST_ST:
3021                         m_op = "ww";
3022                         break;
3023                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
3024                         m_op = "rr+rw";
3025                         break;
3026                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3027                         m_op = "rr+wr";
3028                         break;
3029                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3030                         m_op = "rr+ww";
3031                         break;
3032                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3033                         m_op = "rw+wr";
3034                         break;
3035                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3036                         m_op = "rw+ww";
3037                         break;
3038                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3039                         m_op = "wr+ww";
3040                         break;
3041                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3042                         m_op = "rr+rw+wr";
3043                         break;
3044                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3045                         m_op = "rr+rw+ww";
3046                         break;
3047                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3048                         m_op = "rr+wr+ww";
3049                         break;
3050                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3051                         m_op = "rw+wr+ww";
3052                         break;
3053                     case TCG_MO_ALL:
3054                         m_op = "all";
3055                         break;
3056                     default:
3057                         g_assert_not_reached();
3058                     }
3059 
3060                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3061                     i++, k++;
3062                 }
3063                 break;
3064             default:
3065                 break;
3066             }
             /* Any constant arguments not decoded above: raw hex. */
3067             for (; i < nb_cargs; i++, k++) {
3068                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3069                                   op->args[k]);
3070             }
3071         }
3072 
         /* Pad to column 40 when an annotation follows on this line. */
3073         if (have_prefs || op->life) {
3074             for (; col < 40; ++col) {
3075                 putc(' ', f);
3076             }
3077         }
3078 
3079         if (op->life) {
3080             unsigned life = op->life;
3081 
             /* The two sync bits are SYNC_ARG << 0 and SYNC_ARG << 1. */
3082             if (life & (SYNC_ARG * 3)) {
3083                 ne_fprintf(f, "  sync:");
3084                 for (i = 0; i < 2; ++i) {
3085                     if (life & (SYNC_ARG << i)) {
3086                         ne_fprintf(f, " %d", i);
3087                     }
3088                 }
3089             }
             /*
              * Dividing by DEAD_ARG (presumably a power of two) drops the
              * bits below it, so that bit i then refers to argument i.
              */
3090             life /= DEAD_ARG;
3091             if (life) {
3092                 ne_fprintf(f, "  dead:");
3093                 for (i = 0; life; ++i, life >>= 1) {
3094                     if (life & 1) {
3095                         ne_fprintf(f, " %d", i);
3096                     }
3097                 }
3098             }
3099         }
3100 
3101         if (have_prefs) {
             /* One preference per output: none, all, a name, or a mask. */
3102             for (i = 0; i < nb_oargs; ++i) {
3103                 TCGRegSet set = output_pref(op, i);
3104 
3105                 if (i == 0) {
3106                     ne_fprintf(f, "  pref=");
3107                 } else {
3108                     ne_fprintf(f, ",");
3109                 }
3110                 if (set == 0) {
3111                     ne_fprintf(f, "none");
3112                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3113                     ne_fprintf(f, "all");
3114 #ifdef CONFIG_DEBUG_TCG
3115                 } else if (tcg_regset_single(set)) {
3116                     TCGReg reg = tcg_regset_first(set);
3117                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3118 #endif
3119                 } else if (TCG_TARGET_NB_REGS <= 32) {
3120                     ne_fprintf(f, "0x%x", (uint32_t)set);
3121                 } else {
3122                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3123                 }
3124             }
3125         }
3126 
3127         putc('\n', f);
3128     }
3129 }
3130 
3131 /* we give more priority to constraints with less registers */
3132 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3133 {
3134     int n;
3135 
3136     arg_ct += k;
3137     n = ctpop64(arg_ct->regs);
3138 
3139     /*
3140      * Sort constraints of a single register first, which includes output
3141      * aliases (which must exactly match the input already allocated).
3142      */
3143     if (n == 1 || arg_ct->oalias) {
3144         return INT_MAX;
3145     }
3146 
3147     /*
3148      * Sort register pairs next, first then second immediately after.
3149      * Arbitrarily sort multiple pairs by the index of the first reg;
3150      * there shouldn't be many pairs.
3151      */
3152     switch (arg_ct->pair) {
3153     case 1:
3154     case 3:
3155         return (k + 1) * 2;
3156     case 2:
3157         return (arg_ct->pair_index + 1) * 2 - 1;
3158     }
3159 
3160     /* Finally, sort by decreasing register count. */
3161     assert(n > 1);
3162     return -n;
3163 }
3164 
3165 /* sort from highest priority to lowest */
3166 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3167 {
3168     int i, j;
3169 
3170     for (i = 0; i < n; i++) {
3171         a[start + i].sort_index = start + i;
3172     }
3173     if (n <= 1) {
3174         return;
3175     }
3176     for (i = 0; i < n - 1; i++) {
3177         for (j = i + 1; j < n; j++) {
3178             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3179             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3180             if (p1 < p2) {
3181                 int tmp = a[start + i].sort_index;
3182                 a[start + i].sort_index = a[start + j].sort_index;
3183                 a[start + j].sort_index = tmp;
3184             }
3185         }
3186     }
3187 }
3188 
3189 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3190 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3191 
/*
 * Convert every entry of constraint_sets[] into its parsed form in the
 * matching row of all_cts[].
 *
 * For each set this parses the constraint string of every argument
 * (outputs first, then inputs), records input/output aliases and register
 * pairs, repairs the pair bookkeeping of inputs that alias one half of an
 * output pair, and finally computes the sort order of the outputs and of
 * the inputs.
 */
3192 static void process_constraint_sets(void)
3193 {
3194     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3195         const TCGConstraintSet *tdefs = &constraint_sets[c];
3196         TCGArgConstraint *args_ct = all_cts[c];
3197         int nb_oargs = tdefs->nb_oargs;
3198         int nb_iargs = tdefs->nb_iargs;
3199         int nb_args = nb_oargs + nb_iargs;
         /* Set when an input aliases an output that is part of a pair. */
3200         bool saw_alias_pair = false;
3201 
3202         for (int i = 0; i < nb_args; i++) {
3203             const char *ct_str = tdefs->args_ct_str[i];
             /* Arguments [0, nb_oargs) are outputs, the rest are inputs. */
3204             bool input_p = i >= nb_oargs;
3205             int o;
3206 
             /*
              * The leading character selects the special forms.  A digit,
              * 'p' or 'm' must be the whole string and finishes this
              * argument; '&' is a prefix followed by ordinary constraint
              * letters.
              */
3207             switch (*ct_str) {
3208             case '0' ... '9':
                 /* Input that must use the same register as output 'o'. */
3209                 o = *ct_str - '0';
3210                 tcg_debug_assert(input_p);
3211                 tcg_debug_assert(o < nb_oargs);
3212                 tcg_debug_assert(args_ct[o].regs != 0);
3213                 tcg_debug_assert(!args_ct[o].oalias);
3214                 args_ct[i] = args_ct[o];
3215                 /* The output sets oalias.  */
3216                 args_ct[o].oalias = 1;
3217                 args_ct[o].alias_index = i;
3218                 /* The input sets ialias. */
3219                 args_ct[i].ialias = 1;
3220                 args_ct[i].alias_index = o;
3221                 if (args_ct[i].pair) {
3222                     saw_alias_pair = true;
3223                 }
3224                 tcg_debug_assert(ct_str[1] == '\0');
3225                 continue;
3226 
3227             case '&':
                 /* Output only: sets the newreg flag on this argument. */
3228                 tcg_debug_assert(!input_p);
3229                 args_ct[i].newreg = true;
3230                 ct_str++;
3231                 break;
3232 
3233             case 'p': /* plus */
3234                 /* Allocate to the register after the previous. */
3235                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3236                 o = i - 1;
3237                 tcg_debug_assert(!args_ct[o].pair);
3238                 tcg_debug_assert(!args_ct[o].ct);
3239                 args_ct[i] = (TCGArgConstraint){
3240                     .pair = 2,
3241                     .pair_index = o,
3242                     .regs = args_ct[o].regs << 1,
3243                     .newreg = args_ct[o].newreg,
3244                 };
3245                 args_ct[o].pair = 1;
3246                 args_ct[o].pair_index = i;
3247                 tcg_debug_assert(ct_str[1] == '\0');
3248                 continue;
3249 
3250             case 'm': /* minus */
3251                 /* Allocate to the register before the previous. */
3252                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3253                 o = i - 1;
3254                 tcg_debug_assert(!args_ct[o].pair);
3255                 tcg_debug_assert(!args_ct[o].ct);
3256                 args_ct[i] = (TCGArgConstraint){
3257                     .pair = 1,
3258                     .pair_index = o,
3259                     .regs = args_ct[o].regs >> 1,
3260                     .newreg = args_ct[o].newreg,
3261                 };
3262                 args_ct[o].pair = 2;
3263                 args_ct[o].pair_index = i;
3264                 tcg_debug_assert(ct_str[1] == '\0');
3265                 continue;
3266             }
3267 
             /*
              * Ordinary constraint letters: each one ORs constant-class
              * bits into .ct or registers into .regs.
              */
3268             do {
3269                 switch (*ct_str) {
3270                 case 'i':
3271                     args_ct[i].ct |= TCG_CT_CONST;
3272                     break;
3273 #ifdef TCG_REG_ZERO
3274                 case 'z':
3275                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3276                     break;
3277 #endif
3278 
3279                 /* Include all of the target-specific constraints. */
3280 
3281 #undef CONST
3282 #define CONST(CASE, MASK) \
3283     case CASE: args_ct[i].ct |= MASK; break;
3284 #define REGS(CASE, MASK) \
3285     case CASE: args_ct[i].regs |= MASK; break;
3286 
3287 #include "tcg-target-con-str.h"
3288 
3289 #undef REGS
3290 #undef CONST
3291                 default:
3292                 case '0' ... '9':
3293                 case '&':
3294                 case 'p':
3295                 case 'm':
3296                     /* Typo in TCGConstraintSet constraint. */
3297                     g_assert_not_reached();
3298                 }
3299             } while (*++ct_str != '\0');
3300         }
3301 
3302         /*
3303          * Fix up output pairs that are aliased with inputs.
3304          * When we created the alias, we copied pair from the output.
3305          * There are three cases:
3306          *    (1a) Pairs of inputs alias pairs of outputs.
3307          *    (1b) One input aliases the first of a pair of outputs.
3308          *    (2)  One input aliases the second of a pair of outputs.
3309          *
3310          * Case 1a is handled by making sure that the pair_index'es are
3311          * properly updated so that they appear the same as a pair of inputs.
3312          *
3313          * Case 1b is handled by setting the pair_index of the input to
3314          * itself, simply so it doesn't point to an unrelated argument.
3315          * Since we don't encounter the "second" during the input allocation
3316          * phase, nothing happens with the second half of the input pair.
3317          *
3318          * Case 2 is handled by setting the second input to pair=3, the
3319          * first output to pair=3, and the pair_index'es to match.
3320          */
3321         if (saw_alias_pair) {
3322             for (int i = nb_oargs; i < nb_args; i++) {
                 /* o/o2: aliased output and its partner; i2: partner's alias. */
3323                 int o, o2, i2;
3324 
3325                 /*
3326                  * Since [0-9pm] must be alone in the constraint string,
3327                  * the only way they can both be set is if the pair comes
3328                  * from the output alias.
3329                  */
3330                 if (!args_ct[i].ialias) {
3331                     continue;
3332                 }
3333                 switch (args_ct[i].pair) {
3334                 case 0:
3335                     break;
3336                 case 1:
                     /* Input aliases the first half of an output pair. */
3337                     o = args_ct[i].alias_index;
3338                     o2 = args_ct[o].pair_index;
3339                     tcg_debug_assert(args_ct[o].pair == 1);
3340                     tcg_debug_assert(args_ct[o2].pair == 2);
3341                     if (args_ct[o2].oalias) {
3342                         /* Case 1a */
3343                         i2 = args_ct[o2].alias_index;
3344                         tcg_debug_assert(args_ct[i2].pair == 2);
3345                         args_ct[i2].pair_index = i;
3346                         args_ct[i].pair_index = i2;
3347                     } else {
3348                         /* Case 1b */
3349                         args_ct[i].pair_index = i;
3350                     }
3351                     break;
3352                 case 2:
                     /* Input aliases the second half of an output pair. */
3353                     o = args_ct[i].alias_index;
3354                     o2 = args_ct[o].pair_index;
3355                     tcg_debug_assert(args_ct[o].pair == 2);
3356                     tcg_debug_assert(args_ct[o2].pair == 1);
3357                     if (args_ct[o2].oalias) {
3358                         /* Case 1a */
3359                         i2 = args_ct[o2].alias_index;
3360                         tcg_debug_assert(args_ct[i2].pair == 1);
3361                         args_ct[i2].pair_index = i;
3362                         args_ct[i].pair_index = i2;
3363                     } else {
3364                         /* Case 2 */
3365                         args_ct[i].pair = 3;
3366                         args_ct[o2].pair = 3;
3367                         args_ct[i].pair_index = o2;
3368                         args_ct[o2].pair_index = i;
3369                     }
3370                     break;
3371                 default:
3372                     g_assert_not_reached();
3373                 }
3374             }
3375         }
3376 
3377         /* sort the constraints (XXX: this is just an heuristic) */
         /* Outputs and inputs are ordered independently of each other. */
3378         sort_constraints(args_ct, 0, nb_oargs);
3379         sort_constraints(args_ct, nb_oargs, nb_iargs);
3380     }
3381 }
3382 
3383 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3384 {
3385     TCGOpcode opc = op->opc;
3386     TCGType type = TCGOP_TYPE(op);
3387     unsigned flags = TCGOP_FLAGS(op);
3388     const TCGOpDef *def = &tcg_op_defs[opc];
3389     const TCGOutOp *outop = all_outop[opc];
3390     TCGConstraintSetIndex con_set;
3391 
3392     if (def->flags & TCG_OPF_NOT_PRESENT) {
3393         return empty_cts;
3394     }
3395 
3396     if (outop) {
3397         con_set = outop->static_constraint;
3398         if (con_set == C_Dynamic) {
3399             con_set = outop->dynamic_constraint(type, flags);
3400         }
3401     } else {
3402         con_set = tcg_target_op_def(opc, type, flags);
3403     }
3404     tcg_debug_assert(con_set >= 0);
3405     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3406 
3407     /* The constraint arguments must match TCGOpcode arguments. */
3408     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3409     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3410 
3411     return all_cts[con_set];
3412 }
3413 
3414 static void remove_label_use(TCGOp *op, int idx)
3415 {
3416     TCGLabel *label = arg_label(op->args[idx]);
3417     TCGLabelUse *use;
3418 
3419     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3420         if (use->op == op) {
3421             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3422             return;
3423         }
3424     }
3425     g_assert_not_reached();
3426 }
3427 
3428 void tcg_op_remove(TCGContext *s, TCGOp *op)
3429 {
3430     switch (op->opc) {
3431     case INDEX_op_br:
3432         remove_label_use(op, 0);
3433         break;
3434     case INDEX_op_brcond_i32:
3435     case INDEX_op_brcond_i64:
3436         remove_label_use(op, 3);
3437         break;
3438     case INDEX_op_brcond2_i32:
3439         remove_label_use(op, 5);
3440         break;
3441     default:
3442         break;
3443     }
3444 
3445     QTAILQ_REMOVE(&s->ops, op, link);
3446     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3447     s->nb_ops--;
3448 }
3449 
3450 void tcg_remove_ops_after(TCGOp *op)
3451 {
3452     TCGContext *s = tcg_ctx;
3453 
3454     while (true) {
3455         TCGOp *last = tcg_last_op();
3456         if (last == op) {
3457             return;
3458         }
3459         tcg_op_remove(s, last);
3460     }
3461 }
3462 
3463 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3464 {
3465     TCGContext *s = tcg_ctx;
3466     TCGOp *op = NULL;
3467 
3468     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3469         QTAILQ_FOREACH(op, &s->free_ops, link) {
3470             if (nargs <= op->nargs) {
3471                 QTAILQ_REMOVE(&s->free_ops, op, link);
3472                 nargs = op->nargs;
3473                 goto found;
3474             }
3475         }
3476     }
3477 
3478     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3479     nargs = MAX(4, nargs);
3480     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3481 
3482  found:
3483     memset(op, 0, offsetof(TCGOp, link));
3484     op->opc = opc;
3485     op->nargs = nargs;
3486 
3487     /* Check for bitfield overflow. */
3488     tcg_debug_assert(op->nargs == nargs);
3489 
3490     s->nb_ops++;
3491     return op;
3492 }
3493 
3494 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3495 {
3496     TCGOp *op = tcg_op_alloc(opc, nargs);
3497 
3498     if (tcg_ctx->emit_before_op) {
3499         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3500     } else {
3501         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3502     }
3503     return op;
3504 }
3505 
3506 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3507                             TCGOpcode opc, TCGType type, unsigned nargs)
3508 {
3509     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3510 
3511     TCGOP_TYPE(new_op) = type;
3512     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3513     return new_op;
3514 }
3515 
3516 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3517                            TCGOpcode opc, TCGType type, unsigned nargs)
3518 {
3519     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3520 
3521     TCGOP_TYPE(new_op) = type;
3522     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3523     return new_op;
3524 }
3525 
3526 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3527 {
3528     TCGLabelUse *u;
3529 
3530     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3531         TCGOp *op = u->op;
3532         switch (op->opc) {
3533         case INDEX_op_br:
3534             op->args[0] = label_arg(to);
3535             break;
3536         case INDEX_op_brcond_i32:
3537         case INDEX_op_brcond_i64:
3538             op->args[3] = label_arg(to);
3539             break;
3540         case INDEX_op_brcond2_i32:
3541             op->args[5] = label_arg(to);
3542             break;
3543         default:
3544             g_assert_not_reached();
3545         }
3546     }
3547 
3548     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3549 }
3550 
3551 /* Reachable analysis : remove unreachable code.  */
3552 static void __attribute__((noinline))
3553 reachable_code_pass(TCGContext *s)
3554 {
3555     TCGOp *op, *op_next, *op_prev;
3556     bool dead = false;
3557 
3558     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3559         bool remove = dead;
3560         TCGLabel *label;
3561 
3562         switch (op->opc) {
3563         case INDEX_op_set_label:
3564             label = arg_label(op->args[0]);
3565 
3566             /*
3567              * Note that the first op in the TB is always a load,
3568              * so there is always something before a label.
3569              */
3570             op_prev = QTAILQ_PREV(op, link);
3571 
3572             /*
3573              * If we find two sequential labels, move all branches to
3574              * reference the second label and remove the first label.
3575              * Do this before branch to next optimization, so that the
3576              * middle label is out of the way.
3577              */
3578             if (op_prev->opc == INDEX_op_set_label) {
3579                 move_label_uses(label, arg_label(op_prev->args[0]));
3580                 tcg_op_remove(s, op_prev);
3581                 op_prev = QTAILQ_PREV(op, link);
3582             }
3583 
3584             /*
3585              * Optimization can fold conditional branches to unconditional.
3586              * If we find a label which is preceded by an unconditional
3587              * branch to next, remove the branch.  We couldn't do this when
3588              * processing the branch because any dead code between the branch
3589              * and label had not yet been removed.
3590              */
3591             if (op_prev->opc == INDEX_op_br &&
3592                 label == arg_label(op_prev->args[0])) {
3593                 tcg_op_remove(s, op_prev);
3594                 /* Fall through means insns become live again.  */
3595                 dead = false;
3596             }
3597 
3598             if (QSIMPLEQ_EMPTY(&label->branches)) {
3599                 /*
3600                  * While there is an occasional backward branch, virtually
3601                  * all branches generated by the translators are forward.
3602                  * Which means that generally we will have already removed
3603                  * all references to the label that will be, and there is
3604                  * little to be gained by iterating.
3605                  */
3606                 remove = true;
3607             } else {
3608                 /* Once we see a label, insns become live again.  */
3609                 dead = false;
3610                 remove = false;
3611             }
3612             break;
3613 
3614         case INDEX_op_br:
3615         case INDEX_op_exit_tb:
3616         case INDEX_op_goto_ptr:
3617             /* Unconditional branches; everything following is dead.  */
3618             dead = true;
3619             break;
3620 
3621         case INDEX_op_call:
3622             /* Notice noreturn helper calls, raising exceptions.  */
3623             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3624                 dead = true;
3625             }
3626             break;
3627 
3628         case INDEX_op_insn_start:
3629             /* Never remove -- we need to keep these for unwind.  */
3630             remove = false;
3631             break;
3632 
3633         default:
3634             break;
3635         }
3636 
3637         if (remove) {
3638             tcg_op_remove(s, op);
3639         }
3640     }
3641 }
3642 
/*
 * Bits kept in a temp's state field by the liveness passes below.
 * TS_DEAD marks the temp as dead; TS_MEM marks that its value should be
 * in memory.  The bits are OR'ed together; 0 means neither is set.
 */
3643 #define TS_DEAD  1
3644 #define TS_MEM   2
3645 
/*
 * Test the per-argument DEAD_ARG / SYNC_ARG bit for argument index n.
 * Both macros read a variable named arg_life that must be in scope at
 * the point of use.
 */
3646 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3647 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3648 
3649 /* For liveness_pass_1, the register preferences for a given temp.  */
3650 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3651 {
3652     return ts->state_ptr;
3653 }
3654 
3655 /* For liveness_pass_1, reset the preferences for a given temp to the
3656  * maximal regset for its type.
3657  */
3658 static inline void la_reset_pref(TCGTemp *ts)
3659 {
3660     *la_temp_pref(ts)
3661         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3662 }
3663 
3664 /* liveness analysis: end of function: all temps are dead, and globals
3665    should be in memory. */
3666 static void la_func_end(TCGContext *s, int ng, int nt)
3667 {
3668     int i;
3669 
3670     for (i = 0; i < ng; ++i) {
3671         s->temps[i].state = TS_DEAD | TS_MEM;
3672         la_reset_pref(&s->temps[i]);
3673     }
3674     for (i = ng; i < nt; ++i) {
3675         s->temps[i].state = TS_DEAD;
3676         la_reset_pref(&s->temps[i]);
3677     }
3678 }
3679 
3680 /* liveness analysis: end of basic block: all temps are dead, globals
3681    and local temps should be in memory. */
3682 static void la_bb_end(TCGContext *s, int ng, int nt)
3683 {
3684     int i;
3685 
3686     for (i = 0; i < nt; ++i) {
3687         TCGTemp *ts = &s->temps[i];
3688         int state;
3689 
3690         switch (ts->kind) {
3691         case TEMP_FIXED:
3692         case TEMP_GLOBAL:
3693         case TEMP_TB:
3694             state = TS_DEAD | TS_MEM;
3695             break;
3696         case TEMP_EBB:
3697         case TEMP_CONST:
3698             state = TS_DEAD;
3699             break;
3700         default:
3701             g_assert_not_reached();
3702         }
3703         ts->state = state;
3704         la_reset_pref(ts);
3705     }
3706 }
3707 
3708 /* liveness analysis: sync globals back to memory.  */
3709 static void la_global_sync(TCGContext *s, int ng)
3710 {
3711     int i;
3712 
3713     for (i = 0; i < ng; ++i) {
3714         int state = s->temps[i].state;
3715         s->temps[i].state = state | TS_MEM;
3716         if (state == TS_DEAD) {
3717             /* If the global was previously dead, reset prefs.  */
3718             la_reset_pref(&s->temps[i]);
3719         }
3720     }
3721 }
3722 
3723 /*
3724  * liveness analysis: conditional branch: all temps are dead unless
3725  * explicitly live-across-conditional-branch, globals and local temps
3726  * should be synced.
3727  */
3728 static void la_bb_sync(TCGContext *s, int ng, int nt)
3729 {
3730     la_global_sync(s, ng);
3731 
3732     for (int i = ng; i < nt; ++i) {
3733         TCGTemp *ts = &s->temps[i];
3734         int state;
3735 
3736         switch (ts->kind) {
3737         case TEMP_TB:
3738             state = ts->state;
3739             ts->state = state | TS_MEM;
3740             if (state != TS_DEAD) {
3741                 continue;
3742             }
3743             break;
3744         case TEMP_EBB:
3745         case TEMP_CONST:
3746             continue;
3747         default:
3748             g_assert_not_reached();
3749         }
3750         la_reset_pref(&s->temps[i]);
3751     }
3752 }
3753 
3754 /* liveness analysis: sync globals back to memory and kill.  */
3755 static void la_global_kill(TCGContext *s, int ng)
3756 {
3757     int i;
3758 
3759     for (i = 0; i < ng; i++) {
3760         s->temps[i].state = TS_DEAD | TS_MEM;
3761         la_reset_pref(&s->temps[i]);
3762     }
3763 }
3764 
3765 /* liveness analysis: note live globals crossing calls.  */
3766 static void la_cross_call(TCGContext *s, int nt)
3767 {
3768     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3769     int i;
3770 
3771     for (i = 0; i < nt; i++) {
3772         TCGTemp *ts = &s->temps[i];
3773         if (!(ts->state & TS_DEAD)) {
3774             TCGRegSet *pset = la_temp_pref(ts);
3775             TCGRegSet set = *pset;
3776 
3777             set &= mask;
3778             /* If the combination is not possible, restart.  */
3779             if (set == 0) {
3780                 set = tcg_target_available_regs[ts->type] & mask;
3781             }
3782             *pset = set;
3783         }
3784     }
3785 }
3786 
3787 /*
3788  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3789  * to TEMP_EBB, if possible.
3790  */
3791 static void __attribute__((noinline))
3792 liveness_pass_0(TCGContext *s)
3793 {
3794     void * const multiple_ebb = (void *)(uintptr_t)-1;
3795     int nb_temps = s->nb_temps;
3796     TCGOp *op, *ebb;
3797 
3798     for (int i = s->nb_globals; i < nb_temps; ++i) {
3799         s->temps[i].state_ptr = NULL;
3800     }
3801 
3802     /*
3803      * Represent each EBB by the op at which it begins.  In the case of
3804      * the first EBB, this is the first op, otherwise it is a label.
3805      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3806      * within a single EBB, else MULTIPLE_EBB.
3807      */
3808     ebb = QTAILQ_FIRST(&s->ops);
3809     QTAILQ_FOREACH(op, &s->ops, link) {
3810         const TCGOpDef *def;
3811         int nb_oargs, nb_iargs;
3812 
3813         switch (op->opc) {
3814         case INDEX_op_set_label:
3815             ebb = op;
3816             continue;
3817         case INDEX_op_discard:
3818             continue;
3819         case INDEX_op_call:
3820             nb_oargs = TCGOP_CALLO(op);
3821             nb_iargs = TCGOP_CALLI(op);
3822             break;
3823         default:
3824             def = &tcg_op_defs[op->opc];
3825             nb_oargs = def->nb_oargs;
3826             nb_iargs = def->nb_iargs;
3827             break;
3828         }
3829 
3830         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3831             TCGTemp *ts = arg_temp(op->args[i]);
3832 
3833             if (ts->kind != TEMP_TB) {
3834                 continue;
3835             }
3836             if (ts->state_ptr == NULL) {
3837                 ts->state_ptr = ebb;
3838             } else if (ts->state_ptr != ebb) {
3839                 ts->state_ptr = multiple_ebb;
3840             }
3841         }
3842     }
3843 
3844     /*
3845      * For TEMP_TB that turned out not to be used beyond one EBB,
3846      * reduce the liveness to TEMP_EBB.
3847      */
3848     for (int i = s->nb_globals; i < nb_temps; ++i) {
3849         TCGTemp *ts = &s->temps[i];
3850         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3851             ts->kind = TEMP_EBB;
3852         }
3853     }
3854 }
3855 
3856 /* Liveness analysis : update the opc_arg_life array to tell if a
3857    given input arguments is dead. Instructions updating dead
3858    temporaries are removed.

     The op list is walked from the last op to the first.  For each op
     that is kept, the DEAD_ARG / SYNC_ARG bits computed for its
     arguments are stored in op->life, and the register preferences of
     its outputs are saved in op->output_pref.  While this pass runs,
     each temp's state_ptr points at that temp's TCGRegSet of register
     preferences.  */
3859 static void __attribute__((noinline))
3860 liveness_pass_1(TCGContext *s)
3861 {
3862     int nb_globals = s->nb_globals;
3863     int nb_temps = s->nb_temps;
3864     TCGOp *op, *op_prev;
3865     TCGRegSet *prefs;
3866     int i;
3867 
         /* One preference set per temp; see la_temp_pref().  */
3868     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3869     for (i = 0; i < nb_temps; ++i) {
3870         s->temps[i].state_ptr = prefs + i;
3871     }
3872 
3873     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3874     la_func_end(s, nb_globals, nb_temps);
3875 
         /* Backward walk; ops may be removed while iterating.  */
3876     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3877         int nb_iargs, nb_oargs;
3878         TCGOpcode opc_new, opc_new2;
3879         bool have_opc_new2;
3880         TCGLifeData arg_life = 0;
3881         TCGTemp *ts;
3882         TCGOpcode opc = op->opc;
             /*
              * Note: def is looked up once from the opcode found on entry
              * and is not refreshed when op->opc is rewritten below.
              */
3883         const TCGOpDef *def = &tcg_op_defs[opc];
3884         const TCGArgConstraint *args_ct;
3885 
3886         switch (opc) {
3887         case INDEX_op_call:
3888             {
3889                 const TCGHelperInfo *info = tcg_call_info(op);
3890                 int call_flags = tcg_call_flags(op);
3891 
3892                 nb_oargs = TCGOP_CALLO(op);
3893                 nb_iargs = TCGOP_CALLI(op);
3894 
3895                 /* pure functions can be removed if their result is unused */
3896                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3897                     for (i = 0; i < nb_oargs; i++) {
3898                         ts = arg_temp(op->args[i]);
                             /*
                              * Exact comparison: an output that is dead but
                              * also carries TS_MEM still keeps the call.
                              */
3899                         if (ts->state != TS_DEAD) {
3900                             goto do_not_remove_call;
3901                         }
3902                     }
3903                     goto do_remove;
3904                 }
3905             do_not_remove_call:
3906 
3907                 /* Output args are dead.  */
3908                 for (i = 0; i < nb_oargs; i++) {
3909                     ts = arg_temp(op->args[i]);
3910                     if (ts->state & TS_DEAD) {
3911                         arg_life |= DEAD_ARG << i;
3912                     }
3913                     if (ts->state & TS_MEM) {
3914                         arg_life |= SYNC_ARG << i;
3915                     }
3916                     ts->state = TS_DEAD;
3917                     la_reset_pref(ts);
3918                 }
3919 
3920                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3921                 memset(op->output_pref, 0, sizeof(op->output_pref));
3922 
                     /*
                      * Neither NO_WRITE_GLOBALS nor NO_READ_GLOBALS: globals
                      * are killed.  NO_READ_GLOBALS clear but
                      * NO_WRITE_GLOBALS set: globals are only synced.
                      * NO_READ_GLOBALS set: globals are left untouched.
                      */
3923                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3924                                     TCG_CALL_NO_READ_GLOBALS))) {
3925                     la_global_kill(s, nb_globals);
3926                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3927                     la_global_sync(s, nb_globals);
3928                 }
3929 
3930                 /* Record arguments that die in this helper.  */
3931                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3932                     ts = arg_temp(op->args[i]);
3933                     if (ts->state & TS_DEAD) {
3934                         arg_life |= DEAD_ARG << i;
3935                     }
3936                 }
3937 
3938                 /* For all live registers, remove call-clobbered prefs.  */
3939                 la_cross_call(s, nb_temps);
3940 
3941                 /*
3942                  * Input arguments are live for preceding opcodes.
3943                  *
3944                  * For those arguments that die, and will be allocated in
3945                  * registers, clear the register set for that arg, to be
3946                  * filled in below.  For args that will be on the stack,
3947                  * reset to any available reg.  Process arguments in reverse
3948                  * order so that if a temp is used more than once, the stack
3949                  * reset to max happens before the register reset to 0.
3950                  */
3951                 for (i = nb_iargs - 1; i >= 0; i--) {
3952                     const TCGCallArgumentLoc *loc = &info->in[i];
3953                     ts = arg_temp(op->args[nb_oargs + i]);
3954 
3955                     if (ts->state & TS_DEAD) {
3956                         switch (loc->kind) {
3957                         case TCG_CALL_ARG_NORMAL:
3958                         case TCG_CALL_ARG_EXTEND_U:
3959                         case TCG_CALL_ARG_EXTEND_S:
3960                             if (arg_slot_reg_p(loc->arg_slot)) {
3961                                 *la_temp_pref(ts) = 0;
3962                                 break;
3963                             }
3964                             /* fall through */
3965                         default:
3966                             *la_temp_pref(ts) =
3967                                 tcg_target_available_regs[ts->type];
3968                             break;
3969                         }
3970                         ts->state &= ~TS_DEAD;
3971                     }
3972                 }
3973 
3974                 /*
3975                  * For each input argument, add its input register to prefs.
3976                  * If a temp is used once, this produces a single set bit;
3977                  * if a temp is used multiple times, this produces a set.
3978                  */
3979                 for (i = 0; i < nb_iargs; i++) {
3980                     const TCGCallArgumentLoc *loc = &info->in[i];
3981                     ts = arg_temp(op->args[nb_oargs + i]);
3982 
3983                     switch (loc->kind) {
3984                     case TCG_CALL_ARG_NORMAL:
3985                     case TCG_CALL_ARG_EXTEND_U:
3986                     case TCG_CALL_ARG_EXTEND_S:
3987                         if (arg_slot_reg_p(loc->arg_slot)) {
3988                             tcg_regset_set_reg(*la_temp_pref(ts),
3989                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3990                         }
3991                         break;
3992                     default:
3993                         break;
3994                     }
3995                 }
3996             }
3997             break;
3998         case INDEX_op_insn_start:
3999             break;
4000         case INDEX_op_discard:
4001             /* mark the temporary as dead */
4002             ts = arg_temp(op->args[0]);
4003             ts->state = TS_DEAD;
4004             la_reset_pref(ts);
4005             break;
4006 
4007         case INDEX_op_add2_i32:
4008         case INDEX_op_add2_i64:
4009             opc_new = INDEX_op_add;
4010             goto do_addsub2;
4011         case INDEX_op_sub2_i32:
4012         case INDEX_op_sub2_i64:
4013             opc_new = INDEX_op_sub;
4014         do_addsub2:
4015             nb_iargs = 4;
4016             nb_oargs = 2;
4017             /* Test if the high part of the operation is dead, but not
4018                the low part.  The result can be optimized to a simple
4019                add or sub.  This happens often for x86_64 guest when the
4020                cpu mode is set to 32 bit.  */
4021             if (arg_temp(op->args[1])->state == TS_DEAD) {
4022                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4023                     goto do_remove;
4024                 }
4025                 /* Replace the opcode and adjust the args in place,
4026                    leaving 3 unused args at the end.  */
                     /* New layout: output args[0], inputs old args[2] and
                        old args[4].  */
4027                 op->opc = opc = opc_new;
4028                 op->args[1] = op->args[2];
4029                 op->args[2] = op->args[4];
4030                 /* Fall through and mark the single-word operation live.  */
4031                 nb_iargs = 2;
4032                 nb_oargs = 1;
4033             }
4034             goto do_not_remove;
4035 
4036         case INDEX_op_mulu2_i32:
4037             opc_new = INDEX_op_mul_i32;
4038             opc_new2 = INDEX_op_muluh_i32;
4039             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
4040             goto do_mul2;
4041         case INDEX_op_muls2_i32:
4042             opc_new = INDEX_op_mul_i32;
4043             opc_new2 = INDEX_op_mulsh_i32;
4044             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
4045             goto do_mul2;
4046         case INDEX_op_mulu2_i64:
4047             opc_new = INDEX_op_mul_i64;
4048             opc_new2 = INDEX_op_muluh_i64;
4049             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
4050             goto do_mul2;
4051         case INDEX_op_muls2_i64:
4052             opc_new = INDEX_op_mul_i64;
4053             opc_new2 = INDEX_op_mulsh_i64;
4054             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
4055             goto do_mul2;
4056         do_mul2:
                 /*
                  * opc_new is used when only output args[0] is needed,
                  * opc_new2 (if the host has it) when only output args[1]
                  * is needed.
                  */
4057             nb_iargs = 2;
4058             nb_oargs = 2;
4059             if (arg_temp(op->args[1])->state == TS_DEAD) {
4060                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4061                     /* Both parts of the operation are dead.  */
4062                     goto do_remove;
4063                 }
4064                 /* The high part of the operation is dead; generate the low. */
4065                 op->opc = opc = opc_new;
4066                 op->args[1] = op->args[2];
4067                 op->args[2] = op->args[3];
4068             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
4069                 /* The low part of the operation is dead; generate the high. */
4070                 op->opc = opc = opc_new2;
4071                 op->args[0] = op->args[1];
4072                 op->args[1] = op->args[2];
4073                 op->args[2] = op->args[3];
4074             } else {
                     /* Both outputs are kept: leave the two-output op as is.  */
4075                 goto do_not_remove;
4076             }
4077             /* Mark the single-word operation live.  */
4078             nb_oargs = 1;
4079             goto do_not_remove;
4080 
4081         default:
4082             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4083             nb_iargs = def->nb_iargs;
4084             nb_oargs = def->nb_oargs;
4085 
4086             /* Test if the operation can be removed because all
4087                its outputs are dead. We assume that nb_oargs == 0
4088                implies side effects */
4089             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4090                 for (i = 0; i < nb_oargs; i++) {
4091                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4092                         goto do_not_remove;
4093                     }
4094                 }
4095                 goto do_remove;
4096             }
4097             goto do_not_remove;
4098 
4099         do_remove:
4100             tcg_op_remove(s, op);
4101             break;
4102 
4103         do_not_remove:
4104             for (i = 0; i < nb_oargs; i++) {
4105                 ts = arg_temp(op->args[i]);
4106 
4107                 /* Remember the preference of the uses that followed.  */
4108                 if (i < ARRAY_SIZE(op->output_pref)) {
4109                     op->output_pref[i] = *la_temp_pref(ts);
4110                 }
4111 
4112                 /* Output args are dead.  */
4113                 if (ts->state & TS_DEAD) {
4114                     arg_life |= DEAD_ARG << i;
4115                 }
4116                 if (ts->state & TS_MEM) {
4117                     arg_life |= SYNC_ARG << i;
4118                 }
4119                 ts->state = TS_DEAD;
4120                 la_reset_pref(ts);
4121             }
4122 
4123             /* If end of basic block, update.  */
                 /* The flag tests are ordered; only the first match applies.  */
4124             if (def->flags & TCG_OPF_BB_EXIT) {
4125                 la_func_end(s, nb_globals, nb_temps);
4126             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4127                 la_bb_sync(s, nb_globals, nb_temps);
4128             } else if (def->flags & TCG_OPF_BB_END) {
4129                 la_bb_end(s, nb_globals, nb_temps);
4130             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4131                 la_global_sync(s, nb_globals);
4132                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4133                     la_cross_call(s, nb_temps);
4134                 }
4135             }
4136 
4137             /* Record arguments that die in this opcode.  */
4138             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4139                 ts = arg_temp(op->args[i]);
4140                 if (ts->state & TS_DEAD) {
4141                     arg_life |= DEAD_ARG << i;
4142                 }
4143             }
4144 
4145             /* Input arguments are live for preceding opcodes.  */
4146             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4147                 ts = arg_temp(op->args[i]);
4148                 if (ts->state & TS_DEAD) {
4149                     /* For operands that were dead, initially allow
4150                        all regs for the type.  */
4151                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4152                     ts->state &= ~TS_DEAD;
4153                 }
4154             }
4155 
4156             /* Incorporate constraints for this operand.  */
4157             switch (opc) {
4158             case INDEX_op_mov:
4159                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4160                    have proper constraints.  That said, special case
4161                    moves to propagate preferences backward.  */
4162                 if (IS_DEAD_ARG(1)) {
4163                     *la_temp_pref(arg_temp(op->args[0]))
4164                         = *la_temp_pref(arg_temp(op->args[1]));
4165                 }
4166                 break;
4167 
4168             default:
4169                 args_ct = opcode_args_ct(op);
4170                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4171                     const TCGArgConstraint *ct = &args_ct[i];
4172                     TCGRegSet set, *pset;
4173 
4174                     ts = arg_temp(op->args[i]);
4175                     pset = la_temp_pref(ts);
4176                     set = *pset;
4177 
                         /* Narrow the preference to what this operand allows,
                            and for an aliased input also to what the matching
                            output prefers.  */
4178                     set &= ct->regs;
4179                     if (ct->ialias) {
4180                         set &= output_pref(op, ct->alias_index);
4181                     }
4182                     /* If the combination is not possible, restart.  */
4183                     if (set == 0) {
4184                         set = ct->regs;
4185                     }
4186                     *pset = set;
4187                 }
4188                 break;
4189             }
4190             break;
4191         }
             /* Publish the life data computed for this op.  */
4192         op->life = arg_life;
4193     }
4194 }
4195 
4196 /* Liveness analysis: Convert indirect regs to direct temporaries.

     Each global with indirect_reg set gets a freshly allocated TEMP_EBB
     temp, reachable through the global's state_ptr.  Walking the ops
     forward, every reference to such a global is rewritten to that temp,
     with a load inserted before a use of a dead global and a store
     inserted after an output flagged as needing sync.

     Returns true if any op argument was rewritten.  */
4197 static bool __attribute__((noinline))
4198 liveness_pass_2(TCGContext *s)
4199 {
4200     int nb_globals = s->nb_globals;
4201     int nb_temps, i;
4202     bool changes = false;
4203     TCGOp *op, *op_next;
4204 
4205     /* Create a temporary for each indirect global.  */
4206     for (i = 0; i < nb_globals; ++i) {
4207         TCGTemp *its = &s->temps[i];
4208         if (its->indirect_reg) {
4209             TCGTemp *dts = tcg_temp_alloc(s);
4210             dts->type = its->type;
4211             dts->base_type = its->base_type;
4212             dts->temp_subindex = its->temp_subindex;
4213             dts->kind = TEMP_EBB;
4214             its->state_ptr = dts;
4215         } else {
4216             its->state_ptr = NULL;
4217         }
4218         /* All globals begin dead.  */
4219         its->state = TS_DEAD;
4220     }
         /* i continues from nb_globals; s->nb_temps is read only now, after
            the allocations above.  */
4221     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4222         TCGTemp *its = &s->temps[i];
4223         its->state_ptr = NULL;
4224         its->state = TS_DEAD;
4225     }
4226 
4227     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4228         TCGOpcode opc = op->opc;
4229         const TCGOpDef *def = &tcg_op_defs[opc];
             /* Read by the IS_DEAD_ARG / NEED_SYNC_ARG macros below.  */
4230         TCGLifeData arg_life = op->life;
4231         int nb_iargs, nb_oargs, call_flags;
4232         TCGTemp *arg_ts, *dir_ts;
4233 
4234         if (opc == INDEX_op_call) {
4235             nb_oargs = TCGOP_CALLO(op);
4236             nb_iargs = TCGOP_CALLI(op);
4237             call_flags = tcg_call_flags(op);
4238         } else {
4239             nb_iargs = def->nb_iargs;
4240             nb_oargs = def->nb_oargs;
4241 
4242             /* Set flags similar to how calls require.  */
4243             if (def->flags & TCG_OPF_COND_BRANCH) {
4244                 /* Like reading globals: sync_globals */
4245                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4246             } else if (def->flags & TCG_OPF_BB_END) {
4247                 /* Like writing globals: save_globals */
4248                 call_flags = 0;
4249             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4250                 /* Like reading globals: sync_globals */
4251                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4252             } else {
4253                 /* No effect on globals.  */
4254                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4255                               TCG_CALL_NO_WRITE_GLOBALS);
4256             }
4257         }
4258 
4259         /* Make sure that input arguments are available.  */
4260         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4261             arg_ts = arg_temp(op->args[i]);
4262             dir_ts = arg_ts->state_ptr;
                 /* Only an indirect global that is currently dead needs a
                    load into its direct temp.  */
4263             if (dir_ts && arg_ts->state == TS_DEAD) {
4264                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4265                                   ? INDEX_op_ld_i32
4266                                   : INDEX_op_ld_i64);
4267                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4268                                                   arg_ts->type, 3);
4269 
4270                 lop->args[0] = temp_arg(dir_ts);
4271                 lop->args[1] = temp_arg(arg_ts->mem_base);
4272                 lop->args[2] = arg_ts->mem_offset;
4273 
4274                 /* Loaded, but synced with memory.  */
4275                 arg_ts->state = TS_MEM;
4276             }
4277         }
4278 
4279         /* Perform input replacement, and mark inputs that became dead.
4280            No action is required except keeping temp_state up to date
4281            so that we reload when needed.  */
4282         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4283             arg_ts = arg_temp(op->args[i]);
4284             dir_ts = arg_ts->state_ptr;
4285             if (dir_ts) {
4286                 op->args[i] = temp_arg(dir_ts);
4287                 changes = true;
4288                 if (IS_DEAD_ARG(i)) {
4289                     arg_ts->state = TS_DEAD;
4290                 }
4291             }
4292         }
4293 
4294         /* Liveness analysis should ensure that the following are
4295            all correct, for call sites and basic block end points.  */
4296         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4297             /* Nothing to do */
4298         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4299             for (i = 0; i < nb_globals; ++i) {
4300                 /* Liveness should see that globals are synced back,
4301                    that is, either TS_DEAD or TS_MEM.  */
4302                 arg_ts = &s->temps[i];
4303                 tcg_debug_assert(arg_ts->state_ptr == 0
4304                                  || arg_ts->state != 0);
4305             }
4306         } else {
4307             for (i = 0; i < nb_globals; ++i) {
4308                 /* Liveness should see that globals are saved back,
4309                    that is, TS_DEAD, waiting to be reloaded.  */
4310                 arg_ts = &s->temps[i];
4311                 tcg_debug_assert(arg_ts->state_ptr == 0
4312                                  || arg_ts->state == TS_DEAD);
4313             }
4314         }
4315 
4316         /* Outputs become available.  */
4317         if (opc == INDEX_op_mov) {
                 /* Moves are special-cased: when the destination needs a
                    sync and is dead afterwards, the source is stored
                    directly and the mov itself is removed.  */
4318             arg_ts = arg_temp(op->args[0]);
4319             dir_ts = arg_ts->state_ptr;
4320             if (dir_ts) {
4321                 op->args[0] = temp_arg(dir_ts);
4322                 changes = true;
4323 
4324                 /* The output is now live and modified.  */
4325                 arg_ts->state = 0;
4326 
4327                 if (NEED_SYNC_ARG(0)) {
4328                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4329                                       ? INDEX_op_st_i32
4330                                       : INDEX_op_st_i64);
4331                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4332                                                      arg_ts->type, 3);
4333                     TCGTemp *out_ts = dir_ts;
4334 
4335                     if (IS_DEAD_ARG(0)) {
4336                         out_ts = arg_temp(op->args[1]);
4337                         arg_ts->state = TS_DEAD;
4338                         tcg_op_remove(s, op);
4339                     } else {
4340                         arg_ts->state = TS_MEM;
4341                     }
4342 
4343                     sop->args[0] = temp_arg(out_ts);
4344                     sop->args[1] = temp_arg(arg_ts->mem_base);
4345                     sop->args[2] = arg_ts->mem_offset;
4346                 } else {
4347                     tcg_debug_assert(!IS_DEAD_ARG(0));
4348                 }
4349             }
4350         } else {
4351             for (i = 0; i < nb_oargs; i++) {
4352                 arg_ts = arg_temp(op->args[i]);
4353                 dir_ts = arg_ts->state_ptr;
4354                 if (!dir_ts) {
4355                     continue;
4356                 }
4357                 op->args[i] = temp_arg(dir_ts);
4358                 changes = true;
4359 
4360                 /* The output is now live and modified.  */
4361                 arg_ts->state = 0;
4362 
4363                 /* Sync outputs upon their last write.  */
4364                 if (NEED_SYNC_ARG(i)) {
4365                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4366                                       ? INDEX_op_st_i32
4367                                       : INDEX_op_st_i64);
4368                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4369                                                      arg_ts->type, 3);
4370 
4371                     sop->args[0] = temp_arg(dir_ts);
4372                     sop->args[1] = temp_arg(arg_ts->mem_base);
4373                     sop->args[2] = arg_ts->mem_offset;
4374 
4375                     arg_ts->state = TS_MEM;
4376                 }
4377                 /* Drop outputs that are dead.  */
4378                 if (IS_DEAD_ARG(i)) {
4379                     arg_ts->state = TS_DEAD;
4380                 }
4381             }
4382         }
4383     }
4384 
4385     return changes;
4386 }
4387 
4388 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4389 {
4390     intptr_t off;
4391     int size, align;
4392 
4393     /* When allocating an object, look at the full type. */
4394     size = tcg_type_size(ts->base_type);
4395     switch (ts->base_type) {
4396     case TCG_TYPE_I32:
4397         align = 4;
4398         break;
4399     case TCG_TYPE_I64:
4400     case TCG_TYPE_V64:
4401         align = 8;
4402         break;
4403     case TCG_TYPE_I128:
4404     case TCG_TYPE_V128:
4405     case TCG_TYPE_V256:
4406         /*
4407          * Note that we do not require aligned storage for V256,
4408          * and that we provide alignment for I128 to match V128,
4409          * even if that's above what the host ABI requires.
4410          */
4411         align = 16;
4412         break;
4413     default:
4414         g_assert_not_reached();
4415     }
4416 
4417     /*
4418      * Assume the stack is sufficiently aligned.
4419      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4420      * and do not require 16 byte vector alignment.  This seems slightly
4421      * easier than fully parameterizing the above switch statement.
4422      */
4423     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4424     off = ROUND_UP(s->current_frame_offset, align);
4425 
4426     /* If we've exhausted the stack frame, restart with a smaller TB. */
4427     if (off + size > s->frame_end) {
4428         tcg_raise_tb_overflow(s);
4429     }
4430     s->current_frame_offset = off + size;
4431 #if defined(__sparc__)
4432     off += TCG_TARGET_STACK_BIAS;
4433 #endif
4434 
4435     /* If the object was subdivided, assign memory to all the parts. */
4436     if (ts->base_type != ts->type) {
4437         int part_size = tcg_type_size(ts->type);
4438         int part_count = size / part_size;
4439 
4440         /*
4441          * Each part is allocated sequentially in tcg_temp_new_internal.
4442          * Jump back to the first part by subtracting the current index.
4443          */
4444         ts -= ts->temp_subindex;
4445         for (int i = 0; i < part_count; ++i) {
4446             ts[i].mem_offset = off + i * part_size;
4447             ts[i].mem_base = s->frame_temp;
4448             ts[i].mem_allocated = 1;
4449         }
4450     } else {
4451         ts->mem_offset = off;
4452         ts->mem_base = s->frame_temp;
4453         ts->mem_allocated = 1;
4454     }
4455 }
4456 
4457 /* Assign @reg to @ts, and update reg_to_temp[]. */
4458 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4459 {
4460     if (ts->val_type == TEMP_VAL_REG) {
4461         TCGReg old = ts->reg;
4462         tcg_debug_assert(s->reg_to_temp[old] == ts);
4463         if (old == reg) {
4464             return;
4465         }
4466         s->reg_to_temp[old] = NULL;
4467     }
4468     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4469     s->reg_to_temp[reg] = ts;
4470     ts->val_type = TEMP_VAL_REG;
4471     ts->reg = reg;
4472 }
4473 
4474 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4475 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4476 {
4477     tcg_debug_assert(type != TEMP_VAL_REG);
4478     if (ts->val_type == TEMP_VAL_REG) {
4479         TCGReg reg = ts->reg;
4480         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4481         s->reg_to_temp[reg] = NULL;
4482     }
4483     ts->val_type = type;
4484 }
4485 
4486 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4487 
4488 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4489    mark it free; otherwise mark it dead.  */
4490 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4491 {
4492     TCGTempVal new_type;
4493 
4494     switch (ts->kind) {
4495     case TEMP_FIXED:
4496         return;
4497     case TEMP_GLOBAL:
4498     case TEMP_TB:
4499         new_type = TEMP_VAL_MEM;
4500         break;
4501     case TEMP_EBB:
4502         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4503         break;
4504     case TEMP_CONST:
4505         new_type = TEMP_VAL_CONST;
4506         break;
4507     default:
4508         g_assert_not_reached();
4509     }
4510     set_temp_val_nonreg(s, ts, new_type);
4511 }
4512 
4513 /* Mark a temporary as dead.  */
4514 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4515 {
4516     temp_free_or_dead(s, ts, 1);
4517 }
4518 
4519 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4520    registers needs to be allocated to store a constant.  If 'free_or_dead'
4521    is non-zero, subsequently release the temporary; if it is positive, the
4522    temp is dead; if it is negative, the temp is free.  */
4523 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4524                       TCGRegSet preferred_regs, int free_or_dead)
4525 {
4526     if (!temp_readonly(ts) && !ts->mem_coherent) {
4527         if (!ts->mem_allocated) {
4528             temp_allocate_frame(s, ts);
4529         }
4530         switch (ts->val_type) {
4531         case TEMP_VAL_CONST:
4532             /* If we're going to free the temp immediately, then we won't
4533                require it later in a register, so attempt to store the
4534                constant to memory directly.  */
4535             if (free_or_dead
4536                 && tcg_out_sti(s, ts->type, ts->val,
4537                                ts->mem_base->reg, ts->mem_offset)) {
4538                 break;
4539             }
4540             temp_load(s, ts, tcg_target_available_regs[ts->type],
4541                       allocated_regs, preferred_regs);
4542             /* fallthrough */
4543 
4544         case TEMP_VAL_REG:
4545             tcg_out_st(s, ts->type, ts->reg,
4546                        ts->mem_base->reg, ts->mem_offset);
4547             break;
4548 
4549         case TEMP_VAL_MEM:
4550             break;
4551 
4552         case TEMP_VAL_DEAD:
4553         default:
4554             g_assert_not_reached();
4555         }
4556         ts->mem_coherent = 1;
4557     }
4558     if (free_or_dead) {
4559         temp_free_or_dead(s, ts, free_or_dead);
4560     }
4561 }
4562 
4563 /* free register 'reg' by spilling the corresponding temporary if necessary */
4564 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4565 {
4566     TCGTemp *ts = s->reg_to_temp[reg];
4567     if (ts != NULL) {
4568         temp_sync(s, ts, allocated_regs, 0, -1);
4569     }
4570 }
4571 
4572 /**
4573  * tcg_reg_alloc:
4574  * @required_regs: Set of registers in which we must allocate.
4575  * @allocated_regs: Set of registers which must be avoided.
4576  * @preferred_regs: Set of registers we should prefer.
4577  * @rev: True if we search the registers in "indirect" order.
4578  *
4579  * The allocated register must be in @required_regs & ~@allocated_regs,
4580  * but if we can put it in @preferred_regs we may save a move later.
4581  */
4582 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4583                             TCGRegSet allocated_regs,
4584                             TCGRegSet preferred_regs, bool rev)
4585 {
4586     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4587     TCGRegSet reg_ct[2];
4588     const int *order;
4589 
4590     reg_ct[1] = required_regs & ~allocated_regs;
4591     tcg_debug_assert(reg_ct[1] != 0);
4592     reg_ct[0] = reg_ct[1] & preferred_regs;
4593 
4594     /* Skip the preferred_regs option if it cannot be satisfied,
4595        or if the preference made no difference.  */
4596     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4597 
4598     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4599 
4600     /* Try free registers, preferences first.  */
4601     for (j = f; j < 2; j++) {
4602         TCGRegSet set = reg_ct[j];
4603 
4604         if (tcg_regset_single(set)) {
4605             /* One register in the set.  */
4606             TCGReg reg = tcg_regset_first(set);
4607             if (s->reg_to_temp[reg] == NULL) {
4608                 return reg;
4609             }
4610         } else {
4611             for (i = 0; i < n; i++) {
4612                 TCGReg reg = order[i];
4613                 if (s->reg_to_temp[reg] == NULL &&
4614                     tcg_regset_test_reg(set, reg)) {
4615                     return reg;
4616                 }
4617             }
4618         }
4619     }
4620 
4621     /* We must spill something.  */
4622     for (j = f; j < 2; j++) {
4623         TCGRegSet set = reg_ct[j];
4624 
4625         if (tcg_regset_single(set)) {
4626             /* One register in the set.  */
4627             TCGReg reg = tcg_regset_first(set);
4628             tcg_reg_free(s, reg, allocated_regs);
4629             return reg;
4630         } else {
4631             for (i = 0; i < n; i++) {
4632                 TCGReg reg = order[i];
4633                 if (tcg_regset_test_reg(set, reg)) {
4634                     tcg_reg_free(s, reg, allocated_regs);
4635                     return reg;
4636                 }
4637             }
4638         }
4639     }
4640 
4641     g_assert_not_reached();
4642 }
4643 
4644 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4645                                  TCGRegSet allocated_regs,
4646                                  TCGRegSet preferred_regs, bool rev)
4647 {
4648     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4649     TCGRegSet reg_ct[2];
4650     const int *order;
4651 
4652     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4653     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4654     tcg_debug_assert(reg_ct[1] != 0);
4655     reg_ct[0] = reg_ct[1] & preferred_regs;
4656 
4657     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4658 
4659     /*
4660      * Skip the preferred_regs option if it cannot be satisfied,
4661      * or if the preference made no difference.
4662      */
4663     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4664 
4665     /*
4666      * Minimize the number of flushes by looking for 2 free registers first,
4667      * then a single flush, then two flushes.
4668      */
4669     for (fmin = 2; fmin >= 0; fmin--) {
4670         for (j = k; j < 2; j++) {
4671             TCGRegSet set = reg_ct[j];
4672 
4673             for (i = 0; i < n; i++) {
4674                 TCGReg reg = order[i];
4675 
4676                 if (tcg_regset_test_reg(set, reg)) {
4677                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4678                     if (f >= fmin) {
4679                         tcg_reg_free(s, reg, allocated_regs);
4680                         tcg_reg_free(s, reg + 1, allocated_regs);
4681                         return reg;
4682                     }
4683                 }
4684             }
4685         }
4686     }
4687     g_assert_not_reached();
4688 }
4689 
4690 /* Make sure the temporary is in a register.  If needed, allocate the register
4691    from DESIRED while avoiding ALLOCATED.  */
4692 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4693                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4694 {
4695     TCGReg reg;
4696 
4697     switch (ts->val_type) {
4698     case TEMP_VAL_REG:
4699         return;
4700     case TEMP_VAL_CONST:
4701         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4702                             preferred_regs, ts->indirect_base);
4703         if (ts->type <= TCG_TYPE_I64) {
4704             tcg_out_movi(s, ts->type, reg, ts->val);
4705         } else {
4706             uint64_t val = ts->val;
4707             MemOp vece = MO_64;
4708 
4709             /*
4710              * Find the minimal vector element that matches the constant.
4711              * The targets will, in general, have to do this search anyway,
4712              * do this generically.
4713              */
4714             if (val == dup_const(MO_8, val)) {
4715                 vece = MO_8;
4716             } else if (val == dup_const(MO_16, val)) {
4717                 vece = MO_16;
4718             } else if (val == dup_const(MO_32, val)) {
4719                 vece = MO_32;
4720             }
4721 
4722             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4723         }
4724         ts->mem_coherent = 0;
4725         break;
4726     case TEMP_VAL_MEM:
4727         if (!ts->mem_allocated) {
4728             temp_allocate_frame(s, ts);
4729         }
4730         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4731                             preferred_regs, ts->indirect_base);
4732         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4733         ts->mem_coherent = 1;
4734         break;
4735     case TEMP_VAL_DEAD:
4736     default:
4737         g_assert_not_reached();
4738     }
4739     set_temp_val_reg(s, ts, reg);
4740 }
4741 
4742 /* Save a temporary to memory. 'allocated_regs' is used in case a
4743    temporary registers needs to be allocated to store a constant.  */
4744 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4745 {
4746     /* The liveness analysis already ensures that globals are back
4747        in memory. Keep an tcg_debug_assert for safety. */
4748     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4749 }
4750 
4751 /* save globals to their canonical location and assume they can be
4752    modified be the following code. 'allocated_regs' is used in case a
4753    temporary registers needs to be allocated to store a constant. */
4754 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4755 {
4756     int i, n;
4757 
4758     for (i = 0, n = s->nb_globals; i < n; i++) {
4759         temp_save(s, &s->temps[i], allocated_regs);
4760     }
4761 }
4762 
4763 /* sync globals to their canonical location and assume they can be
4764    read by the following code. 'allocated_regs' is used in case a
4765    temporary registers needs to be allocated to store a constant. */
4766 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4767 {
4768     int i, n;
4769 
4770     for (i = 0, n = s->nb_globals; i < n; i++) {
4771         TCGTemp *ts = &s->temps[i];
4772         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4773                          || ts->kind == TEMP_FIXED
4774                          || ts->mem_coherent);
4775     }
4776 }
4777 
4778 /* at the end of a basic block, we assume all temporaries are dead and
4779    all globals are stored at their canonical location. */
4780 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4781 {
4782     int i;
4783 
4784     for (i = s->nb_globals; i < s->nb_temps; i++) {
4785         TCGTemp *ts = &s->temps[i];
4786 
4787         switch (ts->kind) {
4788         case TEMP_TB:
4789             temp_save(s, ts, allocated_regs);
4790             break;
4791         case TEMP_EBB:
4792             /* The liveness analysis already ensures that temps are dead.
4793                Keep an tcg_debug_assert for safety. */
4794             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4795             break;
4796         case TEMP_CONST:
4797             /* Similarly, we should have freed any allocated register. */
4798             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4799             break;
4800         default:
4801             g_assert_not_reached();
4802         }
4803     }
4804 
4805     save_globals(s, allocated_regs);
4806 }
4807 
4808 /*
4809  * At a conditional branch, we assume all temporaries are dead unless
4810  * explicitly live-across-conditional-branch; all globals and local
4811  * temps are synced to their location.
4812  */
4813 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4814 {
4815     sync_globals(s, allocated_regs);
4816 
4817     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4818         TCGTemp *ts = &s->temps[i];
4819         /*
4820          * The liveness analysis already ensures that temps are dead.
4821          * Keep tcg_debug_asserts for safety.
4822          */
4823         switch (ts->kind) {
4824         case TEMP_TB:
4825             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4826             break;
4827         case TEMP_EBB:
4828         case TEMP_CONST:
4829             break;
4830         default:
4831             g_assert_not_reached();
4832         }
4833     }
4834 }
4835 
4836 /*
4837  * Specialized code generation for INDEX_op_mov_* with a constant.
4838  */
4839 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4840                                   tcg_target_ulong val, TCGLifeData arg_life,
4841                                   TCGRegSet preferred_regs)
4842 {
4843     /* ENV should not be modified.  */
4844     tcg_debug_assert(!temp_readonly(ots));
4845 
4846     /* The movi is not explicitly generated here.  */
4847     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4848     ots->val = val;
4849     ots->mem_coherent = 0;
4850     if (NEED_SYNC_ARG(0)) {
4851         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4852     } else if (IS_DEAD_ARG(0)) {
4853         temp_dead(s, ots);
4854     }
4855 }
4856 
4857 /*
4858  * Specialized code generation for INDEX_op_mov_*.
4859  */
4860 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4861 {
4862     const TCGLifeData arg_life = op->life;
4863     TCGRegSet allocated_regs, preferred_regs;
4864     TCGTemp *ts, *ots;
4865     TCGType otype, itype;
4866     TCGReg oreg, ireg;
4867 
4868     allocated_regs = s->reserved_regs;
4869     preferred_regs = output_pref(op, 0);
4870     ots = arg_temp(op->args[0]);
4871     ts = arg_temp(op->args[1]);
4872 
4873     /* ENV should not be modified.  */
4874     tcg_debug_assert(!temp_readonly(ots));
4875 
4876     /* Note that otype != itype for no-op truncation.  */
4877     otype = ots->type;
4878     itype = ts->type;
4879 
4880     if (ts->val_type == TEMP_VAL_CONST) {
4881         /* propagate constant or generate sti */
4882         tcg_target_ulong val = ts->val;
4883         if (IS_DEAD_ARG(1)) {
4884             temp_dead(s, ts);
4885         }
4886         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4887         return;
4888     }
4889 
4890     /* If the source value is in memory we're going to be forced
4891        to have it in a register in order to perform the copy.  Copy
4892        the SOURCE value into its own register first, that way we
4893        don't have to reload SOURCE the next time it is used. */
4894     if (ts->val_type == TEMP_VAL_MEM) {
4895         temp_load(s, ts, tcg_target_available_regs[itype],
4896                   allocated_regs, preferred_regs);
4897     }
4898     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4899     ireg = ts->reg;
4900 
4901     if (IS_DEAD_ARG(0)) {
4902         /* mov to a non-saved dead register makes no sense (even with
4903            liveness analysis disabled). */
4904         tcg_debug_assert(NEED_SYNC_ARG(0));
4905         if (!ots->mem_allocated) {
4906             temp_allocate_frame(s, ots);
4907         }
4908         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4909         if (IS_DEAD_ARG(1)) {
4910             temp_dead(s, ts);
4911         }
4912         temp_dead(s, ots);
4913         return;
4914     }
4915 
4916     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4917         /*
4918          * The mov can be suppressed.  Kill input first, so that it
4919          * is unlinked from reg_to_temp, then set the output to the
4920          * reg that we saved from the input.
4921          */
4922         temp_dead(s, ts);
4923         oreg = ireg;
4924     } else {
4925         if (ots->val_type == TEMP_VAL_REG) {
4926             oreg = ots->reg;
4927         } else {
4928             /* Make sure to not spill the input register during allocation. */
4929             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4930                                  allocated_regs | ((TCGRegSet)1 << ireg),
4931                                  preferred_regs, ots->indirect_base);
4932         }
4933         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4934             /*
4935              * Cross register class move not supported.
4936              * Store the source register into the destination slot
4937              * and leave the destination temp as TEMP_VAL_MEM.
4938              */
4939             assert(!temp_readonly(ots));
4940             if (!ts->mem_allocated) {
4941                 temp_allocate_frame(s, ots);
4942             }
4943             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4944             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4945             ots->mem_coherent = 1;
4946             return;
4947         }
4948     }
4949     set_temp_val_reg(s, ots, oreg);
4950     ots->mem_coherent = 0;
4951 
4952     if (NEED_SYNC_ARG(0)) {
4953         temp_sync(s, ots, allocated_regs, 0, 0);
4954     }
4955 }
4956 
4957 /*
4958  * Specialized code generation for INDEX_op_dup_vec.
4959  */
4960 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4961 {
4962     const TCGLifeData arg_life = op->life;
4963     TCGRegSet dup_out_regs, dup_in_regs;
4964     const TCGArgConstraint *dup_args_ct;
4965     TCGTemp *its, *ots;
4966     TCGType itype, vtype;
4967     unsigned vece;
4968     int lowpart_ofs;
4969     bool ok;
4970 
4971     ots = arg_temp(op->args[0]);
4972     its = arg_temp(op->args[1]);
4973 
4974     /* ENV should not be modified.  */
4975     tcg_debug_assert(!temp_readonly(ots));
4976 
4977     itype = its->type;
4978     vece = TCGOP_VECE(op);
4979     vtype = TCGOP_TYPE(op);
4980 
4981     if (its->val_type == TEMP_VAL_CONST) {
4982         /* Propagate constant via movi -> dupi.  */
4983         tcg_target_ulong val = its->val;
4984         if (IS_DEAD_ARG(1)) {
4985             temp_dead(s, its);
4986         }
4987         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4988         return;
4989     }
4990 
4991     dup_args_ct = opcode_args_ct(op);
4992     dup_out_regs = dup_args_ct[0].regs;
4993     dup_in_regs = dup_args_ct[1].regs;
4994 
4995     /* Allocate the output register now.  */
4996     if (ots->val_type != TEMP_VAL_REG) {
4997         TCGRegSet allocated_regs = s->reserved_regs;
4998         TCGReg oreg;
4999 
5000         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
5001             /* Make sure to not spill the input register. */
5002             tcg_regset_set_reg(allocated_regs, its->reg);
5003         }
5004         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5005                              output_pref(op, 0), ots->indirect_base);
5006         set_temp_val_reg(s, ots, oreg);
5007     }
5008 
5009     switch (its->val_type) {
5010     case TEMP_VAL_REG:
5011         /*
5012          * The dup constriaints must be broad, covering all possible VECE.
5013          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
5014          * to fail, indicating that extra moves are required for that case.
5015          */
5016         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
5017             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
5018                 goto done;
5019             }
5020             /* Try again from memory or a vector input register.  */
5021         }
5022         if (!its->mem_coherent) {
5023             /*
5024              * The input register is not synced, and so an extra store
5025              * would be required to use memory.  Attempt an integer-vector
5026              * register move first.  We do not have a TCGRegSet for this.
5027              */
5028             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
5029                 break;
5030             }
5031             /* Sync the temp back to its slot and load from there.  */
5032             temp_sync(s, its, s->reserved_regs, 0, 0);
5033         }
5034         /* fall through */
5035 
5036     case TEMP_VAL_MEM:
5037         lowpart_ofs = 0;
5038         if (HOST_BIG_ENDIAN) {
5039             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5040         }
5041         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5042                              its->mem_offset + lowpart_ofs)) {
5043             goto done;
5044         }
5045         /* Load the input into the destination vector register. */
5046         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5047         break;
5048 
5049     default:
5050         g_assert_not_reached();
5051     }
5052 
5053     /* We now have a vector input register, so dup must succeed. */
5054     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5055     tcg_debug_assert(ok);
5056 
5057  done:
5058     ots->mem_coherent = 0;
5059     if (IS_DEAD_ARG(1)) {
5060         temp_dead(s, its);
5061     }
5062     if (NEED_SYNC_ARG(0)) {
5063         temp_sync(s, ots, s->reserved_regs, 0, 0);
5064     }
5065     if (IS_DEAD_ARG(0)) {
5066         temp_dead(s, ots);
5067     }
5068 }
5069 
5070 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5071 {
5072     const TCGLifeData arg_life = op->life;
5073     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5074     TCGRegSet i_allocated_regs;
5075     TCGRegSet o_allocated_regs;
5076     int i, k, nb_iargs, nb_oargs;
5077     TCGReg reg;
5078     TCGArg arg;
5079     const TCGArgConstraint *args_ct;
5080     const TCGArgConstraint *arg_ct;
5081     TCGTemp *ts;
5082     TCGArg new_args[TCG_MAX_OP_ARGS];
5083     int const_args[TCG_MAX_OP_ARGS];
5084     TCGCond op_cond;
5085 
5086     nb_oargs = def->nb_oargs;
5087     nb_iargs = def->nb_iargs;
5088 
5089     /* copy constants */
5090     memcpy(new_args + nb_oargs + nb_iargs,
5091            op->args + nb_oargs + nb_iargs,
5092            sizeof(TCGArg) * def->nb_cargs);
5093 
5094     i_allocated_regs = s->reserved_regs;
5095     o_allocated_regs = s->reserved_regs;
5096 
5097     switch (op->opc) {
5098     case INDEX_op_brcond_i32:
5099     case INDEX_op_brcond_i64:
5100         op_cond = op->args[2];
5101         break;
5102     case INDEX_op_setcond_i32:
5103     case INDEX_op_setcond_i64:
5104     case INDEX_op_negsetcond_i32:
5105     case INDEX_op_negsetcond_i64:
5106     case INDEX_op_cmp_vec:
5107         op_cond = op->args[3];
5108         break;
5109     case INDEX_op_brcond2_i32:
5110         op_cond = op->args[4];
5111         break;
5112     case INDEX_op_movcond_i32:
5113     case INDEX_op_movcond_i64:
5114     case INDEX_op_setcond2_i32:
5115     case INDEX_op_cmpsel_vec:
5116         op_cond = op->args[5];
5117         break;
5118     default:
5119         /* No condition within opcode. */
5120         op_cond = TCG_COND_ALWAYS;
5121         break;
5122     }
5123 
5124     args_ct = opcode_args_ct(op);
5125 
5126     /* satisfy input constraints */
5127     for (k = 0; k < nb_iargs; k++) {
5128         TCGRegSet i_preferred_regs, i_required_regs;
5129         bool allocate_new_reg, copyto_new_reg;
5130         TCGTemp *ts2;
5131         int i1, i2;
5132 
5133         i = args_ct[nb_oargs + k].sort_index;
5134         arg = op->args[i];
5135         arg_ct = &args_ct[i];
5136         ts = arg_temp(arg);
5137 
5138         if (ts->val_type == TEMP_VAL_CONST) {
5139 #ifdef TCG_REG_ZERO
5140             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5141                 /* Hardware zero register: indicate register via non-const. */
5142                 const_args[i] = 0;
5143                 new_args[i] = TCG_REG_ZERO;
5144                 continue;
5145             }
5146 #endif
5147 
5148             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5149                                        op_cond, TCGOP_VECE(op))) {
5150                 /* constant is OK for instruction */
5151                 const_args[i] = 1;
5152                 new_args[i] = ts->val;
5153                 continue;
5154             }
5155         }
5156 
5157         reg = ts->reg;
5158         i_preferred_regs = 0;
5159         i_required_regs = arg_ct->regs;
5160         allocate_new_reg = false;
5161         copyto_new_reg = false;
5162 
5163         switch (arg_ct->pair) {
5164         case 0: /* not paired */
5165             if (arg_ct->ialias) {
5166                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5167 
5168                 /*
5169                  * If the input is readonly, then it cannot also be an
5170                  * output and aliased to itself.  If the input is not
5171                  * dead after the instruction, we must allocate a new
5172                  * register and move it.
5173                  */
5174                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5175                     || args_ct[arg_ct->alias_index].newreg) {
5176                     allocate_new_reg = true;
5177                 } else if (ts->val_type == TEMP_VAL_REG) {
5178                     /*
5179                      * Check if the current register has already been
5180                      * allocated for another input.
5181                      */
5182                     allocate_new_reg =
5183                         tcg_regset_test_reg(i_allocated_regs, reg);
5184                 }
5185             }
5186             if (!allocate_new_reg) {
5187                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5188                           i_preferred_regs);
5189                 reg = ts->reg;
5190                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5191             }
5192             if (allocate_new_reg) {
5193                 /*
5194                  * Allocate a new register matching the constraint
5195                  * and move the temporary register into it.
5196                  */
5197                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5198                           i_allocated_regs, 0);
5199                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5200                                     i_preferred_regs, ts->indirect_base);
5201                 copyto_new_reg = true;
5202             }
5203             break;
5204 
5205         case 1:
5206             /* First of an input pair; if i1 == i2, the second is an output. */
5207             i1 = i;
5208             i2 = arg_ct->pair_index;
5209             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5210 
5211             /*
5212              * It is easier to default to allocating a new pair
5213              * and to identify a few cases where it's not required.
5214              */
5215             if (arg_ct->ialias) {
5216                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5217                 if (IS_DEAD_ARG(i1) &&
5218                     IS_DEAD_ARG(i2) &&
5219                     !temp_readonly(ts) &&
5220                     ts->val_type == TEMP_VAL_REG &&
5221                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5222                     tcg_regset_test_reg(i_required_regs, reg) &&
5223                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5224                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5225                     (ts2
5226                      ? ts2->val_type == TEMP_VAL_REG &&
5227                        ts2->reg == reg + 1 &&
5228                        !temp_readonly(ts2)
5229                      : s->reg_to_temp[reg + 1] == NULL)) {
5230                     break;
5231                 }
5232             } else {
5233                 /* Without aliasing, the pair must also be an input. */
5234                 tcg_debug_assert(ts2);
5235                 if (ts->val_type == TEMP_VAL_REG &&
5236                     ts2->val_type == TEMP_VAL_REG &&
5237                     ts2->reg == reg + 1 &&
5238                     tcg_regset_test_reg(i_required_regs, reg)) {
5239                     break;
5240                 }
5241             }
5242             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5243                                      0, ts->indirect_base);
5244             goto do_pair;
5245 
5246         case 2: /* pair second */
5247             reg = new_args[arg_ct->pair_index] + 1;
5248             goto do_pair;
5249 
5250         case 3: /* ialias with second output, no first input */
5251             tcg_debug_assert(arg_ct->ialias);
5252             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5253 
5254             if (IS_DEAD_ARG(i) &&
5255                 !temp_readonly(ts) &&
5256                 ts->val_type == TEMP_VAL_REG &&
5257                 reg > 0 &&
5258                 s->reg_to_temp[reg - 1] == NULL &&
5259                 tcg_regset_test_reg(i_required_regs, reg) &&
5260                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5261                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5262                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5263                 break;
5264             }
5265             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5266                                      i_allocated_regs, 0,
5267                                      ts->indirect_base);
5268             tcg_regset_set_reg(i_allocated_regs, reg);
5269             reg += 1;
5270             goto do_pair;
5271 
5272         do_pair:
5273             /*
5274              * If an aliased input is not dead after the instruction,
5275              * we must allocate a new register and move it.
5276              */
5277             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5278                 TCGRegSet t_allocated_regs = i_allocated_regs;
5279 
5280                 /*
5281                  * Because of the alias, and the continued life, make sure
5282                  * that the temp is somewhere *other* than the reg pair,
5283                  * and we get a copy in reg.
5284                  */
5285                 tcg_regset_set_reg(t_allocated_regs, reg);
5286                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5287                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5288                     /* If ts was already in reg, copy it somewhere else. */
5289                     TCGReg nr;
5290                     bool ok;
5291 
5292                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5293                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5294                                        t_allocated_regs, 0, ts->indirect_base);
5295                     ok = tcg_out_mov(s, ts->type, nr, reg);
5296                     tcg_debug_assert(ok);
5297 
5298                     set_temp_val_reg(s, ts, nr);
5299                 } else {
5300                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5301                               t_allocated_regs, 0);
5302                     copyto_new_reg = true;
5303                 }
5304             } else {
5305                 /* Preferably allocate to reg, otherwise copy. */
5306                 i_required_regs = (TCGRegSet)1 << reg;
5307                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5308                           i_preferred_regs);
5309                 copyto_new_reg = ts->reg != reg;
5310             }
5311             break;
5312 
5313         default:
5314             g_assert_not_reached();
5315         }
5316 
5317         if (copyto_new_reg) {
5318             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5319                 /*
5320                  * Cross register class move not supported.  Sync the
5321                  * temp back to its slot and load from there.
5322                  */
5323                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5324                 tcg_out_ld(s, ts->type, reg,
5325                            ts->mem_base->reg, ts->mem_offset);
5326             }
5327         }
5328         new_args[i] = reg;
5329         const_args[i] = 0;
5330         tcg_regset_set_reg(i_allocated_regs, reg);
5331     }
5332 
5333     /* mark dead temporaries and free the associated registers */
5334     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5335         if (IS_DEAD_ARG(i)) {
5336             temp_dead(s, arg_temp(op->args[i]));
5337         }
5338     }
5339 
5340     if (def->flags & TCG_OPF_COND_BRANCH) {
5341         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5342     } else if (def->flags & TCG_OPF_BB_END) {
5343         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5344     } else {
5345         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5346             /* XXX: permit generic clobber register list ? */
5347             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5348                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5349                     tcg_reg_free(s, i, i_allocated_regs);
5350                 }
5351             }
5352         }
5353         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5354             /* sync globals if the op has side effects and might trigger
5355                an exception. */
5356             sync_globals(s, i_allocated_regs);
5357         }
5358 
5359         /* satisfy the output constraints */
5360         for (k = 0; k < nb_oargs; k++) {
5361             i = args_ct[k].sort_index;
5362             arg = op->args[i];
5363             arg_ct = &args_ct[i];
5364             ts = arg_temp(arg);
5365 
5366             /* ENV should not be modified.  */
5367             tcg_debug_assert(!temp_readonly(ts));
5368 
5369             switch (arg_ct->pair) {
5370             case 0: /* not paired */
5371                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5372                     reg = new_args[arg_ct->alias_index];
5373                 } else if (arg_ct->newreg) {
5374                     reg = tcg_reg_alloc(s, arg_ct->regs,
5375                                         i_allocated_regs | o_allocated_regs,
5376                                         output_pref(op, k), ts->indirect_base);
5377                 } else {
5378                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5379                                         output_pref(op, k), ts->indirect_base);
5380                 }
5381                 break;
5382 
5383             case 1: /* first of pair */
5384                 if (arg_ct->oalias) {
5385                     reg = new_args[arg_ct->alias_index];
5386                 } else if (arg_ct->newreg) {
5387                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5388                                              i_allocated_regs | o_allocated_regs,
5389                                              output_pref(op, k),
5390                                              ts->indirect_base);
5391                 } else {
5392                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5393                                              output_pref(op, k),
5394                                              ts->indirect_base);
5395                 }
5396                 break;
5397 
5398             case 2: /* second of pair */
5399                 if (arg_ct->oalias) {
5400                     reg = new_args[arg_ct->alias_index];
5401                 } else {
5402                     reg = new_args[arg_ct->pair_index] + 1;
5403                 }
5404                 break;
5405 
5406             case 3: /* first of pair, aliasing with a second input */
5407                 tcg_debug_assert(!arg_ct->newreg);
5408                 reg = new_args[arg_ct->pair_index] - 1;
5409                 break;
5410 
5411             default:
5412                 g_assert_not_reached();
5413             }
5414             tcg_regset_set_reg(o_allocated_regs, reg);
5415             set_temp_val_reg(s, ts, reg);
5416             ts->mem_coherent = 0;
5417             new_args[i] = reg;
5418         }
5419     }
5420 
5421     /* emit instruction */
5422     TCGType type = TCGOP_TYPE(op);
5423     switch (op->opc) {
5424     case INDEX_op_ext_i32_i64:
5425         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5426         break;
5427     case INDEX_op_extu_i32_i64:
5428         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5429         break;
5430     case INDEX_op_extrl_i64_i32:
5431         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5432         break;
5433 
5434     case INDEX_op_add:
5435     case INDEX_op_and:
5436     case INDEX_op_andc:
5437     case INDEX_op_eqv:
5438     case INDEX_op_nand:
5439     case INDEX_op_nor:
5440     case INDEX_op_or:
5441     case INDEX_op_orc:
5442     case INDEX_op_xor:
5443         {
5444             const TCGOutOpBinary *out =
5445                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5446 
5447             /* Constants should never appear in the first source operand. */
5448             tcg_debug_assert(!const_args[1]);
5449             if (const_args[2]) {
5450                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5451             } else {
5452                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5453             }
5454         }
5455         break;
5456 
5457     case INDEX_op_sub:
5458         {
5459             const TCGOutOpSubtract *out = &outop_sub;
5460 
5461             /*
5462              * Constants should never appear in the second source operand.
5463              * These are folded to add with negative constant.
5464              */
5465             tcg_debug_assert(!const_args[2]);
5466             if (const_args[1]) {
5467                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5468             } else {
5469                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5470             }
5471         }
5472         break;
5473 
5474     default:
5475         if (def->flags & TCG_OPF_VECTOR) {
5476             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5477                            TCGOP_VECE(op), new_args, const_args);
5478         } else {
5479             tcg_out_op(s, op->opc, type, new_args, const_args);
5480         }
5481         break;
5482     }
5483 
5484     /* move the outputs in the correct register if needed */
5485     for(i = 0; i < nb_oargs; i++) {
5486         ts = arg_temp(op->args[i]);
5487 
5488         /* ENV should not be modified.  */
5489         tcg_debug_assert(!temp_readonly(ts));
5490 
5491         if (NEED_SYNC_ARG(i)) {
5492             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5493         } else if (IS_DEAD_ARG(i)) {
5494             temp_dead(s, ts);
5495         }
5496     }
5497 }
5498 
5499 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5500 {
5501     const TCGLifeData arg_life = op->life;
5502     TCGTemp *ots, *itsl, *itsh;
5503     TCGType vtype = TCGOP_TYPE(op);
5504 
5505     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5506     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5507     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5508 
5509     ots = arg_temp(op->args[0]);
5510     itsl = arg_temp(op->args[1]);
5511     itsh = arg_temp(op->args[2]);
5512 
5513     /* ENV should not be modified.  */
5514     tcg_debug_assert(!temp_readonly(ots));
5515 
5516     /* Allocate the output register now.  */
5517     if (ots->val_type != TEMP_VAL_REG) {
5518         TCGRegSet allocated_regs = s->reserved_regs;
5519         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5520         TCGReg oreg;
5521 
5522         /* Make sure to not spill the input registers. */
5523         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5524             tcg_regset_set_reg(allocated_regs, itsl->reg);
5525         }
5526         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5527             tcg_regset_set_reg(allocated_regs, itsh->reg);
5528         }
5529 
5530         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5531                              output_pref(op, 0), ots->indirect_base);
5532         set_temp_val_reg(s, ots, oreg);
5533     }
5534 
5535     /* Promote dup2 of immediates to dupi_vec. */
5536     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5537         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5538         MemOp vece = MO_64;
5539 
5540         if (val == dup_const(MO_8, val)) {
5541             vece = MO_8;
5542         } else if (val == dup_const(MO_16, val)) {
5543             vece = MO_16;
5544         } else if (val == dup_const(MO_32, val)) {
5545             vece = MO_32;
5546         }
5547 
5548         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5549         goto done;
5550     }
5551 
5552     /* If the two inputs form one 64-bit value, try dupm_vec. */
5553     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5554         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5555         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5556         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5557 
5558         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5559         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5560 
5561         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5562                              its->mem_base->reg, its->mem_offset)) {
5563             goto done;
5564         }
5565     }
5566 
5567     /* Fall back to generic expansion. */
5568     return false;
5569 
5570  done:
5571     ots->mem_coherent = 0;
5572     if (IS_DEAD_ARG(1)) {
5573         temp_dead(s, itsl);
5574     }
5575     if (IS_DEAD_ARG(2)) {
5576         temp_dead(s, itsh);
5577     }
5578     if (NEED_SYNC_ARG(0)) {
5579         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5580     } else if (IS_DEAD_ARG(0)) {
5581         temp_dead(s, ots);
5582     }
5583     return true;
5584 }
5585 
5586 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5587                          TCGRegSet allocated_regs)
5588 {
5589     if (ts->val_type == TEMP_VAL_REG) {
5590         if (ts->reg != reg) {
5591             tcg_reg_free(s, reg, allocated_regs);
5592             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5593                 /*
5594                  * Cross register class move not supported.  Sync the
5595                  * temp back to its slot and load from there.
5596                  */
5597                 temp_sync(s, ts, allocated_regs, 0, 0);
5598                 tcg_out_ld(s, ts->type, reg,
5599                            ts->mem_base->reg, ts->mem_offset);
5600             }
5601         }
5602     } else {
5603         TCGRegSet arg_set = 0;
5604 
5605         tcg_reg_free(s, reg, allocated_regs);
5606         tcg_regset_set_reg(arg_set, reg);
5607         temp_load(s, ts, arg_set, allocated_regs, 0);
5608     }
5609 }
5610 
5611 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5612                          TCGRegSet allocated_regs)
5613 {
5614     /*
5615      * When the destination is on the stack, load up the temp and store.
5616      * If there are many call-saved registers, the temp might live to
5617      * see another use; otherwise it'll be discarded.
5618      */
5619     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5620     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5621                arg_slot_stk_ofs(arg_slot));
5622 }
5623 
5624 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5625                             TCGTemp *ts, TCGRegSet *allocated_regs)
5626 {
5627     if (arg_slot_reg_p(l->arg_slot)) {
5628         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5629         load_arg_reg(s, reg, ts, *allocated_regs);
5630         tcg_regset_set_reg(*allocated_regs, reg);
5631     } else {
5632         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5633     }
5634 }
5635 
5636 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5637                          intptr_t ref_off, TCGRegSet *allocated_regs)
5638 {
5639     TCGReg reg;
5640 
5641     if (arg_slot_reg_p(arg_slot)) {
5642         reg = tcg_target_call_iarg_regs[arg_slot];
5643         tcg_reg_free(s, reg, *allocated_regs);
5644         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5645         tcg_regset_set_reg(*allocated_regs, reg);
5646     } else {
5647         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5648                             *allocated_regs, 0, false);
5649         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5650         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5651                    arg_slot_stk_ofs(arg_slot));
5652     }
5653 }
5654 
5655 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5656 {
5657     const int nb_oargs = TCGOP_CALLO(op);
5658     const int nb_iargs = TCGOP_CALLI(op);
5659     const TCGLifeData arg_life = op->life;
5660     const TCGHelperInfo *info = tcg_call_info(op);
5661     TCGRegSet allocated_regs = s->reserved_regs;
5662     int i;
5663 
5664     /*
5665      * Move inputs into place in reverse order,
5666      * so that we place stacked arguments first.
5667      */
5668     for (i = nb_iargs - 1; i >= 0; --i) {
5669         const TCGCallArgumentLoc *loc = &info->in[i];
5670         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5671 
5672         switch (loc->kind) {
5673         case TCG_CALL_ARG_NORMAL:
5674         case TCG_CALL_ARG_EXTEND_U:
5675         case TCG_CALL_ARG_EXTEND_S:
5676             load_arg_normal(s, loc, ts, &allocated_regs);
5677             break;
5678         case TCG_CALL_ARG_BY_REF:
5679             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5680             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5681                          arg_slot_stk_ofs(loc->ref_slot),
5682                          &allocated_regs);
5683             break;
5684         case TCG_CALL_ARG_BY_REF_N:
5685             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5686             break;
5687         default:
5688             g_assert_not_reached();
5689         }
5690     }
5691 
5692     /* Mark dead temporaries and free the associated registers.  */
5693     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5694         if (IS_DEAD_ARG(i)) {
5695             temp_dead(s, arg_temp(op->args[i]));
5696         }
5697     }
5698 
5699     /* Clobber call registers.  */
5700     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5701         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5702             tcg_reg_free(s, i, allocated_regs);
5703         }
5704     }
5705 
5706     /*
5707      * Save globals if they might be written by the helper,
5708      * sync them if they might be read.
5709      */
5710     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5711         /* Nothing to do */
5712     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5713         sync_globals(s, allocated_regs);
5714     } else {
5715         save_globals(s, allocated_regs);
5716     }
5717 
5718     /*
5719      * If the ABI passes a pointer to the returned struct as the first
5720      * argument, load that now.  Pass a pointer to the output home slot.
5721      */
5722     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5723         TCGTemp *ts = arg_temp(op->args[0]);
5724 
5725         if (!ts->mem_allocated) {
5726             temp_allocate_frame(s, ts);
5727         }
5728         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5729     }
5730 
5731     tcg_out_call(s, tcg_call_func(op), info);
5732 
5733     /* Assign output registers and emit moves if needed.  */
5734     switch (info->out_kind) {
5735     case TCG_CALL_RET_NORMAL:
5736         for (i = 0; i < nb_oargs; i++) {
5737             TCGTemp *ts = arg_temp(op->args[i]);
5738             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5739 
5740             /* ENV should not be modified.  */
5741             tcg_debug_assert(!temp_readonly(ts));
5742 
5743             set_temp_val_reg(s, ts, reg);
5744             ts->mem_coherent = 0;
5745         }
5746         break;
5747 
5748     case TCG_CALL_RET_BY_VEC:
5749         {
5750             TCGTemp *ts = arg_temp(op->args[0]);
5751 
5752             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5753             tcg_debug_assert(ts->temp_subindex == 0);
5754             if (!ts->mem_allocated) {
5755                 temp_allocate_frame(s, ts);
5756             }
5757             tcg_out_st(s, TCG_TYPE_V128,
5758                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5759                        ts->mem_base->reg, ts->mem_offset);
5760         }
5761         /* fall through to mark all parts in memory */
5762 
5763     case TCG_CALL_RET_BY_REF:
5764         /* The callee has performed a write through the reference. */
5765         for (i = 0; i < nb_oargs; i++) {
5766             TCGTemp *ts = arg_temp(op->args[i]);
5767             ts->val_type = TEMP_VAL_MEM;
5768         }
5769         break;
5770 
5771     default:
5772         g_assert_not_reached();
5773     }
5774 
5775     /* Flush or discard output registers as needed. */
5776     for (i = 0; i < nb_oargs; i++) {
5777         TCGTemp *ts = arg_temp(op->args[i]);
5778         if (NEED_SYNC_ARG(i)) {
5779             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5780         } else if (IS_DEAD_ARG(i)) {
5781             temp_dead(s, ts);
5782         }
5783     }
5784 }
5785 
5786 /**
5787  * atom_and_align_for_opc:
5788  * @s: tcg context
5789  * @opc: memory operation code
5790  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5791  * @allow_two_ops: true if we are prepared to issue two operations
5792  *
5793  * Return the alignment and atomicity to use for the inline fast path
5794  * for the given memory operation.  The alignment may be larger than
5795  * that specified in @opc, and the correct alignment will be diagnosed
5796  * by the slow path helper.
5797  *
5798  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5799  * and issue two loads or stores for subalignment.
5800  */
5801 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5802                                            MemOp host_atom, bool allow_two_ops)
5803 {
5804     MemOp align = memop_alignment_bits(opc);
5805     MemOp size = opc & MO_SIZE;
5806     MemOp half = size ? size - 1 : 0;
5807     MemOp atom = opc & MO_ATOM_MASK;
5808     MemOp atmax;
5809 
5810     switch (atom) {
5811     case MO_ATOM_NONE:
5812         /* The operation requires no specific atomicity. */
5813         atmax = MO_8;
5814         break;
5815 
5816     case MO_ATOM_IFALIGN:
5817         atmax = size;
5818         break;
5819 
5820     case MO_ATOM_IFALIGN_PAIR:
5821         atmax = half;
5822         break;
5823 
5824     case MO_ATOM_WITHIN16:
5825         atmax = size;
5826         if (size == MO_128) {
5827             /* Misalignment implies !within16, and therefore no atomicity. */
5828         } else if (host_atom != MO_ATOM_WITHIN16) {
5829             /* The host does not implement within16, so require alignment. */
5830             align = MAX(align, size);
5831         }
5832         break;
5833 
5834     case MO_ATOM_WITHIN16_PAIR:
5835         atmax = size;
5836         /*
5837          * Misalignment implies !within16, and therefore half atomicity.
5838          * Any host prepared for two operations can implement this with
5839          * half alignment.
5840          */
5841         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5842             align = MAX(align, half);
5843         }
5844         break;
5845 
5846     case MO_ATOM_SUBALIGN:
5847         atmax = size;
5848         if (host_atom != MO_ATOM_SUBALIGN) {
5849             /* If unaligned but not odd, there are subobjects up to half. */
5850             if (allow_two_ops) {
5851                 align = MAX(align, half);
5852             } else {
5853                 align = MAX(align, size);
5854             }
5855         }
5856         break;
5857 
5858     default:
5859         g_assert_not_reached();
5860     }
5861 
5862     return (TCGAtomAlign){ .atom = atmax, .align = align };
5863 }
5864 
5865 /*
5866  * Similarly for qemu_ld/st slow path helpers.
5867  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5868  * using only the provided backend tcg_out_* functions.
5869  */
5870 
5871 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5872 {
5873     int ofs = arg_slot_stk_ofs(slot);
5874 
5875     /*
5876      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5877      * require extension to uint64_t, adjust the address for uint32_t.
5878      */
5879     if (HOST_BIG_ENDIAN &&
5880         TCG_TARGET_REG_BITS == 64 &&
5881         type == TCG_TYPE_I32) {
5882         ofs += 4;
5883     }
5884     return ofs;
5885 }
5886 
5887 static void tcg_out_helper_load_slots(TCGContext *s,
5888                                       unsigned nmov, TCGMovExtend *mov,
5889                                       const TCGLdstHelperParam *parm)
5890 {
5891     unsigned i;
5892     TCGReg dst3;
5893 
5894     /*
5895      * Start from the end, storing to the stack first.
5896      * This frees those registers, so we need not consider overlap.
5897      */
5898     for (i = nmov; i-- > 0; ) {
5899         unsigned slot = mov[i].dst;
5900 
5901         if (arg_slot_reg_p(slot)) {
5902             goto found_reg;
5903         }
5904 
5905         TCGReg src = mov[i].src;
5906         TCGType dst_type = mov[i].dst_type;
5907         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5908 
5909         /* The argument is going onto the stack; extend into scratch. */
5910         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5911             tcg_debug_assert(parm->ntmp != 0);
5912             mov[i].dst = src = parm->tmp[0];
5913             tcg_out_movext1(s, &mov[i]);
5914         }
5915 
5916         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5917                    tcg_out_helper_stk_ofs(dst_type, slot));
5918     }
5919     return;
5920 
5921  found_reg:
5922     /*
5923      * The remaining arguments are in registers.
5924      * Convert slot numbers to argument registers.
5925      */
5926     nmov = i + 1;
5927     for (i = 0; i < nmov; ++i) {
5928         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5929     }
5930 
5931     switch (nmov) {
5932     case 4:
5933         /* The backend must have provided enough temps for the worst case. */
5934         tcg_debug_assert(parm->ntmp >= 2);
5935 
5936         dst3 = mov[3].dst;
5937         for (unsigned j = 0; j < 3; ++j) {
5938             if (dst3 == mov[j].src) {
5939                 /*
5940                  * Conflict. Copy the source to a temporary, perform the
5941                  * remaining moves, then the extension from our scratch
5942                  * on the way out.
5943                  */
5944                 TCGReg scratch = parm->tmp[1];
5945 
5946                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5947                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5948                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5949                 break;
5950             }
5951         }
5952 
5953         /* No conflicts: perform this move and continue. */
5954         tcg_out_movext1(s, &mov[3]);
5955         /* fall through */
5956 
5957     case 3:
5958         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5959                         parm->ntmp ? parm->tmp[0] : -1);
5960         break;
5961     case 2:
5962         tcg_out_movext2(s, mov, mov + 1,
5963                         parm->ntmp ? parm->tmp[0] : -1);
5964         break;
5965     case 1:
5966         tcg_out_movext1(s, mov);
5967         break;
5968     default:
5969         g_assert_not_reached();
5970     }
5971 }
5972 
5973 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5974                                     TCGType type, tcg_target_long imm,
5975                                     const TCGLdstHelperParam *parm)
5976 {
5977     if (arg_slot_reg_p(slot)) {
5978         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5979     } else {
5980         int ofs = tcg_out_helper_stk_ofs(type, slot);
5981         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5982             tcg_debug_assert(parm->ntmp != 0);
5983             tcg_out_movi(s, type, parm->tmp[0], imm);
5984             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5985         }
5986     }
5987 }
5988 
5989 static void tcg_out_helper_load_common_args(TCGContext *s,
5990                                             const TCGLabelQemuLdst *ldst,
5991                                             const TCGLdstHelperParam *parm,
5992                                             const TCGHelperInfo *info,
5993                                             unsigned next_arg)
5994 {
5995     TCGMovExtend ptr_mov = {
5996         .dst_type = TCG_TYPE_PTR,
5997         .src_type = TCG_TYPE_PTR,
5998         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5999     };
6000     const TCGCallArgumentLoc *loc = &info->in[0];
6001     TCGType type;
6002     unsigned slot;
6003     tcg_target_ulong imm;
6004 
6005     /*
6006      * Handle env, which is always first.
6007      */
6008     ptr_mov.dst = loc->arg_slot;
6009     ptr_mov.src = TCG_AREG0;
6010     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6011 
6012     /*
6013      * Handle oi.
6014      */
6015     imm = ldst->oi;
6016     loc = &info->in[next_arg];
6017     type = TCG_TYPE_I32;
6018     switch (loc->kind) {
6019     case TCG_CALL_ARG_NORMAL:
6020         break;
6021     case TCG_CALL_ARG_EXTEND_U:
6022     case TCG_CALL_ARG_EXTEND_S:
6023         /* No extension required for MemOpIdx. */
6024         tcg_debug_assert(imm <= INT32_MAX);
6025         type = TCG_TYPE_REG;
6026         break;
6027     default:
6028         g_assert_not_reached();
6029     }
6030     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6031     next_arg++;
6032 
6033     /*
6034      * Handle ra.
6035      */
6036     loc = &info->in[next_arg];
6037     slot = loc->arg_slot;
6038     if (parm->ra_gen) {
6039         int arg_reg = -1;
6040         TCGReg ra_reg;
6041 
6042         if (arg_slot_reg_p(slot)) {
6043             arg_reg = tcg_target_call_iarg_regs[slot];
6044         }
6045         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6046 
6047         ptr_mov.dst = slot;
6048         ptr_mov.src = ra_reg;
6049         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6050     } else {
6051         imm = (uintptr_t)ldst->raddr;
6052         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6053     }
6054 }
6055 
6056 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6057                                        const TCGCallArgumentLoc *loc,
6058                                        TCGType dst_type, TCGType src_type,
6059                                        TCGReg lo, TCGReg hi)
6060 {
6061     MemOp reg_mo;
6062 
6063     if (dst_type <= TCG_TYPE_REG) {
6064         MemOp src_ext;
6065 
6066         switch (loc->kind) {
6067         case TCG_CALL_ARG_NORMAL:
6068             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6069             break;
6070         case TCG_CALL_ARG_EXTEND_U:
6071             dst_type = TCG_TYPE_REG;
6072             src_ext = MO_UL;
6073             break;
6074         case TCG_CALL_ARG_EXTEND_S:
6075             dst_type = TCG_TYPE_REG;
6076             src_ext = MO_SL;
6077             break;
6078         default:
6079             g_assert_not_reached();
6080         }
6081 
6082         mov[0].dst = loc->arg_slot;
6083         mov[0].dst_type = dst_type;
6084         mov[0].src = lo;
6085         mov[0].src_type = src_type;
6086         mov[0].src_ext = src_ext;
6087         return 1;
6088     }
6089 
6090     if (TCG_TARGET_REG_BITS == 32) {
6091         assert(dst_type == TCG_TYPE_I64);
6092         reg_mo = MO_32;
6093     } else {
6094         assert(dst_type == TCG_TYPE_I128);
6095         reg_mo = MO_64;
6096     }
6097 
6098     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6099     mov[0].src = lo;
6100     mov[0].dst_type = TCG_TYPE_REG;
6101     mov[0].src_type = TCG_TYPE_REG;
6102     mov[0].src_ext = reg_mo;
6103 
6104     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6105     mov[1].src = hi;
6106     mov[1].dst_type = TCG_TYPE_REG;
6107     mov[1].src_type = TCG_TYPE_REG;
6108     mov[1].src_ext = reg_mo;
6109 
6110     return 2;
6111 }
6112 
6113 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6114                                    const TCGLdstHelperParam *parm)
6115 {
6116     const TCGHelperInfo *info;
6117     const TCGCallArgumentLoc *loc;
6118     TCGMovExtend mov[2];
6119     unsigned next_arg, nmov;
6120     MemOp mop = get_memop(ldst->oi);
6121 
6122     switch (mop & MO_SIZE) {
6123     case MO_8:
6124     case MO_16:
6125     case MO_32:
6126         info = &info_helper_ld32_mmu;
6127         break;
6128     case MO_64:
6129         info = &info_helper_ld64_mmu;
6130         break;
6131     case MO_128:
6132         info = &info_helper_ld128_mmu;
6133         break;
6134     default:
6135         g_assert_not_reached();
6136     }
6137 
6138     /* Defer env argument. */
6139     next_arg = 1;
6140 
6141     loc = &info->in[next_arg];
6142     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6143         /*
6144          * 32-bit host with 32-bit guest: zero-extend the guest address
6145          * to 64-bits for the helper by storing the low part, then
6146          * load a zero for the high part.
6147          */
6148         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6149                                TCG_TYPE_I32, TCG_TYPE_I32,
6150                                ldst->addr_reg, -1);
6151         tcg_out_helper_load_slots(s, 1, mov, parm);
6152 
6153         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6154                                 TCG_TYPE_I32, 0, parm);
6155         next_arg += 2;
6156     } else {
6157         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6158                                       ldst->addr_reg, -1);
6159         tcg_out_helper_load_slots(s, nmov, mov, parm);
6160         next_arg += nmov;
6161     }
6162 
6163     switch (info->out_kind) {
6164     case TCG_CALL_RET_NORMAL:
6165     case TCG_CALL_RET_BY_VEC:
6166         break;
6167     case TCG_CALL_RET_BY_REF:
6168         /*
6169          * The return reference is in the first argument slot.
6170          * We need memory in which to return: re-use the top of stack.
6171          */
6172         {
6173             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6174 
6175             if (arg_slot_reg_p(0)) {
6176                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6177                                  TCG_REG_CALL_STACK, ofs_slot0);
6178             } else {
6179                 tcg_debug_assert(parm->ntmp != 0);
6180                 tcg_out_addi_ptr(s, parm->tmp[0],
6181                                  TCG_REG_CALL_STACK, ofs_slot0);
6182                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6183                            TCG_REG_CALL_STACK, ofs_slot0);
6184             }
6185         }
6186         break;
6187     default:
6188         g_assert_not_reached();
6189     }
6190 
6191     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6192 }
6193 
6194 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6195                                   bool load_sign,
6196                                   const TCGLdstHelperParam *parm)
6197 {
6198     MemOp mop = get_memop(ldst->oi);
6199     TCGMovExtend mov[2];
6200     int ofs_slot0;
6201 
6202     switch (ldst->type) {
6203     case TCG_TYPE_I64:
6204         if (TCG_TARGET_REG_BITS == 32) {
6205             break;
6206         }
6207         /* fall through */
6208 
6209     case TCG_TYPE_I32:
6210         mov[0].dst = ldst->datalo_reg;
6211         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6212         mov[0].dst_type = ldst->type;
6213         mov[0].src_type = TCG_TYPE_REG;
6214 
6215         /*
6216          * If load_sign, then we allowed the helper to perform the
6217          * appropriate sign extension to tcg_target_ulong, and all
6218          * we need now is a plain move.
6219          *
6220          * If they do not, then we expect the relevant extension
6221          * instruction to be no more expensive than a move, and
6222          * we thus save the icache etc by only using one of two
6223          * helper functions.
6224          */
6225         if (load_sign || !(mop & MO_SIGN)) {
6226             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6227                 mov[0].src_ext = MO_32;
6228             } else {
6229                 mov[0].src_ext = MO_64;
6230             }
6231         } else {
6232             mov[0].src_ext = mop & MO_SSIZE;
6233         }
6234         tcg_out_movext1(s, mov);
6235         return;
6236 
6237     case TCG_TYPE_I128:
6238         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6239         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6240         switch (TCG_TARGET_CALL_RET_I128) {
6241         case TCG_CALL_RET_NORMAL:
6242             break;
6243         case TCG_CALL_RET_BY_VEC:
6244             tcg_out_st(s, TCG_TYPE_V128,
6245                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6246                        TCG_REG_CALL_STACK, ofs_slot0);
6247             /* fall through */
6248         case TCG_CALL_RET_BY_REF:
6249             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6250                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6251             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6252                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6253             return;
6254         default:
6255             g_assert_not_reached();
6256         }
6257         break;
6258 
6259     default:
6260         g_assert_not_reached();
6261     }
6262 
6263     mov[0].dst = ldst->datalo_reg;
6264     mov[0].src =
6265         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6266     mov[0].dst_type = TCG_TYPE_REG;
6267     mov[0].src_type = TCG_TYPE_REG;
6268     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6269 
6270     mov[1].dst = ldst->datahi_reg;
6271     mov[1].src =
6272         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6273     mov[1].dst_type = TCG_TYPE_REG;
6274     mov[1].src_type = TCG_TYPE_REG;
6275     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6276 
6277     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6278 }
6279 
6280 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6281                                    const TCGLdstHelperParam *parm)
6282 {
6283     const TCGHelperInfo *info;
6284     const TCGCallArgumentLoc *loc;
6285     TCGMovExtend mov[4];
6286     TCGType data_type;
6287     unsigned next_arg, nmov, n;
6288     MemOp mop = get_memop(ldst->oi);
6289 
6290     switch (mop & MO_SIZE) {
6291     case MO_8:
6292     case MO_16:
6293     case MO_32:
6294         info = &info_helper_st32_mmu;
6295         data_type = TCG_TYPE_I32;
6296         break;
6297     case MO_64:
6298         info = &info_helper_st64_mmu;
6299         data_type = TCG_TYPE_I64;
6300         break;
6301     case MO_128:
6302         info = &info_helper_st128_mmu;
6303         data_type = TCG_TYPE_I128;
6304         break;
6305     default:
6306         g_assert_not_reached();
6307     }
6308 
6309     /* Defer env argument. */
6310     next_arg = 1;
6311     nmov = 0;
6312 
6313     /* Handle addr argument. */
6314     loc = &info->in[next_arg];
6315     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6316     if (TCG_TARGET_REG_BITS == 32) {
6317         /*
6318          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6319          * to 64-bits for the helper by storing the low part.  Later,
6320          * after we have processed the register inputs, we will load a
6321          * zero for the high part.
6322          */
6323         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6324                                TCG_TYPE_I32, TCG_TYPE_I32,
6325                                ldst->addr_reg, -1);
6326         next_arg += 2;
6327         nmov += 1;
6328     } else {
6329         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6330                                    ldst->addr_reg, -1);
6331         next_arg += n;
6332         nmov += n;
6333     }
6334 
6335     /* Handle data argument. */
6336     loc = &info->in[next_arg];
6337     switch (loc->kind) {
6338     case TCG_CALL_ARG_NORMAL:
6339     case TCG_CALL_ARG_EXTEND_U:
6340     case TCG_CALL_ARG_EXTEND_S:
6341         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6342                                    ldst->datalo_reg, ldst->datahi_reg);
6343         next_arg += n;
6344         nmov += n;
6345         tcg_out_helper_load_slots(s, nmov, mov, parm);
6346         break;
6347 
6348     case TCG_CALL_ARG_BY_REF:
6349         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6350         tcg_debug_assert(data_type == TCG_TYPE_I128);
6351         tcg_out_st(s, TCG_TYPE_I64,
6352                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6353                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6354         tcg_out_st(s, TCG_TYPE_I64,
6355                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6356                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6357 
6358         tcg_out_helper_load_slots(s, nmov, mov, parm);
6359 
6360         if (arg_slot_reg_p(loc->arg_slot)) {
6361             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6362                              TCG_REG_CALL_STACK,
6363                              arg_slot_stk_ofs(loc->ref_slot));
6364         } else {
6365             tcg_debug_assert(parm->ntmp != 0);
6366             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6367                              arg_slot_stk_ofs(loc->ref_slot));
6368             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6369                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6370         }
6371         next_arg += 2;
6372         break;
6373 
6374     default:
6375         g_assert_not_reached();
6376     }
6377 
6378     if (TCG_TARGET_REG_BITS == 32) {
6379         /* Zero extend the address by loading a zero for the high part. */
6380         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6381         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6382     }
6383 
6384     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6385 }
6386 
6387 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6388 {
6389     int i, start_words, num_insns;
6390     TCGOp *op;
6391 
6392     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6393                  && qemu_log_in_addr_range(pc_start))) {
6394         FILE *logfile = qemu_log_trylock();
6395         if (logfile) {
6396             fprintf(logfile, "OP:\n");
6397             tcg_dump_ops(s, logfile, false);
6398             fprintf(logfile, "\n");
6399             qemu_log_unlock(logfile);
6400         }
6401     }
6402 
6403 #ifdef CONFIG_DEBUG_TCG
6404     /* Ensure all labels referenced have been emitted.  */
6405     {
6406         TCGLabel *l;
6407         bool error = false;
6408 
6409         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6410             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6411                 qemu_log_mask(CPU_LOG_TB_OP,
6412                               "$L%d referenced but not present.\n", l->id);
6413                 error = true;
6414             }
6415         }
6416         assert(!error);
6417     }
6418 #endif
6419 
6420     /* Do not reuse any EBB that may be allocated within the TB. */
6421     tcg_temp_ebb_reset_freed(s);
6422 
6423     tcg_optimize(s);
6424 
6425     reachable_code_pass(s);
6426     liveness_pass_0(s);
6427     liveness_pass_1(s);
6428 
6429     if (s->nb_indirects > 0) {
6430         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6431                      && qemu_log_in_addr_range(pc_start))) {
6432             FILE *logfile = qemu_log_trylock();
6433             if (logfile) {
6434                 fprintf(logfile, "OP before indirect lowering:\n");
6435                 tcg_dump_ops(s, logfile, false);
6436                 fprintf(logfile, "\n");
6437                 qemu_log_unlock(logfile);
6438             }
6439         }
6440 
6441         /* Replace indirect temps with direct temps.  */
6442         if (liveness_pass_2(s)) {
6443             /* If changes were made, re-run liveness.  */
6444             liveness_pass_1(s);
6445         }
6446     }
6447 
6448     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6449                  && qemu_log_in_addr_range(pc_start))) {
6450         FILE *logfile = qemu_log_trylock();
6451         if (logfile) {
6452             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6453             tcg_dump_ops(s, logfile, true);
6454             fprintf(logfile, "\n");
6455             qemu_log_unlock(logfile);
6456         }
6457     }
6458 
6459     /* Initialize goto_tb jump offsets. */
6460     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6461     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6462     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6463     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6464 
6465     tcg_reg_alloc_start(s);
6466 
6467     /*
6468      * Reset the buffer pointers when restarting after overflow.
6469      * TODO: Move this into translate-all.c with the rest of the
6470      * buffer management.  Having only this done here is confusing.
6471      */
6472     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6473     s->code_ptr = s->code_buf;
6474     s->data_gen_ptr = NULL;
6475 
6476     QSIMPLEQ_INIT(&s->ldst_labels);
6477     s->pool_labels = NULL;
6478 
6479     start_words = s->insn_start_words;
6480     s->gen_insn_data =
6481         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6482 
6483     tcg_out_tb_start(s);
6484 
6485     num_insns = -1;
6486     QTAILQ_FOREACH(op, &s->ops, link) {
6487         TCGOpcode opc = op->opc;
6488 
6489         switch (opc) {
6490         case INDEX_op_mov:
6491         case INDEX_op_mov_vec:
6492             tcg_reg_alloc_mov(s, op);
6493             break;
6494         case INDEX_op_dup_vec:
6495             tcg_reg_alloc_dup(s, op);
6496             break;
6497         case INDEX_op_insn_start:
6498             if (num_insns >= 0) {
6499                 size_t off = tcg_current_code_size(s);
6500                 s->gen_insn_end_off[num_insns] = off;
6501                 /* Assert that we do not overflow our stored offset.  */
6502                 assert(s->gen_insn_end_off[num_insns] == off);
6503             }
6504             num_insns++;
6505             for (i = 0; i < start_words; ++i) {
6506                 s->gen_insn_data[num_insns * start_words + i] =
6507                     tcg_get_insn_start_param(op, i);
6508             }
6509             break;
6510         case INDEX_op_discard:
6511             temp_dead(s, arg_temp(op->args[0]));
6512             break;
6513         case INDEX_op_set_label:
6514             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6515             tcg_out_label(s, arg_label(op->args[0]));
6516             break;
6517         case INDEX_op_call:
6518             tcg_reg_alloc_call(s, op);
6519             break;
6520         case INDEX_op_exit_tb:
6521             tcg_out_exit_tb(s, op->args[0]);
6522             break;
6523         case INDEX_op_goto_tb:
6524             tcg_out_goto_tb(s, op->args[0]);
6525             break;
6526         case INDEX_op_dup2_vec:
6527             if (tcg_reg_alloc_dup2(s, op)) {
6528                 break;
6529             }
6530             /* fall through */
6531         default:
6532             /* Sanity check that we've not introduced any unhandled opcodes. */
6533             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6534                                               TCGOP_FLAGS(op)));
6535             /* Note: in order to speed up the code, it would be much
6536                faster to have specialized register allocator functions for
6537                some common argument patterns */
6538             tcg_reg_alloc_op(s, op);
6539             break;
6540         }
6541         /* Test for (pending) buffer overflow.  The assumption is that any
6542            one operation beginning below the high water mark cannot overrun
6543            the buffer completely.  Thus we can test for overflow after
6544            generating code without having to check during generation.  */
6545         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6546             return -1;
6547         }
6548         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6549         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6550             return -2;
6551         }
6552     }
6553     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6554     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6555 
6556     /* Generate TB finalization at the end of block */
6557     i = tcg_out_ldst_finalize(s);
6558     if (i < 0) {
6559         return i;
6560     }
6561     i = tcg_out_pool_finalize(s);
6562     if (i < 0) {
6563         return i;
6564     }
6565     if (!tcg_resolve_relocs(s)) {
6566         return -2;
6567     }
6568 
6569 #ifndef CONFIG_TCG_INTERPRETER
6570     /* flush instruction cache */
6571     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6572                         (uintptr_t)s->code_buf,
6573                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6574 #endif
6575 
6576     return tcg_current_code_size(s);
6577 }
6578 
6579 #ifdef ELF_HOST_MACHINE
6580 /* In order to use this feature, the backend needs to do three things:
6581 
6582    (1) Define ELF_HOST_MACHINE to indicate both what value to
6583        put into the ELF image and to indicate support for the feature.
6584 
6585    (2) Define tcg_register_jit.  This should create a buffer containing
6586        the contents of a .debug_frame section that describes the post-
6587        prologue unwind info for the tcg machine.
6588 
6589    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6590 */
6591 
6592 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
6598 
/*
 * One registered symbol file.  The layout is part of the GDB interface
 * and must not be changed.
 */
struct jit_code_entry {
    /* Links to neighbouring entries (presumably a doubly-linked list;
       this file only ever registers a single entry and leaves both
       unset). */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Address and size in bytes of the in-memory symbol file image. */
    const void *symfile_addr;
    uint64_t symfile_size;
};
6605 
/*
 * Descriptor shared with the debugger.  The layout is part of the GDB
 * interface and must not be changed.
 */
struct jit_descriptor {
    /* Interface version; statically initialized to 1 in this file. */
    uint32_t version;
    /* One of the jit_actions_t values. */
    uint32_t action_flag;
    /* Entry that action_flag applies to. */
    struct jit_code_entry *relevant_entry;
    /* First registered entry. */
    struct jit_code_entry *first_entry;
};
6612 
/*
 * Notification hook of the GDB interface: called after
 * __jit_debug_descriptor has been updated.  It intentionally does
 * nothing.  The noinline attribute and the empty asm statement
 * presumably keep the call from being inlined or optimized away so a
 * debugger can intercept it -- confirm against the GDB documentation.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
6618 
6619 /* Must statically initialize the version, because GDB may check
6620    the version before we can set it.  */
6621 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6622 
6623 /* End GDB interface.  */
6624 
/*
 * Return the offset of @str within the string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings; the search starts
 * at offset 1, skipping the table's first byte.  @str must be present:
 * there is no end-of-table check, so a missing string makes the scan run
 * past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    /* Step from one entry to the next until an exact match is found. */
    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        continue;
    }
    return cur - strtab;
}
6636 
6637 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6638                                  const void *debug_frame,
6639                                  size_t debug_frame_size)
6640 {
6641     struct __attribute__((packed)) DebugInfo {
6642         uint32_t  len;
6643         uint16_t  version;
6644         uint32_t  abbrev;
6645         uint8_t   ptr_size;
6646         uint8_t   cu_die;
6647         uint16_t  cu_lang;
6648         uintptr_t cu_low_pc;
6649         uintptr_t cu_high_pc;
6650         uint8_t   fn_die;
6651         char      fn_name[16];
6652         uintptr_t fn_low_pc;
6653         uintptr_t fn_high_pc;
6654         uint8_t   cu_eoc;
6655     };
6656 
6657     struct ElfImage {
6658         ElfW(Ehdr) ehdr;
6659         ElfW(Phdr) phdr;
6660         ElfW(Shdr) shdr[7];
6661         ElfW(Sym)  sym[2];
6662         struct DebugInfo di;
6663         uint8_t    da[24];
6664         char       str[80];
6665     };
6666 
6667     struct ElfImage *img;
6668 
6669     static const struct ElfImage img_template = {
6670         .ehdr = {
6671             .e_ident[EI_MAG0] = ELFMAG0,
6672             .e_ident[EI_MAG1] = ELFMAG1,
6673             .e_ident[EI_MAG2] = ELFMAG2,
6674             .e_ident[EI_MAG3] = ELFMAG3,
6675             .e_ident[EI_CLASS] = ELF_CLASS,
6676             .e_ident[EI_DATA] = ELF_DATA,
6677             .e_ident[EI_VERSION] = EV_CURRENT,
6678             .e_type = ET_EXEC,
6679             .e_machine = ELF_HOST_MACHINE,
6680             .e_version = EV_CURRENT,
6681             .e_phoff = offsetof(struct ElfImage, phdr),
6682             .e_shoff = offsetof(struct ElfImage, shdr),
6683             .e_ehsize = sizeof(ElfW(Shdr)),
6684             .e_phentsize = sizeof(ElfW(Phdr)),
6685             .e_phnum = 1,
6686             .e_shentsize = sizeof(ElfW(Shdr)),
6687             .e_shnum = ARRAY_SIZE(img->shdr),
6688             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6689 #ifdef ELF_HOST_FLAGS
6690             .e_flags = ELF_HOST_FLAGS,
6691 #endif
6692 #ifdef ELF_OSABI
6693             .e_ident[EI_OSABI] = ELF_OSABI,
6694 #endif
6695         },
6696         .phdr = {
6697             .p_type = PT_LOAD,
6698             .p_flags = PF_X,
6699         },
6700         .shdr = {
6701             [0] = { .sh_type = SHT_NULL },
6702             /* Trick: The contents of code_gen_buffer are not present in
6703                this fake ELF file; that got allocated elsewhere.  Therefore
6704                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6705                will not look for contents.  We can record any address.  */
6706             [1] = { /* .text */
6707                 .sh_type = SHT_NOBITS,
6708                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6709             },
6710             [2] = { /* .debug_info */
6711                 .sh_type = SHT_PROGBITS,
6712                 .sh_offset = offsetof(struct ElfImage, di),
6713                 .sh_size = sizeof(struct DebugInfo),
6714             },
6715             [3] = { /* .debug_abbrev */
6716                 .sh_type = SHT_PROGBITS,
6717                 .sh_offset = offsetof(struct ElfImage, da),
6718                 .sh_size = sizeof(img->da),
6719             },
6720             [4] = { /* .debug_frame */
6721                 .sh_type = SHT_PROGBITS,
6722                 .sh_offset = sizeof(struct ElfImage),
6723             },
6724             [5] = { /* .symtab */
6725                 .sh_type = SHT_SYMTAB,
6726                 .sh_offset = offsetof(struct ElfImage, sym),
6727                 .sh_size = sizeof(img->sym),
6728                 .sh_info = 1,
6729                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6730                 .sh_entsize = sizeof(ElfW(Sym)),
6731             },
6732             [6] = { /* .strtab */
6733                 .sh_type = SHT_STRTAB,
6734                 .sh_offset = offsetof(struct ElfImage, str),
6735                 .sh_size = sizeof(img->str),
6736             }
6737         },
6738         .sym = {
6739             [1] = { /* code_gen_buffer */
6740                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6741                 .st_shndx = 1,
6742             }
6743         },
6744         .di = {
6745             .len = sizeof(struct DebugInfo) - 4,
6746             .version = 2,
6747             .ptr_size = sizeof(void *),
6748             .cu_die = 1,
6749             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6750             .fn_die = 2,
6751             .fn_name = "code_gen_buffer"
6752         },
6753         .da = {
6754             1,          /* abbrev number (the cu) */
6755             0x11, 1,    /* DW_TAG_compile_unit, has children */
6756             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6757             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6758             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6759             0, 0,       /* end of abbrev */
6760             2,          /* abbrev number (the fn) */
6761             0x2e, 0,    /* DW_TAG_subprogram, no children */
6762             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6763             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6764             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6765             0, 0,       /* end of abbrev */
6766             0           /* no more abbrev */
6767         },
6768         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6769                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6770     };
6771 
6772     /* We only need a single jit entry; statically allocate it.  */
6773     static struct jit_code_entry one_entry;
6774 
6775     uintptr_t buf = (uintptr_t)buf_ptr;
6776     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6777     DebugFrameHeader *dfh;
6778 
6779     img = g_malloc(img_size);
6780     *img = img_template;
6781 
6782     img->phdr.p_vaddr = buf;
6783     img->phdr.p_paddr = buf;
6784     img->phdr.p_memsz = buf_size;
6785 
6786     img->shdr[1].sh_name = find_string(img->str, ".text");
6787     img->shdr[1].sh_addr = buf;
6788     img->shdr[1].sh_size = buf_size;
6789 
6790     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6791     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6792 
6793     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6794     img->shdr[4].sh_size = debug_frame_size;
6795 
6796     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6797     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6798 
6799     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6800     img->sym[1].st_value = buf;
6801     img->sym[1].st_size = buf_size;
6802 
6803     img->di.cu_low_pc = buf;
6804     img->di.cu_high_pc = buf + buf_size;
6805     img->di.fn_low_pc = buf;
6806     img->di.fn_high_pc = buf + buf_size;
6807 
6808     dfh = (DebugFrameHeader *)(img + 1);
6809     memcpy(dfh, debug_frame, debug_frame_size);
6810     dfh->fde.func_start = buf;
6811     dfh->fde.func_len = buf_size;
6812 
6813 #ifdef DEBUG_JIT
6814     /* Enable this block to be able to debug the ELF image file creation.
6815        One can use readelf, objdump, or other inspection utilities.  */
6816     {
6817         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6818         FILE *f = fopen(jit, "w+b");
6819         if (f) {
6820             if (fwrite(img, img_size, 1, f) != img_size) {
6821                 /* Avoid stupid unused return value warning for fwrite.  */
6822             }
6823             fclose(f);
6824         }
6825     }
6826 #endif
6827 
6828     one_entry.symfile_addr = img;
6829     one_entry.symfile_size = img_size;
6830 
6831     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6832     __jit_debug_descriptor.relevant_entry = &one_entry;
6833     __jit_debug_descriptor.first_entry = &one_entry;
6834     __jit_debug_register_code();
6835 }
6836 #else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/*
 * Stub used when ELF_HOST_MACHINE is not defined: all arguments are
 * ignored and nothing is registered.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
6845 
/*
 * Public entry point for builds without ELF_HOST_MACHINE: does nothing
 * with @buf and @buf_size.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
6849 #endif /* ELF_HOST_MACHINE */
6850 
#if !TCG_TARGET_MAYBE_vec
/*
 * Placeholder definition compiled only when TCG_TARGET_MAYBE_vec is
 * zero.  It must never be called: any call aborts through
 * g_assert_not_reached().
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
6857