/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#include "tcg-has.h"
#ifdef CONFIG_USER_ONLY
#include "user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
static void tcg_out_nop_fill(tcg_insn_unit *p, int count);

typedef struct TCGLabelQemuLdst TCGLabelQemuLdst;
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l);
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
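
/*
 * Note: these structures are the prefix of the .debug_frame image that a
 * backend builds and hands to tcg_register_jit_int(), so that GDB's JIT
 * interface can unwind through generated code.  A minimal sketch of the
 * usual backend pattern, with illustrative field values only:
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie.id = -1,               // distinguishes a CIE from an FDE
 *         .cie.version = 1,
 *         .cie.code_align = 1,
 *         // ... plus an FDE covering the code_gen buffer
 *     };
 */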

struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addr_reg;        /* reg index for guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the code following the qemu_ld/st */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
};

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
int tcg_can_emit_vec_op(TCGOpcode o, TCGType t, unsigned ve)
{
    return 0;
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
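
/*
 * Note: in system mode there is no guest_base.  The definition above
 * poisons the name: any use that the compiler cannot eliminate as dead
 * code trips qemu_build_not_reached() and fails the build, instead of
 * silently reading a meaningless value.
 */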

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
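
/*
 * Note: tcg_insn_unit is the host's minimal instruction granule, and the
 * emitters above adapt to TCG_TARGET_INSN_UNIT_SIZE.  For example, on a
 * fixed-width host with 4-byte units, tcg_out32() stores one unit and
 * advances code_ptr by one; on a 1-byte-unit host such as x86 it
 * memcpy()s four bytes and advances code_ptr by four units.  The test of
 * the compile-time constant folds away, leaving a single code path.
 */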

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}
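
/*
 * Note: CPUNegativeOffsetState sits immediately below the CPUArchState
 * that the env register points at, so the value computed above is
 * negative: the offset of tlb.f[which] relative to env.  Backends use it
 * as a single negative displacement from env to reach the softmmu
 * fast-path TLB descriptor.
 */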

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
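
/*
 * Note: as an example of the dispatch above, sign-extending a 32-bit
 * value in @src into a 64-bit @dst reduces to one backend hook:
 *
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 *
 * takes the MO_UL/MO_SL arm with src_type == TCG_TYPE_I32 and MO_SIGN
 * set, i.e. tcg_out_exts_i32_i64(s, dst, src).
 */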

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
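
/*
 * Note: the interesting case above is a swap, e.g. @i1 moving A -> B
 * while @i2 moves B -> A.  Emitting @i1 first would clobber @i2's
 * source, so either the backend supplies a real exchange (tcg_out_xchg
 * returns true) or A is parked in @scratch first and the moves are
 * emitted as A -> scratch, B -> A, scratch -> B.
 */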

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchgs.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

/*
 * Allocate a new TCGLabelQemuLdst entry.
 */

__attribute__((unused))
static TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
{
    TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));

    memset(l, 0, sizeof(*l));
    QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);

    return l;
}

/*
 * Allocate new constant pool entries.
 */

typedef struct TCGLabelPoolData {
    struct TCGLabelPoolData *next;
    tcg_insn_unit *label;
    intptr_t addend;
    int rtype;
    unsigned nlong;
    tcg_target_ulong data[];
} TCGLabelPoolData;

static TCGLabelPoolData *new_pool_alloc(TCGContext *s, int nlong, int rtype,
                                        tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = tcg_malloc(sizeof(TCGLabelPoolData)
                                     + sizeof(tcg_target_ulong) * nlong);

    n->label = label;
    n->addend = addend;
    n->rtype = rtype;
    n->nlong = nlong;
    return n;
}

static void new_pool_insert(TCGContext *s, TCGLabelPoolData *n)
{
    TCGLabelPoolData *i, **pp;
    int nlong = n->nlong;

    /* Insertion sort on the pool.  */
    for (pp = &s->pool_labels; (i = *pp) != NULL; pp = &i->next) {
        if (nlong > i->nlong) {
            break;
        }
        if (nlong < i->nlong) {
            continue;
        }
        if (memcmp(n->data, i->data, sizeof(tcg_target_ulong) * nlong) >= 0) {
            break;
        }
    }
    n->next = *pp;
    *pp = n;
}
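
/*
 * Note: the list is kept sorted with the widest entries first and equal
 * data adjacent.  The widest entry determines the alignment of the whole
 * pool in tcg_out_pool_finalize(), so emitting it first wastes no
 * padding, and adjacency lets duplicated constants be emitted only once.
 */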

/* The "usual" for generic integer code.  */
__attribute__((unused))
static void new_pool_label(TCGContext *s, tcg_target_ulong d, int rtype,
                           tcg_insn_unit *label, intptr_t addend)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 1, rtype, label, addend);
    n->data[0] = d;
    new_pool_insert(s, n);
}

/* For v64 or v128, depending on the host.  */
__attribute__((unused))
static void new_pool_l2(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 2, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    new_pool_insert(s, n);
}

/* For v128 or v256, depending on the host.  */
__attribute__((unused))
static void new_pool_l4(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 4, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    new_pool_insert(s, n);
}

/* For v256, for 32-bit host.  */
__attribute__((unused))
static void new_pool_l8(TCGContext *s, int rtype, tcg_insn_unit *label,
                        intptr_t addend, tcg_target_ulong d0,
                        tcg_target_ulong d1, tcg_target_ulong d2,
                        tcg_target_ulong d3, tcg_target_ulong d4,
                        tcg_target_ulong d5, tcg_target_ulong d6,
                        tcg_target_ulong d7)
{
    TCGLabelPoolData *n = new_pool_alloc(s, 8, rtype, label, addend);
    n->data[0] = d0;
    n->data[1] = d1;
    n->data[2] = d2;
    n->data[3] = d3;
    n->data[4] = d4;
    n->data[5] = d5;
    n->data[6] = d6;
    n->data[7] = d7;
    new_pool_insert(s, n);
}

/*
 * Generate TB finalization at the end of block
 */

static int tcg_out_ldst_finalize(TCGContext *s)
{
    TCGLabelQemuLdst *lb;

    /* qemu_ld/st slow paths */
    QSIMPLEQ_FOREACH(lb, &s->ldst_labels, next) {
        if (lb->is_ld
            ? !tcg_out_qemu_ld_slow_path(s, lb)
            : !tcg_out_qemu_st_slow_path(s, lb)) {
            return -2;
        }

        /*
         * Test for (pending) buffer overflow.  The assumption is that any
         * one operation beginning below the high water mark cannot overrun
         * the buffer completely.  Thus we can test for overflow after
         * generating code without having to check during generation.
         */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    return 0;
}

static int tcg_out_pool_finalize(TCGContext *s)
{
    TCGLabelPoolData *p = s->pool_labels;
    TCGLabelPoolData *l = NULL;
    void *a;

    if (p == NULL) {
        return 0;
    }

    /*
     * ??? Round up to qemu_icache_linesize, but then do not round
     * again when allocating the next TranslationBlock structure.
     */
    a = (void *)ROUND_UP((uintptr_t)s->code_ptr,
                         sizeof(tcg_target_ulong) * p->nlong);
    tcg_out_nop_fill(s->code_ptr, (tcg_insn_unit *)a - s->code_ptr);
    s->data_gen_ptr = a;

    for (; p != NULL; p = p->next) {
        size_t size = sizeof(tcg_target_ulong) * p->nlong;
        uintptr_t value;

        if (!l || l->nlong != p->nlong || memcmp(l->data, p->data, size)) {
            if (unlikely(a > s->code_gen_highwater)) {
                return -1;
            }
            memcpy(a, p->data, size);
            a += size;
            l = p;
        }

        value = (uintptr_t)tcg_splitwx_to_rx(a) - size;
        if (!patch_reloc(p->label, p->rtype, value, p->addend)) {
            return -2;
        }
    }

    s->code_ptr = a;
    return 0;
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
    C_Dynamic = -2,
    C_NotImplemented = -1,
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
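
/*
 * Note: each line of tcg-target-con-set.h is expanded three times in
 * this file under different macro definitions.  A hypothetical backend
 * line C_O1_I2(r, r, ri) becomes, in turn:
 *   1. the enumerator c_o1_i2_r_r_ri above;
 *   2. the entry { 1, 2, { "r", "r", "ri" } } in constraint_sets[] below;
 *   3. a plain expansion to c_o1_i2_r_r_ri again, so tcg_target_op_def()
 *      can name the set with the same macro syntax.
 */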

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode, TCGType, unsigned);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

typedef struct TCGConstraintSet {
    uint8_t nb_oargs, nb_iargs;
    const char *args_ct_str[TCG_MAX_OP_ARGS];
} TCGConstraintSet;

#define C_O0_I1(I1)                     { 0, 1, { #I1 } },
#define C_O0_I2(I1, I2)                 { 0, 2, { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { 0, 3, { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { 0, 4, { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { 1, 1, { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { 1, 2, { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { 1, 3, { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { 1, 4, { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { 1, 2, { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { 2, 1, { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { 2, 1, { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { 2, 1, { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { 2, 2, { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { 2, 3, { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { 2, 4, { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGConstraintSet constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
/*
 * TCGOutOp is the base class for a set of structures that describe how
 * to generate code for a given TCGOpcode.
 *
 * @static_constraint:
 *   C_NotImplemented: The TCGOpcode is not supported by the backend.
 *   C_Dynamic:        Use @dynamic_constraint to select a constraint set
 *                     based on any of @type, @flags, or host isa.
 *   Otherwise:        The register allocation constraints for the TCGOpcode.
 *
 * Subclasses of TCGOutOp will define a set of output routines that may
 * be used.  Such routines will often be selected by the set of registers
 * and constants that come out of register allocation.  The set of
 * routines that are provided will guide the set of constraints that are
 * legal.  In particular, assume that tcg_optimize() has done its job in
 * swapping commutative operands and folding operations for which all
 * operands are constant.
 */
typedef struct TCGOutOp {
    TCGConstraintSetIndex static_constraint;
    TCGConstraintSetIndex (*dynamic_constraint)(TCGType type, unsigned flags);
} TCGOutOp;

typedef struct TCGOutOpBinary {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rri)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, tcg_target_long a2);
} TCGOutOpBinary;

typedef struct TCGOutOpDivRem {
    TCGOutOp base;
    void (*out_rr01r)(TCGContext *s, TCGType type,
                      TCGReg a0, TCGReg a1, TCGReg a4);
} TCGOutOpDivRem;

typedef struct TCGOutOpUnary {
    TCGOutOp base;
    void (*out_rr)(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1);
} TCGOutOpUnary;

typedef struct TCGOutOpSubtract {
    TCGOutOp base;
    void (*out_rrr)(TCGContext *s, TCGType type,
                    TCGReg a0, TCGReg a1, TCGReg a2);
    void (*out_rir)(TCGContext *s, TCGType type,
                    TCGReg a0, tcg_target_long a1, TCGReg a2);
} TCGOutOpSubtract;
#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

/*
 * Register V as the TCGOutOp for O.
 * This verifies that V is of type T; otherwise it gives a nice compiler
 * error.  This prevents trivial mistakes within each arch/tcg-target.c.inc.
 */
#define OUTOP(O, T, V)  [O] = _Generic(V, T: &V.base)
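
/*
 * Note: for example, OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add)
 * expands to
 *     [INDEX_op_add] = _Generic(outop_add, TCGOutOpBinary: &outop_add.base)
 * If a backend declares outop_add with any other type, the _Generic
 * selection has no matching association and compilation fails.
 */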

/* Register allocation descriptions for every TCGOpcode. */
static const TCGOutOp * const all_outop[NB_OPS] = {
    OUTOP(INDEX_op_add, TCGOutOpBinary, outop_add),
    OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
    OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
    OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
    OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
    OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
    OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
    OUTOP(INDEX_op_divs2, TCGOutOpDivRem, outop_divs2),
    OUTOP(INDEX_op_divu2, TCGOutOpDivRem, outop_divu2),
    OUTOP(INDEX_op_eqv, TCGOutOpBinary, outop_eqv),
    OUTOP(INDEX_op_mul, TCGOutOpBinary, outop_mul),
    OUTOP(INDEX_op_mulsh, TCGOutOpBinary, outop_mulsh),
    OUTOP(INDEX_op_muluh, TCGOutOpBinary, outop_muluh),
    OUTOP(INDEX_op_nand, TCGOutOpBinary, outop_nand),
    OUTOP(INDEX_op_neg, TCGOutOpUnary, outop_neg),
    OUTOP(INDEX_op_nor, TCGOutOpBinary, outop_nor),
    OUTOP(INDEX_op_not, TCGOutOpUnary, outop_not),
    OUTOP(INDEX_op_or, TCGOutOpBinary, outop_or),
    OUTOP(INDEX_op_orc, TCGOutOpBinary, outop_orc),
    OUTOP(INDEX_op_rems, TCGOutOpBinary, outop_rems),
    OUTOP(INDEX_op_remu, TCGOutOpBinary, outop_remu),
    OUTOP(INDEX_op_rotl, TCGOutOpBinary, outop_rotl),
    OUTOP(INDEX_op_rotr, TCGOutOpBinary, outop_rotr),
    OUTOP(INDEX_op_sar, TCGOutOpBinary, outop_sar),
    OUTOP(INDEX_op_shl, TCGOutOpBinary, outop_shl),
    OUTOP(INDEX_op_shr, TCGOutOpBinary, outop_shr),
    OUTOP(INDEX_op_sub, TCGOutOpSubtract, outop_sub),
    OUTOP(INDEX_op_xor, TCGOutOpBinary, outop_xor),
};

#undef OUTOP

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
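
/*
 * Note: tcg_malloc() allocations are transient translation-time data
 * (labels, relocations, ldst slow-path records, ...).  Nothing is freed
 * individually; the pool is recycled wholesale by tcg_pool_reset()
 * between translations, which is why the fast path can be a simple bump
 * allocation.
 */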

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}
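
/*
 * Note: the typemask packs one 3-bit typecode per position: bits [2:0]
 * describe the return value and each successive 3-bit group describes
 * the next argument.  For the ld64 helper above, with a return value and
 * four arguments, the highest set bit of typemask >> 3 lies in the
 * fourth group, and the nargs computation above recovers 4.
 */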

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference.  Therefore we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
1390     loc[0].ref_slot = cum->ref_slot;
1391 
1392     /*
1393      * Subsequent words also go into the reference slot, but
1394      * do not accumulate into the regular arguments.
1395      */
1396     for (int i = 1; i < n; ++i) {
1397         loc[i] = (TCGCallArgumentLoc){
1398             .kind = TCG_CALL_ARG_BY_REF_N,
1399             .arg_idx = cum->arg_idx,
1400             .tmp_subindex = i,
1401             .ref_slot = cum->ref_slot + i,
1402         };
1403     }
1404     cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
1405     cum->ref_slot += n;
1406 }
1407 
1408 static void init_call_layout(TCGHelperInfo *info)
1409 {
1410     int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
1411     int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1412     unsigned typemask = info->typemask;
1413     unsigned typecode;
1414     TCGCumulativeArgs cum = { };
1415 
1416     /*
1417      * Parse and place any function return value.
1418      */
1419     typecode = typemask & 7;
1420     switch (typecode) {
1421     case dh_typecode_void:
1422         info->nr_out = 0;
1423         break;
1424     case dh_typecode_i32:
1425     case dh_typecode_s32:
1426     case dh_typecode_ptr:
1427         info->nr_out = 1;
1428         info->out_kind = TCG_CALL_RET_NORMAL;
1429         break;
1430     case dh_typecode_i64:
1431     case dh_typecode_s64:
1432         info->nr_out = 64 / TCG_TARGET_REG_BITS;
1433         info->out_kind = TCG_CALL_RET_NORMAL;
1434         /* Query the last register now to trigger any assert early. */
1435         tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1436         break;
1437     case dh_typecode_i128:
1438         info->nr_out = 128 / TCG_TARGET_REG_BITS;
1439         info->out_kind = TCG_TARGET_CALL_RET_I128;
1440         switch (TCG_TARGET_CALL_RET_I128) {
1441         case TCG_CALL_RET_NORMAL:
1442             /* Query the last register now to trigger any assert early. */
1443             tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
1444             break;
1445         case TCG_CALL_RET_BY_VEC:
1446             /* Query the single register now to trigger any assert early. */
1447             tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
1448             break;
1449         case TCG_CALL_RET_BY_REF:
1450             /*
1451              * Allocate the first argument to the output.
1452              * We don't need to store this anywhere, just make it
1453              * unavailable for use in the input loop below.
1454              */
1455             cum.arg_slot = 1;
1456             break;
1457         default:
1458             qemu_build_not_reached();
1459         }
1460         break;
1461     default:
1462         g_assert_not_reached();
1463     }
1464 
1465     /*
1466      * Parse and place function arguments.
1467      */
1468     for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
1469         TCGCallArgumentKind kind;
1470         TCGType type;
1471 
1472         typecode = typemask & 7;
1473         switch (typecode) {
1474         case dh_typecode_i32:
1475         case dh_typecode_s32:
1476             type = TCG_TYPE_I32;
1477             break;
1478         case dh_typecode_i64:
1479         case dh_typecode_s64:
1480             type = TCG_TYPE_I64;
1481             break;
1482         case dh_typecode_ptr:
1483             type = TCG_TYPE_PTR;
1484             break;
1485         case dh_typecode_i128:
1486             type = TCG_TYPE_I128;
1487             break;
1488         default:
1489             g_assert_not_reached();
1490         }
1491 
1492         switch (type) {
1493         case TCG_TYPE_I32:
1494             switch (TCG_TARGET_CALL_ARG_I32) {
1495             case TCG_CALL_ARG_EVEN:
1496                 layout_arg_even(&cum);
1497                 /* fall through */
1498             case TCG_CALL_ARG_NORMAL:
1499                 layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1500                 break;
1501             case TCG_CALL_ARG_EXTEND:
1502                 kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
1503                 layout_arg_1(&cum, info, kind);
1504                 break;
1505             default:
1506                 qemu_build_not_reached();
1507             }
1508             break;
1509 
1510         case TCG_TYPE_I64:
1511             switch (TCG_TARGET_CALL_ARG_I64) {
1512             case TCG_CALL_ARG_EVEN:
1513                 layout_arg_even(&cum);
1514                 /* fall through */
1515             case TCG_CALL_ARG_NORMAL:
1516                 if (TCG_TARGET_REG_BITS == 32) {
1517                     layout_arg_normal_n(&cum, info, 2);
1518                 } else {
1519                     layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
1520                 }
1521                 break;
1522             default:
1523                 qemu_build_not_reached();
1524             }
1525             break;
1526 
1527         case TCG_TYPE_I128:
1528             switch (TCG_TARGET_CALL_ARG_I128) {
1529             case TCG_CALL_ARG_EVEN:
1530                 layout_arg_even(&cum);
1531                 /* fall through */
1532             case TCG_CALL_ARG_NORMAL:
1533                 layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
1534                 break;
1535             case TCG_CALL_ARG_BY_REF:
1536                 layout_arg_by_ref(&cum, info);
1537                 break;
1538             default:
1539                 qemu_build_not_reached();
1540             }
1541             break;
1542 
1543         default:
1544             g_assert_not_reached();
1545         }
1546     }
1547     info->nr_in = cum.info_in_idx;
1548 
1549     /* Validate that we didn't overrun the input array. */
1550     assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
1551     /* Validate the backend has enough argument space. */
1552     assert(cum.arg_slot <= max_reg_slots + max_stk_slots);
1553 
1554     /*
1555      * Relocate the "ref_slot" area to the end of the parameters.
1556      * Minimizing this stack offset helps code size for x86,
1557      * which has a signed 8-bit offset encoding.
1558      */
1559     if (cum.ref_slot != 0) {
1560         int ref_base = 0;
1561 
1562         if (cum.arg_slot > max_reg_slots) {
1563             int align = __alignof(Int128) / sizeof(tcg_target_long);
1564 
1565             ref_base = cum.arg_slot - max_reg_slots;
1566             if (align > 1) {
1567                 ref_base = ROUND_UP(ref_base, align);
1568             }
1569         }
1570         assert(ref_base + cum.ref_slot <= max_stk_slots);
1571         ref_base += max_reg_slots;
1572 
1573         if (ref_base != 0) {
1574             for (int i = cum.info_in_idx - 1; i >= 0; --i) {
1575                 TCGCallArgumentLoc *loc = &info->in[i];
1576                 switch (loc->kind) {
1577                 case TCG_CALL_ARG_BY_REF:
1578                 case TCG_CALL_ARG_BY_REF_N:
1579                     loc->ref_slot += ref_base;
1580                     break;
1581                 default:
1582                     break;
1583                 }
1584             }
1585         }
1586     }
1587 }
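/*
 * Illustrative sketch, not part of the upstream source: assume a
 * hypothetical backend with 8 register slots and a by-reference I128
 * following two normal stack arguments.  After the relocation above,
 * the ref area sits just past the argument area (Int128-aligned):
 *
 *     stack slots:  [arg0][arg1][pad][ref0][ref1]...
 *                    ^ arg_slot area  ^ relocated ref_slot area
 *
 * Keeping the ref slots this low keeps most frame offsets within
 * x86's signed 8-bit displacement encoding.
 */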
1588 
1589 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1590 static void process_constraint_sets(void);
1591 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1592                                             TCGReg reg, const char *name);
1593 
1594 static void tcg_context_init(unsigned max_threads)
1595 {
1596     TCGContext *s = &tcg_init_ctx;
1597     int n, i;
1598     TCGTemp *ts;
1599 
1600     memset(s, 0, sizeof(*s));
1601     s->nb_globals = 0;
1602 
1603     init_call_layout(&info_helper_ld32_mmu);
1604     init_call_layout(&info_helper_ld64_mmu);
1605     init_call_layout(&info_helper_ld128_mmu);
1606     init_call_layout(&info_helper_st32_mmu);
1607     init_call_layout(&info_helper_st64_mmu);
1608     init_call_layout(&info_helper_st128_mmu);
1609 
1610     tcg_target_init(s);
1611     process_constraint_sets();
1612 
1613     /* Reverse the order of the saved registers, assuming they're all at
1614        the start of tcg_target_reg_alloc_order.  */
1615     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1616         int r = tcg_target_reg_alloc_order[n];
1617         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1618             break;
1619         }
1620     }
1621     for (i = 0; i < n; ++i) {
1622         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1623     }
1624     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1625         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1626     }
1627 
1628     tcg_ctx = s;
1629     /*
1630      * In user-mode we simply share the init context among threads, since we
1631      * use a single region. See the documentation of tcg_region_init() for the
1632      * reasoning behind this.
1633      * In system-mode we will have at most max_threads TCG threads.
1634      */
1635 #ifdef CONFIG_USER_ONLY
1636     tcg_ctxs = &tcg_ctx;
1637     tcg_cur_ctxs = 1;
1638     tcg_max_ctxs = 1;
1639 #else
1640     tcg_max_ctxs = max_threads;
1641     tcg_ctxs = g_new0(TCGContext *, max_threads);
1642 #endif
1643 
1644     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1645     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1646     tcg_env = temp_tcgv_ptr(ts);
1647 }
1648 
1649 void tcg_init(size_t tb_size, int splitwx, unsigned max_threads)
1650 {
1651     tcg_context_init(max_threads);
1652     tcg_region_init(tb_size, splitwx, max_threads);
1653 }
1654 
1655 /*
1656  * Allocate TBs right before their corresponding translated code, making
1657  * sure that TBs and code are on different cache lines.
1658  */
1659 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1660 {
1661     uintptr_t align = qemu_icache_linesize;
1662     TranslationBlock *tb;
1663     void *next;
1664 
1665  retry:
1666     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1667     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1668 
1669     if (unlikely(next > s->code_gen_highwater)) {
1670         if (tcg_region_alloc(s)) {
1671             return NULL;
1672         }
1673         goto retry;
1674     }
1675     qatomic_set(&s->code_gen_ptr, next);
1676     return tb;
1677 }
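/*
 * Illustrative layout, assuming a 64-byte icache line: tcg_tb_alloc()
 * rounds code_gen_ptr up once for the TB header and again for the code
 * that follows, so writes to the header never dirty a cache line that
 * holds translated code:
 *
 *     |TB struct|pad to line|translated code ...|TB struct|pad|...
 */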
1678 
1679 void tcg_prologue_init(void)
1680 {
1681     TCGContext *s = tcg_ctx;
1682     size_t prologue_size;
1683 
1684     s->code_ptr = s->code_gen_ptr;
1685     s->code_buf = s->code_gen_ptr;
1686     s->data_gen_ptr = NULL;
1687 
1688 #ifndef CONFIG_TCG_INTERPRETER
1689     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1690 #endif
1691 
1692     s->pool_labels = NULL;
1693 
1694     qemu_thread_jit_write();
1695     /* Generate the prologue.  */
1696     tcg_target_qemu_prologue(s);
1697 
1698     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1699     {
1700         int result = tcg_out_pool_finalize(s);
1701         tcg_debug_assert(result == 0);
1702     }
1703 
1704     prologue_size = tcg_current_code_size(s);
1705     perf_report_prologue(s->code_gen_ptr, prologue_size);
1706 
1707 #ifndef CONFIG_TCG_INTERPRETER
1708     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1709                         (uintptr_t)s->code_buf, prologue_size);
1710 #endif
1711 
1712     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1713         FILE *logfile = qemu_log_trylock();
1714         if (logfile) {
1715             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1716             if (s->data_gen_ptr) {
1717                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1718                 size_t data_size = prologue_size - code_size;
1719                 size_t i;
1720 
1721                 disas(logfile, s->code_gen_ptr, code_size);
1722 
1723                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1724                     if (sizeof(tcg_target_ulong) == 8) {
1725                         fprintf(logfile,
1726                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1727                                 (uintptr_t)s->data_gen_ptr + i,
1728                                 *(uint64_t *)(s->data_gen_ptr + i));
1729                     } else {
1730                         fprintf(logfile,
1731                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1732                                 (uintptr_t)s->data_gen_ptr + i,
1733                                 *(uint32_t *)(s->data_gen_ptr + i));
1734                     }
1735                 }
1736             } else {
1737                 disas(logfile, s->code_gen_ptr, prologue_size);
1738             }
1739             fprintf(logfile, "\n");
1740             qemu_log_unlock(logfile);
1741         }
1742     }
1743 
1744 #ifndef CONFIG_TCG_INTERPRETER
1745     /*
1746      * Assert that goto_ptr is implemented completely, setting an epilogue.
1747      * For tci, we use NULL as the signal to return from the interpreter,
1748      * so skip this check.
1749      */
1750     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1751 #endif
1752 
1753     tcg_region_prologue_set(s);
1754 }
1755 
1756 void tcg_func_start(TCGContext *s)
1757 {
1758     tcg_pool_reset(s);
1759     s->nb_temps = s->nb_globals;
1760 
1761     /* No temps have been previously allocated for size or locality.  */
1762     tcg_temp_ebb_reset_freed(s);
1763 
1764     /* No constant temps have been previously allocated. */
1765     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1766         if (s->const_table[i]) {
1767             g_hash_table_remove_all(s->const_table[i]);
1768         }
1769     }
1770 
1771     s->nb_ops = 0;
1772     s->nb_labels = 0;
1773     s->current_frame_offset = s->frame_start;
1774 
1775 #ifdef CONFIG_DEBUG_TCG
1776     s->goto_tb_issue_mask = 0;
1777 #endif
1778 
1779     QTAILQ_INIT(&s->ops);
1780     QTAILQ_INIT(&s->free_ops);
1781     s->emit_before_op = NULL;
1782     QSIMPLEQ_INIT(&s->labels);
1783 
1784     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
1785     tcg_debug_assert(s->insn_start_words > 0);
1786 }
1787 
1788 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1789 {
1790     int n = s->nb_temps++;
1791 
1792     if (n >= TCG_MAX_TEMPS) {
1793         tcg_raise_tb_overflow(s);
1794     }
1795     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1796 }
1797 
1798 static TCGTemp *tcg_global_alloc(TCGContext *s)
1799 {
1800     TCGTemp *ts;
1801 
1802     tcg_debug_assert(s->nb_globals == s->nb_temps);
1803     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1804     s->nb_globals++;
1805     ts = tcg_temp_alloc(s);
1806     ts->kind = TEMP_GLOBAL;
1807 
1808     return ts;
1809 }
1810 
1811 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1812                                             TCGReg reg, const char *name)
1813 {
1814     TCGTemp *ts;
1815 
1816     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1817 
1818     ts = tcg_global_alloc(s);
1819     ts->base_type = type;
1820     ts->type = type;
1821     ts->kind = TEMP_FIXED;
1822     ts->reg = reg;
1823     ts->name = name;
1824     tcg_regset_set_reg(s->reserved_regs, reg);
1825 
1826     return ts;
1827 }
1828 
1829 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1830 {
1831     s->frame_start = start;
1832     s->frame_end = start + size;
1833     s->frame_temp
1834         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1835 }
1836 
1837 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1838                                             const char *name, TCGType type)
1839 {
1840     TCGContext *s = tcg_ctx;
1841     TCGTemp *base_ts = tcgv_ptr_temp(base);
1842     TCGTemp *ts = tcg_global_alloc(s);
1843     int indirect_reg = 0;
1844 
1845     switch (base_ts->kind) {
1846     case TEMP_FIXED:
1847         break;
1848     case TEMP_GLOBAL:
1849         /* We do not support double-indirect registers.  */
1850         tcg_debug_assert(!base_ts->indirect_reg);
1851         base_ts->indirect_base = 1;
1852         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1853                             ? 2 : 1);
1854         indirect_reg = 1;
1855         break;
1856     default:
1857         g_assert_not_reached();
1858     }
1859 
1860     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1861         TCGTemp *ts2 = tcg_global_alloc(s);
1862         char buf[64];
1863 
1864         ts->base_type = TCG_TYPE_I64;
1865         ts->type = TCG_TYPE_I32;
1866         ts->indirect_reg = indirect_reg;
1867         ts->mem_allocated = 1;
1868         ts->mem_base = base_ts;
1869         ts->mem_offset = offset;
1870         pstrcpy(buf, sizeof(buf), name);
1871         pstrcat(buf, sizeof(buf), "_0");
1872         ts->name = strdup(buf);
1873 
1874         tcg_debug_assert(ts2 == ts + 1);
1875         ts2->base_type = TCG_TYPE_I64;
1876         ts2->type = TCG_TYPE_I32;
1877         ts2->indirect_reg = indirect_reg;
1878         ts2->mem_allocated = 1;
1879         ts2->mem_base = base_ts;
1880         ts2->mem_offset = offset + 4;
1881         ts2->temp_subindex = 1;
1882         pstrcpy(buf, sizeof(buf), name);
1883         pstrcat(buf, sizeof(buf), "_1");
1884         ts2->name = strdup(buf);
1885     } else {
1886         ts->base_type = type;
1887         ts->type = type;
1888         ts->indirect_reg = indirect_reg;
1889         ts->mem_allocated = 1;
1890         ts->mem_base = base_ts;
1891         ts->mem_offset = offset;
1892         ts->name = name;
1893     }
1894     return ts;
1895 }
1896 
1897 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1898 {
1899     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1900     return temp_tcgv_i32(ts);
1901 }
1902 
1903 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1904 {
1905     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1906     return temp_tcgv_i64(ts);
1907 }
1908 
1909 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1910 {
1911     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1912     return temp_tcgv_ptr(ts);
1913 }
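/*
 * Usage sketch with a hypothetical CPU state field; targets register
 * their memory-backed globals once at startup:
 *
 *     TCGv_i64 cpu_pc = tcg_global_mem_new_i64(
 *         tcg_env, offsetof(CPUFooState, pc), "pc");
 *
 * On a 32-bit host, tcg_global_mem_new_internal() above splits this
 * into adjacent "pc_0"/"pc_1" halves at offset and offset + 4.
 */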
1914 
1915 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1916 {
1917     TCGContext *s = tcg_ctx;
1918     TCGTemp *ts;
1919     int n;
1920 
1921     if (kind == TEMP_EBB) {
1922         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1923 
1924         if (idx < TCG_MAX_TEMPS) {
1925             /* There is already an available temp with the right type.  */
1926             clear_bit(idx, s->free_temps[type].l);
1927 
1928             ts = &s->temps[idx];
1929             ts->temp_allocated = 1;
1930             tcg_debug_assert(ts->base_type == type);
1931             tcg_debug_assert(ts->kind == kind);
1932             return ts;
1933         }
1934     } else {
1935         tcg_debug_assert(kind == TEMP_TB);
1936     }
1937 
1938     switch (type) {
1939     case TCG_TYPE_I32:
1940     case TCG_TYPE_V64:
1941     case TCG_TYPE_V128:
1942     case TCG_TYPE_V256:
1943         n = 1;
1944         break;
1945     case TCG_TYPE_I64:
1946         n = 64 / TCG_TARGET_REG_BITS;
1947         break;
1948     case TCG_TYPE_I128:
1949         n = 128 / TCG_TARGET_REG_BITS;
1950         break;
1951     default:
1952         g_assert_not_reached();
1953     }
1954 
1955     ts = tcg_temp_alloc(s);
1956     ts->base_type = type;
1957     ts->temp_allocated = 1;
1958     ts->kind = kind;
1959 
1960     if (n == 1) {
1961         ts->type = type;
1962     } else {
1963         ts->type = TCG_TYPE_REG;
1964 
1965         for (int i = 1; i < n; ++i) {
1966             TCGTemp *ts2 = tcg_temp_alloc(s);
1967 
1968             tcg_debug_assert(ts2 == ts + i);
1969             ts2->base_type = type;
1970             ts2->type = TCG_TYPE_REG;
1971             ts2->temp_allocated = 1;
1972             ts2->temp_subindex = i;
1973             ts2->kind = kind;
1974         }
1975     }
1976     return ts;
1977 }
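/*
 * Sub-temp sketch (illustrative): on a 32-bit host, TCG_TYPE_I64 needs
 * n = 64 / 32 = 2 host-register parts, so two adjacent TCGTemps are
 * allocated with temp_subindex 0 and 1; TCG_TYPE_I128 likewise gets
 * 128 / TCG_TARGET_REG_BITS consecutive parts on any host.
 */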
1978 
1979 TCGv_i32 tcg_temp_new_i32(void)
1980 {
1981     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1982 }
1983 
1984 TCGv_i32 tcg_temp_ebb_new_i32(void)
1985 {
1986     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1987 }
1988 
1989 TCGv_i64 tcg_temp_new_i64(void)
1990 {
1991     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1992 }
1993 
1994 TCGv_i64 tcg_temp_ebb_new_i64(void)
1995 {
1996     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1997 }
1998 
1999 TCGv_ptr tcg_temp_new_ptr(void)
2000 {
2001     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
2002 }
2003 
2004 TCGv_ptr tcg_temp_ebb_new_ptr(void)
2005 {
2006     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
2007 }
2008 
2009 TCGv_i128 tcg_temp_new_i128(void)
2010 {
2011     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
2012 }
2013 
2014 TCGv_i128 tcg_temp_ebb_new_i128(void)
2015 {
2016     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
2017 }
2018 
2019 TCGv_vec tcg_temp_new_vec(TCGType type)
2020 {
2021     TCGTemp *t;
2022 
2023 #ifdef CONFIG_DEBUG_TCG
2024     switch (type) {
2025     case TCG_TYPE_V64:
2026         assert(TCG_TARGET_HAS_v64);
2027         break;
2028     case TCG_TYPE_V128:
2029         assert(TCG_TARGET_HAS_v128);
2030         break;
2031     case TCG_TYPE_V256:
2032         assert(TCG_TARGET_HAS_v256);
2033         break;
2034     default:
2035         g_assert_not_reached();
2036     }
2037 #endif
2038 
2039     t = tcg_temp_new_internal(type, TEMP_EBB);
2040     return temp_tcgv_vec(t);
2041 }
2042 
2043 /* Create a new temp of the same type as an existing temp.  */
2044 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
2045 {
2046     TCGTemp *t = tcgv_vec_temp(match);
2047 
2048     tcg_debug_assert(t->temp_allocated != 0);
2049 
2050     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
2051     return temp_tcgv_vec(t);
2052 }
2053 
2054 void tcg_temp_free_internal(TCGTemp *ts)
2055 {
2056     TCGContext *s = tcg_ctx;
2057 
2058     switch (ts->kind) {
2059     case TEMP_CONST:
2060     case TEMP_TB:
2061         /* Silently ignore free. */
2062         break;
2063     case TEMP_EBB:
2064         tcg_debug_assert(ts->temp_allocated != 0);
2065         ts->temp_allocated = 0;
2066         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
2067         break;
2068     default:
2069         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
2070         g_assert_not_reached();
2071     }
2072 }
2073 
2074 void tcg_temp_free_i32(TCGv_i32 arg)
2075 {
2076     tcg_temp_free_internal(tcgv_i32_temp(arg));
2077 }
2078 
2079 void tcg_temp_free_i64(TCGv_i64 arg)
2080 {
2081     tcg_temp_free_internal(tcgv_i64_temp(arg));
2082 }
2083 
2084 void tcg_temp_free_i128(TCGv_i128 arg)
2085 {
2086     tcg_temp_free_internal(tcgv_i128_temp(arg));
2087 }
2088 
2089 void tcg_temp_free_ptr(TCGv_ptr arg)
2090 {
2091     tcg_temp_free_internal(tcgv_ptr_temp(arg));
2092 }
2093 
2094 void tcg_temp_free_vec(TCGv_vec arg)
2095 {
2096     tcg_temp_free_internal(tcgv_vec_temp(arg));
2097 }
2098 
2099 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
2100 {
2101     TCGContext *s = tcg_ctx;
2102     GHashTable *h = s->const_table[type];
2103     TCGTemp *ts;
2104 
2105     if (h == NULL) {
2106         h = g_hash_table_new(g_int64_hash, g_int64_equal);
2107         s->const_table[type] = h;
2108     }
2109 
2110     ts = g_hash_table_lookup(h, &val);
2111     if (ts == NULL) {
2112         int64_t *val_ptr;
2113 
2114         ts = tcg_temp_alloc(s);
2115 
2116         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
2117             TCGTemp *ts2 = tcg_temp_alloc(s);
2118 
2119             tcg_debug_assert(ts2 == ts + 1);
2120 
2121             ts->base_type = TCG_TYPE_I64;
2122             ts->type = TCG_TYPE_I32;
2123             ts->kind = TEMP_CONST;
2124             ts->temp_allocated = 1;
2125 
2126             ts2->base_type = TCG_TYPE_I64;
2127             ts2->type = TCG_TYPE_I32;
2128             ts2->kind = TEMP_CONST;
2129             ts2->temp_allocated = 1;
2130             ts2->temp_subindex = 1;
2131 
2132             /*
2133              * Retain the full value of the 64-bit constant in the low
2134              * part, so that the hash table works.  Actual uses will
2135              * truncate the value to its low 32 bits.
2136              */
2137             ts[HOST_BIG_ENDIAN].val = val;
2138             ts[!HOST_BIG_ENDIAN].val = val >> 32;
2139             val_ptr = &ts[HOST_BIG_ENDIAN].val;
2140         } else {
2141             ts->base_type = type;
2142             ts->type = type;
2143             ts->kind = TEMP_CONST;
2144             ts->temp_allocated = 1;
2145             ts->val = val;
2146             val_ptr = &ts->val;
2147         }
2148         g_hash_table_insert(h, val_ptr, ts);
2149     }
2150 
2151     return ts;
2152 }
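/*
 * Interning sketch (illustrative only): equal constants of the same
 * type share a single TEMP_CONST temp, so repeated requests are cheap:
 *
 *     TCGv_i32 a = tcg_constant_i32(42);
 *     TCGv_i32 b = tcg_constant_i32(42);
 *     // a == b: both name the same hash-table entry, and neither may
 *     // be freed (tcg_temp_free_internal ignores TEMP_CONST).
 */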
2153 
2154 TCGv_i32 tcg_constant_i32(int32_t val)
2155 {
2156     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
2157 }
2158 
2159 TCGv_i64 tcg_constant_i64(int64_t val)
2160 {
2161     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
2162 }
2163 
2164 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
2165 {
2166     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
2167 }
2168 
2169 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
2170 {
2171     val = dup_const(vece, val);
2172     return temp_tcgv_vec(tcg_constant_internal(type, val));
2173 }
2174 
2175 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
2176 {
2177     TCGTemp *t = tcgv_vec_temp(match);
2178 
2179     tcg_debug_assert(t->temp_allocated != 0);
2180     return tcg_constant_vec(t->base_type, vece, val);
2181 }
2182 
2183 #ifdef CONFIG_DEBUG_TCG
2184 size_t temp_idx(TCGTemp *ts)
2185 {
2186     ptrdiff_t n = ts - tcg_ctx->temps;
2187     assert(n >= 0 && n < tcg_ctx->nb_temps);
2188     return n;
2189 }
2190 
2191 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
2192 {
2193     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
2194 
2195     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
2196     assert(o % sizeof(TCGTemp) == 0);
2197 
2198     return (void *)tcg_ctx + (uintptr_t)v;
2199 }
2200 #endif /* CONFIG_DEBUG_TCG */
2201 
2202 /*
2203  * Return true if OP may appear in the opcode stream with TYPE.
2204  * Test the runtime variable that controls each opcode.
2205  */
2206 bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
2207 {
2208     bool has_type;
2209 
2210     switch (type) {
2211     case TCG_TYPE_I32:
2212         has_type = true;
2213         break;
2214     case TCG_TYPE_I64:
2215         has_type = TCG_TARGET_REG_BITS == 64;
2216         break;
2217     case TCG_TYPE_V64:
2218         has_type = TCG_TARGET_HAS_v64;
2219         break;
2220     case TCG_TYPE_V128:
2221         has_type = TCG_TARGET_HAS_v128;
2222         break;
2223     case TCG_TYPE_V256:
2224         has_type = TCG_TARGET_HAS_v256;
2225         break;
2226     default:
2227         has_type = false;
2228         break;
2229     }
2230 
2231     switch (op) {
2232     case INDEX_op_discard:
2233     case INDEX_op_set_label:
2234     case INDEX_op_call:
2235     case INDEX_op_br:
2236     case INDEX_op_mb:
2237     case INDEX_op_insn_start:
2238     case INDEX_op_exit_tb:
2239     case INDEX_op_goto_tb:
2240     case INDEX_op_goto_ptr:
2241     case INDEX_op_qemu_ld_i32:
2242     case INDEX_op_qemu_st_i32:
2243     case INDEX_op_qemu_ld_i64:
2244     case INDEX_op_qemu_st_i64:
2245         return true;
2246 
2247     case INDEX_op_qemu_st8_i32:
2248         return TCG_TARGET_HAS_qemu_st8_i32;
2249 
2250     case INDEX_op_qemu_ld_i128:
2251     case INDEX_op_qemu_st_i128:
2252         return TCG_TARGET_HAS_qemu_ldst_i128;
2253 
2254     case INDEX_op_add:
2255     case INDEX_op_and:
2256     case INDEX_op_mov:
2257     case INDEX_op_or:
2258     case INDEX_op_xor:
2259         return has_type;
2260 
2261     case INDEX_op_setcond_i32:
2262     case INDEX_op_brcond_i32:
2263     case INDEX_op_movcond_i32:
2264     case INDEX_op_ld8u_i32:
2265     case INDEX_op_ld8s_i32:
2266     case INDEX_op_ld16u_i32:
2267     case INDEX_op_ld16s_i32:
2268     case INDEX_op_ld_i32:
2269     case INDEX_op_st8_i32:
2270     case INDEX_op_st16_i32:
2271     case INDEX_op_st_i32:
2272     case INDEX_op_extract_i32:
2273     case INDEX_op_sextract_i32:
2274     case INDEX_op_deposit_i32:
2275         return true;
2276 
2277     case INDEX_op_negsetcond_i32:
2278         return TCG_TARGET_HAS_negsetcond_i32;
2279     case INDEX_op_extract2_i32:
2280         return TCG_TARGET_HAS_extract2_i32;
2281     case INDEX_op_add2_i32:
2282         return TCG_TARGET_HAS_add2_i32;
2283     case INDEX_op_sub2_i32:
2284         return TCG_TARGET_HAS_sub2_i32;
2285     case INDEX_op_mulu2_i32:
2286         return TCG_TARGET_HAS_mulu2_i32;
2287     case INDEX_op_muls2_i32:
2288         return TCG_TARGET_HAS_muls2_i32;
2289     case INDEX_op_bswap16_i32:
2290         return TCG_TARGET_HAS_bswap16_i32;
2291     case INDEX_op_bswap32_i32:
2292         return TCG_TARGET_HAS_bswap32_i32;
2293     case INDEX_op_ctpop_i32:
2294         return TCG_TARGET_HAS_ctpop_i32;
2295 
2296     case INDEX_op_brcond2_i32:
2297     case INDEX_op_setcond2_i32:
2298         return TCG_TARGET_REG_BITS == 32;
2299 
2300     case INDEX_op_setcond_i64:
2301     case INDEX_op_brcond_i64:
2302     case INDEX_op_movcond_i64:
2303     case INDEX_op_ld8u_i64:
2304     case INDEX_op_ld8s_i64:
2305     case INDEX_op_ld16u_i64:
2306     case INDEX_op_ld16s_i64:
2307     case INDEX_op_ld32u_i64:
2308     case INDEX_op_ld32s_i64:
2309     case INDEX_op_ld_i64:
2310     case INDEX_op_st8_i64:
2311     case INDEX_op_st16_i64:
2312     case INDEX_op_st32_i64:
2313     case INDEX_op_st_i64:
2314     case INDEX_op_ext_i32_i64:
2315     case INDEX_op_extu_i32_i64:
2316     case INDEX_op_extract_i64:
2317     case INDEX_op_sextract_i64:
2318     case INDEX_op_deposit_i64:
2319         return TCG_TARGET_REG_BITS == 64;
2320 
2321     case INDEX_op_negsetcond_i64:
2322         return TCG_TARGET_HAS_negsetcond_i64;
2323     case INDEX_op_extract2_i64:
2324         return TCG_TARGET_HAS_extract2_i64;
2325     case INDEX_op_extrl_i64_i32:
2326     case INDEX_op_extrh_i64_i32:
2327         return TCG_TARGET_HAS_extr_i64_i32;
2328     case INDEX_op_bswap16_i64:
2329         return TCG_TARGET_HAS_bswap16_i64;
2330     case INDEX_op_bswap32_i64:
2331         return TCG_TARGET_HAS_bswap32_i64;
2332     case INDEX_op_bswap64_i64:
2333         return TCG_TARGET_HAS_bswap64_i64;
2334     case INDEX_op_ctpop_i64:
2335         return TCG_TARGET_HAS_ctpop_i64;
2336     case INDEX_op_add2_i64:
2337         return TCG_TARGET_HAS_add2_i64;
2338     case INDEX_op_sub2_i64:
2339         return TCG_TARGET_HAS_sub2_i64;
2340     case INDEX_op_mulu2_i64:
2341         return TCG_TARGET_HAS_mulu2_i64;
2342     case INDEX_op_muls2_i64:
2343         return TCG_TARGET_HAS_muls2_i64;
2344 
2345     case INDEX_op_mov_vec:
2346     case INDEX_op_dup_vec:
2347     case INDEX_op_dupm_vec:
2348     case INDEX_op_ld_vec:
2349     case INDEX_op_st_vec:
2350     case INDEX_op_add_vec:
2351     case INDEX_op_sub_vec:
2352     case INDEX_op_and_vec:
2353     case INDEX_op_or_vec:
2354     case INDEX_op_xor_vec:
2355     case INDEX_op_cmp_vec:
2356         return has_type;
2357     case INDEX_op_dup2_vec:
2358         return has_type && TCG_TARGET_REG_BITS == 32;
2359     case INDEX_op_not_vec:
2360         return has_type && TCG_TARGET_HAS_not_vec;
2361     case INDEX_op_neg_vec:
2362         return has_type && TCG_TARGET_HAS_neg_vec;
2363     case INDEX_op_abs_vec:
2364         return has_type && TCG_TARGET_HAS_abs_vec;
2365     case INDEX_op_andc_vec:
2366         return has_type && TCG_TARGET_HAS_andc_vec;
2367     case INDEX_op_orc_vec:
2368         return has_type && TCG_TARGET_HAS_orc_vec;
2369     case INDEX_op_nand_vec:
2370         return has_type && TCG_TARGET_HAS_nand_vec;
2371     case INDEX_op_nor_vec:
2372         return has_type && TCG_TARGET_HAS_nor_vec;
2373     case INDEX_op_eqv_vec:
2374         return has_type && TCG_TARGET_HAS_eqv_vec;
2375     case INDEX_op_mul_vec:
2376         return has_type && TCG_TARGET_HAS_mul_vec;
2377     case INDEX_op_shli_vec:
2378     case INDEX_op_shri_vec:
2379     case INDEX_op_sari_vec:
2380         return has_type && TCG_TARGET_HAS_shi_vec;
2381     case INDEX_op_shls_vec:
2382     case INDEX_op_shrs_vec:
2383     case INDEX_op_sars_vec:
2384         return has_type && TCG_TARGET_HAS_shs_vec;
2385     case INDEX_op_shlv_vec:
2386     case INDEX_op_shrv_vec:
2387     case INDEX_op_sarv_vec:
2388         return has_type && TCG_TARGET_HAS_shv_vec;
2389     case INDEX_op_rotli_vec:
2390         return has_type && TCG_TARGET_HAS_roti_vec;
2391     case INDEX_op_rotls_vec:
2392         return has_type && TCG_TARGET_HAS_rots_vec;
2393     case INDEX_op_rotlv_vec:
2394     case INDEX_op_rotrv_vec:
2395         return has_type && TCG_TARGET_HAS_rotv_vec;
2396     case INDEX_op_ssadd_vec:
2397     case INDEX_op_usadd_vec:
2398     case INDEX_op_sssub_vec:
2399     case INDEX_op_ussub_vec:
2400         return has_type && TCG_TARGET_HAS_sat_vec;
2401     case INDEX_op_smin_vec:
2402     case INDEX_op_umin_vec:
2403     case INDEX_op_smax_vec:
2404     case INDEX_op_umax_vec:
2405         return has_type && TCG_TARGET_HAS_minmax_vec;
2406     case INDEX_op_bitsel_vec:
2407         return has_type && TCG_TARGET_HAS_bitsel_vec;
2408     case INDEX_op_cmpsel_vec:
2409         return has_type && TCG_TARGET_HAS_cmpsel_vec;
2410 
2411     default:
2412         if (op < INDEX_op_last_generic) {
2413             const TCGOutOp *outop;
2414             TCGConstraintSetIndex con_set;
2415 
2416             if (!has_type) {
2417                 return false;
2418             }
2419 
2420             outop = all_outop[op];
2421             tcg_debug_assert(outop != NULL);
2422 
2423             con_set = outop->static_constraint;
2424             if (con_set == C_Dynamic) {
2425                 con_set = outop->dynamic_constraint(type, flags);
2426             }
2427             if (con_set >= 0) {
2428                 return true;
2429             }
2430             tcg_debug_assert(con_set == C_NotImplemented);
2431             return false;
2432         }
2433         tcg_debug_assert(op < NB_OPS);
2434         return true;
2435 
2436     case INDEX_op_last_generic:
2437         g_assert_not_reached();
2438     }
2439 }
2440 
2441 bool tcg_op_deposit_valid(TCGType type, unsigned ofs, unsigned len)
2442 {
2443     unsigned width;
2444 
2445     tcg_debug_assert(type == TCG_TYPE_I32 || type == TCG_TYPE_I64);
2446     width = (type == TCG_TYPE_I32 ? 32 : 64);
2447 
2448     tcg_debug_assert(ofs < width);
2449     tcg_debug_assert(len > 0);
2450     tcg_debug_assert(len <= width - ofs);
2451 
2452     return TCG_TARGET_deposit_valid(type, ofs, len);
2453 }
2454 
2455 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2456 
2457 static void tcg_gen_callN(void *func, TCGHelperInfo *info,
2458                           TCGTemp *ret, TCGTemp **args)
2459 {
2460     TCGv_i64 extend_free[MAX_CALL_IARGS];
2461     int n_extend = 0;
2462     TCGOp *op;
2463     int i, n, pi = 0, total_args;
2464 
2465     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2466         init_call_layout(info);
2467         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2468     }
2469 
2470     total_args = info->nr_out + info->nr_in + 2;
2471     op = tcg_op_alloc(INDEX_op_call, total_args);
2472 
2473 #ifdef CONFIG_PLUGIN
2474     /* Flag helpers that may affect guest state */
2475     if (tcg_ctx->plugin_insn && !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2476         tcg_ctx->plugin_insn->calls_helpers = true;
2477     }
2478 #endif
2479 
2480     TCGOP_CALLO(op) = n = info->nr_out;
2481     switch (n) {
2482     case 0:
2483         tcg_debug_assert(ret == NULL);
2484         break;
2485     case 1:
2486         tcg_debug_assert(ret != NULL);
2487         op->args[pi++] = temp_arg(ret);
2488         break;
2489     case 2:
2490     case 4:
2491         tcg_debug_assert(ret != NULL);
2492         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2493         tcg_debug_assert(ret->temp_subindex == 0);
2494         for (i = 0; i < n; ++i) {
2495             op->args[pi++] = temp_arg(ret + i);
2496         }
2497         break;
2498     default:
2499         g_assert_not_reached();
2500     }
2501 
2502     TCGOP_CALLI(op) = n = info->nr_in;
2503     for (i = 0; i < n; i++) {
2504         const TCGCallArgumentLoc *loc = &info->in[i];
2505         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2506 
2507         switch (loc->kind) {
2508         case TCG_CALL_ARG_NORMAL:
2509         case TCG_CALL_ARG_BY_REF:
2510         case TCG_CALL_ARG_BY_REF_N:
2511             op->args[pi++] = temp_arg(ts);
2512             break;
2513 
2514         case TCG_CALL_ARG_EXTEND_U:
2515         case TCG_CALL_ARG_EXTEND_S:
2516             {
2517                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2518                 TCGv_i32 orig = temp_tcgv_i32(ts);
2519 
2520                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2521                     tcg_gen_ext_i32_i64(temp, orig);
2522                 } else {
2523                     tcg_gen_extu_i32_i64(temp, orig);
2524                 }
2525                 op->args[pi++] = tcgv_i64_arg(temp);
2526                 extend_free[n_extend++] = temp;
2527             }
2528             break;
2529 
2530         default:
2531             g_assert_not_reached();
2532         }
2533     }
2534     op->args[pi++] = (uintptr_t)func;
2535     op->args[pi++] = (uintptr_t)info;
2536     tcg_debug_assert(pi == total_args);
2537 
2538     if (tcg_ctx->emit_before_op) {
2539         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2540     } else {
2541         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2542     }
2543 
2544     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2545     for (i = 0; i < n_extend; ++i) {
2546         tcg_temp_free_i64(extend_free[i]);
2547     }
2548 }
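/*
 * Extension sketch (illustrative): for an ABI using TCG_CALL_ARG_EXTEND,
 * the input loop above widens each 32-bit argument into a fresh EBB temp
 * and releases it once the call op has been inserted, roughly:
 *
 *     TCGv_i64 temp = tcg_temp_ebb_new_i64();
 *     tcg_gen_ext_i32_i64(temp, orig);     // or tcg_gen_extu_i32_i64
 *     ...                                  // op->args[] records temp
 *     tcg_temp_free_i64(temp);             // after insertion
 */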
2549 
2550 void tcg_gen_call0(void *func, TCGHelperInfo *info, TCGTemp *ret)
2551 {
2552     tcg_gen_callN(func, info, ret, NULL);
2553 }
2554 
2555 void tcg_gen_call1(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2556 {
2557     tcg_gen_callN(func, info, ret, &t1);
2558 }
2559 
2560 void tcg_gen_call2(void *func, TCGHelperInfo *info, TCGTemp *ret,
2561                    TCGTemp *t1, TCGTemp *t2)
2562 {
2563     TCGTemp *args[2] = { t1, t2 };
2564     tcg_gen_callN(func, info, ret, args);
2565 }
2566 
2567 void tcg_gen_call3(void *func, TCGHelperInfo *info, TCGTemp *ret,
2568                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3)
2569 {
2570     TCGTemp *args[3] = { t1, t2, t3 };
2571     tcg_gen_callN(func, info, ret, args);
2572 }
2573 
2574 void tcg_gen_call4(void *func, TCGHelperInfo *info, TCGTemp *ret,
2575                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2576 {
2577     TCGTemp *args[4] = { t1, t2, t3, t4 };
2578     tcg_gen_callN(func, info, ret, args);
2579 }
2580 
2581 void tcg_gen_call5(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2582                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2583 {
2584     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2585     tcg_gen_callN(func, info, ret, args);
2586 }
2587 
2588 void tcg_gen_call6(void *func, TCGHelperInfo *info, TCGTemp *ret,
2589                    TCGTemp *t1, TCGTemp *t2, TCGTemp *t3,
2590                    TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2591 {
2592     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2593     tcg_gen_callN(func, info, ret, args);
2594 }
2595 
2596 void tcg_gen_call7(void *func, TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2597                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2598                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2599 {
2600     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2601     tcg_gen_callN(func, info, ret, args);
2602 }
2603 
2604 static void tcg_reg_alloc_start(TCGContext *s)
2605 {
2606     int i, n;
2607 
2608     for (i = 0, n = s->nb_temps; i < n; i++) {
2609         TCGTemp *ts = &s->temps[i];
2610         TCGTempVal val = TEMP_VAL_MEM;
2611 
2612         switch (ts->kind) {
2613         case TEMP_CONST:
2614             val = TEMP_VAL_CONST;
2615             break;
2616         case TEMP_FIXED:
2617             val = TEMP_VAL_REG;
2618             break;
2619         case TEMP_GLOBAL:
2620             break;
2621         case TEMP_EBB:
2622             val = TEMP_VAL_DEAD;
2623             /* fall through */
2624         case TEMP_TB:
2625             ts->mem_allocated = 0;
2626             break;
2627         default:
2628             g_assert_not_reached();
2629         }
2630         ts->val_type = val;
2631     }
2632 
2633     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2634 }
2635 
2636 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2637                                  TCGTemp *ts)
2638 {
2639     int idx = temp_idx(ts);
2640 
2641     switch (ts->kind) {
2642     case TEMP_FIXED:
2643     case TEMP_GLOBAL:
2644         pstrcpy(buf, buf_size, ts->name);
2645         break;
2646     case TEMP_TB:
2647         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2648         break;
2649     case TEMP_EBB:
2650         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2651         break;
2652     case TEMP_CONST:
2653         switch (ts->type) {
2654         case TCG_TYPE_I32:
2655             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2656             break;
2657 #if TCG_TARGET_REG_BITS > 32
2658         case TCG_TYPE_I64:
2659             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2660             break;
2661 #endif
2662         case TCG_TYPE_V64:
2663         case TCG_TYPE_V128:
2664         case TCG_TYPE_V256:
2665             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2666                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2667             break;
2668         default:
2669             g_assert_not_reached();
2670         }
2671         break;
2672     }
2673     return buf;
2674 }
2675 
2676 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2677                              int buf_size, TCGArg arg)
2678 {
2679     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2680 }
2681 
2682 static const char * const cond_name[] =
2683 {
2684     [TCG_COND_NEVER] = "never",
2685     [TCG_COND_ALWAYS] = "always",
2686     [TCG_COND_EQ] = "eq",
2687     [TCG_COND_NE] = "ne",
2688     [TCG_COND_LT] = "lt",
2689     [TCG_COND_GE] = "ge",
2690     [TCG_COND_LE] = "le",
2691     [TCG_COND_GT] = "gt",
2692     [TCG_COND_LTU] = "ltu",
2693     [TCG_COND_GEU] = "geu",
2694     [TCG_COND_LEU] = "leu",
2695     [TCG_COND_GTU] = "gtu",
2696     [TCG_COND_TSTEQ] = "tsteq",
2697     [TCG_COND_TSTNE] = "tstne",
2698 };
2699 
2700 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2701 {
2702     [MO_UB]   = "ub",
2703     [MO_SB]   = "sb",
2704     [MO_LEUW] = "leuw",
2705     [MO_LESW] = "lesw",
2706     [MO_LEUL] = "leul",
2707     [MO_LESL] = "lesl",
2708     [MO_LEUQ] = "leq",
2709     [MO_BEUW] = "beuw",
2710     [MO_BESW] = "besw",
2711     [MO_BEUL] = "beul",
2712     [MO_BESL] = "besl",
2713     [MO_BEUQ] = "beq",
2714     [MO_128 + MO_BE] = "beo",
2715     [MO_128 + MO_LE] = "leo",
2716 };
2717 
2718 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2719     [MO_UNALN >> MO_ASHIFT]    = "un+",
2720     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2721     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2722     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2723     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2724     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2725     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2726     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2727 };
2728 
2729 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2730     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2731     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2732     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2733     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2734     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2735     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2736 };
2737 
2738 static const char bswap_flag_name[][6] = {
2739     [TCG_BSWAP_IZ] = "iz",
2740     [TCG_BSWAP_OZ] = "oz",
2741     [TCG_BSWAP_OS] = "os",
2742     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2743     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2744 };
2745 
2746 #ifdef CONFIG_PLUGIN
2747 static const char * const plugin_from_name[] = {
2748     "from-tb",
2749     "from-insn",
2750     "after-insn",
2751     "after-tb",
2752 };
2753 #endif
2754 
2755 static inline bool tcg_regset_single(TCGRegSet d)
2756 {
2757     return (d & (d - 1)) == 0;
2758 }
2759 
2760 static inline TCGReg tcg_regset_first(TCGRegSet d)
2761 {
2762     if (TCG_TARGET_NB_REGS <= 32) {
2763         return ctz32(d);
2764     } else {
2765         return ctz64(d);
2766     }
2767 }
2768 
2769 /* Return only the number of characters output -- no error return. */
2770 #define ne_fprintf(...) \
2771     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2772 
2773 void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2774 {
2775     char buf[128];
2776     TCGOp *op;
2777 
2778     QTAILQ_FOREACH(op, &s->ops, link) {
2779         int i, k, nb_oargs, nb_iargs, nb_cargs;
2780         const TCGOpDef *def;
2781         TCGOpcode c;
2782         int col = 0;
2783 
2784         c = op->opc;
2785         def = &tcg_op_defs[c];
2786 
2787         if (c == INDEX_op_insn_start) {
2788             nb_oargs = 0;
2789             col += ne_fprintf(f, "\n ----");
2790 
2791             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2792                 col += ne_fprintf(f, " %016" PRIx64,
2793                                   tcg_get_insn_start_param(op, i));
2794             }
2795         } else if (c == INDEX_op_call) {
2796             const TCGHelperInfo *info = tcg_call_info(op);
2797             void *func = tcg_call_func(op);
2798 
2799             /* variable number of arguments */
2800             nb_oargs = TCGOP_CALLO(op);
2801             nb_iargs = TCGOP_CALLI(op);
2802             nb_cargs = def->nb_cargs;
2803 
2804             col += ne_fprintf(f, " %s ", def->name);
2805 
2806             /*
2807              * Print the function name from TCGHelperInfo, if available.
2808              * Note that plugins have a template function for the info,
2809              * but the actual function pointer comes from the plugin.
2810              */
2811             if (func == info->func) {
2812                 col += ne_fprintf(f, "%s", info->name);
2813             } else {
2814                 col += ne_fprintf(f, "plugin(%p)", func);
2815             }
2816 
2817             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2818             for (i = 0; i < nb_oargs; i++) {
2819                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2820                                                             op->args[i]));
2821             }
2822             for (i = 0; i < nb_iargs; i++) {
2823                 TCGArg arg = op->args[nb_oargs + i];
2824                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2825                 col += ne_fprintf(f, ",%s", t);
2826             }
2827         } else {
2828             if (def->flags & TCG_OPF_INT) {
2829                 col += ne_fprintf(f, " %s_i%d ",
2830                                   def->name,
2831                                   8 * tcg_type_size(TCGOP_TYPE(op)));
2832             } else if (def->flags & TCG_OPF_VECTOR) {
2833                 col += ne_fprintf(f, "%s v%d,e%d,",
2834                                   def->name,
2835                                   8 * tcg_type_size(TCGOP_TYPE(op)),
2836                                   8 << TCGOP_VECE(op));
2837             } else {
2838                 col += ne_fprintf(f, " %s ", def->name);
2839             }
2840 
2841             nb_oargs = def->nb_oargs;
2842             nb_iargs = def->nb_iargs;
2843             nb_cargs = def->nb_cargs;
2844 
2845             k = 0;
2846             for (i = 0; i < nb_oargs; i++) {
2847                 const char *sep = k ? "," : "";
2848                 col += ne_fprintf(f, "%s%s", sep,
2849                                   tcg_get_arg_str(s, buf, sizeof(buf),
2850                                                   op->args[k++]));
2851             }
2852             for (i = 0; i < nb_iargs; i++) {
2853                 const char *sep = k ? "," : "";
2854                 col += ne_fprintf(f, "%s%s", sep,
2855                                   tcg_get_arg_str(s, buf, sizeof(buf),
2856                                                   op->args[k++]));
2857             }
2858             switch (c) {
2859             case INDEX_op_brcond_i32:
2860             case INDEX_op_setcond_i32:
2861             case INDEX_op_negsetcond_i32:
2862             case INDEX_op_movcond_i32:
2863             case INDEX_op_brcond2_i32:
2864             case INDEX_op_setcond2_i32:
2865             case INDEX_op_brcond_i64:
2866             case INDEX_op_setcond_i64:
2867             case INDEX_op_negsetcond_i64:
2868             case INDEX_op_movcond_i64:
2869             case INDEX_op_cmp_vec:
2870             case INDEX_op_cmpsel_vec:
2871                 if (op->args[k] < ARRAY_SIZE(cond_name)
2872                     && cond_name[op->args[k]]) {
2873                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2874                 } else {
2875                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2876                 }
2877                 i = 1;
2878                 break;
2879             case INDEX_op_qemu_ld_i32:
2880             case INDEX_op_qemu_st_i32:
2881             case INDEX_op_qemu_st8_i32:
2882             case INDEX_op_qemu_ld_i64:
2883             case INDEX_op_qemu_st_i64:
2884             case INDEX_op_qemu_ld_i128:
2885             case INDEX_op_qemu_st_i128:
2886                 {
2887                     const char *s_al, *s_op, *s_at;
2888                     MemOpIdx oi = op->args[k++];
2889                     MemOp mop = get_memop(oi);
2890                     unsigned ix = get_mmuidx(oi);
2891 
2892                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2893                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2894                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2895                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2896 
2897                     /* If all fields are accounted for, print symbolically. */
2898                     if (!mop && s_al && s_op && s_at) {
2899                         col += ne_fprintf(f, ",%s%s%s,%u",
2900                                           s_at, s_al, s_op, ix);
2901                     } else {
2902                         mop = get_memop(oi);
2903                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2904                     }
2905                     i = 1;
2906                 }
2907                 break;
2908             case INDEX_op_bswap16_i32:
2909             case INDEX_op_bswap16_i64:
2910             case INDEX_op_bswap32_i32:
2911             case INDEX_op_bswap32_i64:
2912             case INDEX_op_bswap64_i64:
2913                 {
2914                     TCGArg flags = op->args[k];
2915                     const char *name = NULL;
2916 
2917                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2918                         name = bswap_flag_name[flags];
2919                     }
2920                     if (name) {
2921                         col += ne_fprintf(f, ",%s", name);
2922                     } else {
2923                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2924                     }
2925                     i = k = 1;
2926                 }
2927                 break;
2928 #ifdef CONFIG_PLUGIN
2929             case INDEX_op_plugin_cb:
2930                 {
2931                     TCGArg from = op->args[k++];
2932                     const char *name = NULL;
2933 
2934                     if (from < ARRAY_SIZE(plugin_from_name)) {
2935                         name = plugin_from_name[from];
2936                     }
2937                     if (name) {
2938                         col += ne_fprintf(f, "%s", name);
2939                     } else {
2940                         col += ne_fprintf(f, "$0x%" TCG_PRIlx, from);
2941                     }
2942                     i = 1;
2943                 }
2944                 break;
2945 #endif
2946             default:
2947                 i = 0;
2948                 break;
2949             }
2950             switch (c) {
2951             case INDEX_op_set_label:
2952             case INDEX_op_br:
2953             case INDEX_op_brcond_i32:
2954             case INDEX_op_brcond_i64:
2955             case INDEX_op_brcond2_i32:
2956                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2957                                   arg_label(op->args[k])->id);
2958                 i++, k++;
2959                 break;
2960             case INDEX_op_mb:
2961                 {
2962                     TCGBar membar = op->args[k];
2963                     const char *b_op, *m_op;
2964 
2965                     switch (membar & TCG_BAR_SC) {
2966                     case 0:
2967                         b_op = "none";
2968                         break;
2969                     case TCG_BAR_LDAQ:
2970                         b_op = "acq";
2971                         break;
2972                     case TCG_BAR_STRL:
2973                         b_op = "rel";
2974                         break;
2975                     case TCG_BAR_SC:
2976                         b_op = "seq";
2977                         break;
2978                     default:
2979                         g_assert_not_reached();
2980                     }
2981 
2982                     switch (membar & TCG_MO_ALL) {
2983                     case 0:
2984                         m_op = "none";
2985                         break;
2986                     case TCG_MO_LD_LD:
2987                         m_op = "rr";
2988                         break;
2989                     case TCG_MO_LD_ST:
2990                         m_op = "rw";
2991                         break;
2992                     case TCG_MO_ST_LD:
2993                         m_op = "wr";
2994                         break;
2995                     case TCG_MO_ST_ST:
2996                         m_op = "ww";
2997                         break;
2998                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2999                         m_op = "rr+rw";
3000                         break;
3001                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
3002                         m_op = "rr+wr";
3003                         break;
3004                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
3005                         m_op = "rr+ww";
3006                         break;
3007                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
3008                         m_op = "rw+wr";
3009                         break;
3010                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
3011                         m_op = "rw+ww";
3012                         break;
3013                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
3014                         m_op = "wr+ww";
3015                         break;
3016                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
3017                         m_op = "rr+rw+wr";
3018                         break;
3019                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
3020                         m_op = "rr+rw+ww";
3021                         break;
3022                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
3023                         m_op = "rr+wr+ww";
3024                         break;
3025                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
3026                         m_op = "rw+wr+ww";
3027                         break;
3028                     case TCG_MO_ALL:
3029                         m_op = "all";
3030                         break;
3031                     default:
3032                         g_assert_not_reached();
3033                     }
3034 
3035                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
3036                     i++, k++;
3037                 }
3038                 break;
3039             default:
3040                 break;
3041             }
3042             for (; i < nb_cargs; i++, k++) {
3043                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
3044                                   op->args[k]);
3045             }
3046         }
3047 
3048         if (have_prefs || op->life) {
3049             for (; col < 40; ++col) {
3050                 putc(' ', f);
3051             }
3052         }
3053 
3054         if (op->life) {
3055             unsigned life = op->life;
3056 
3057             if (life & (SYNC_ARG * 3)) {
3058                 ne_fprintf(f, "  sync:");
3059                 for (i = 0; i < 2; ++i) {
3060                     if (life & (SYNC_ARG << i)) {
3061                         ne_fprintf(f, " %d", i);
3062                     }
3063                 }
3064             }
3065             life /= DEAD_ARG;
3066             if (life) {
3067                 ne_fprintf(f, "  dead:");
3068                 for (i = 0; life; ++i, life >>= 1) {
3069                     if (life & 1) {
3070                         ne_fprintf(f, " %d", i);
3071                     }
3072                 }
3073             }
3074         }
3075 
3076         if (have_prefs) {
3077             for (i = 0; i < nb_oargs; ++i) {
3078                 TCGRegSet set = output_pref(op, i);
3079 
3080                 if (i == 0) {
3081                     ne_fprintf(f, "  pref=");
3082                 } else {
3083                     ne_fprintf(f, ",");
3084                 }
3085                 if (set == 0) {
3086                     ne_fprintf(f, "none");
3087                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
3088                     ne_fprintf(f, "all");
3089 #ifdef CONFIG_DEBUG_TCG
3090                 } else if (tcg_regset_single(set)) {
3091                     TCGReg reg = tcg_regset_first(set);
3092                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
3093 #endif
3094                 } else if (TCG_TARGET_NB_REGS <= 32) {
3095                     ne_fprintf(f, "0x%x", (uint32_t)set);
3096                 } else {
3097                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
3098                 }
3099             }
3100         }
3101 
3102         putc('\n', f);
3103     }
3104 }
3105 
3106 /* We give higher priority to constraints with fewer registers. */
3107 static int get_constraint_priority(const TCGArgConstraint *arg_ct, int k)
3108 {
3109     int n;
3110 
3111     arg_ct += k;
3112     n = ctpop64(arg_ct->regs);
3113 
3114     /*
3115      * Sort constraints of a single register first, which includes output
3116      * aliases (which must exactly match the input already allocated).
3117      */
3118     if (n == 1 || arg_ct->oalias) {
3119         return INT_MAX;
3120     }
3121 
3122     /*
3123      * Sort register pairs next, the first member followed immediately by the second.
3124      * Arbitrarily sort multiple pairs by the index of the first reg;
3125      * there shouldn't be many pairs.
3126      */
3127     switch (arg_ct->pair) {
3128     case 1:
3129     case 3:
3130         return (k + 1) * 2;
3131     case 2:
3132         return (arg_ct->pair_index + 1) * 2 - 1;
3133     }
3134 
3135     /* Finally, sort by decreasing register count. */
3136     assert(n > 1);
3137     return -n;
3138 }
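/*
 * Ordering sketch (illustrative): given constraints {fixed single
 * register, register pair, any-of-16 registers}, the keys come out as
 * INT_MAX, a small positive value, and -16, so sort_constraints() below
 * places them in exactly that order.
 */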
3139 
3140 /* Sort from highest priority to lowest. */
3141 static void sort_constraints(TCGArgConstraint *a, int start, int n)
3142 {
3143     int i, j;
3144 
3145     for (i = 0; i < n; i++) {
3146         a[start + i].sort_index = start + i;
3147     }
3148     if (n <= 1) {
3149         return;
3150     }
3151     for (i = 0; i < n - 1; i++) {
3152         for (j = i + 1; j < n; j++) {
3153             int p1 = get_constraint_priority(a, a[start + i].sort_index);
3154             int p2 = get_constraint_priority(a, a[start + j].sort_index);
3155             if (p1 < p2) {
3156                 int tmp = a[start + i].sort_index;
3157                 a[start + i].sort_index = a[start + j].sort_index;
3158                 a[start + j].sort_index = tmp;
3159             }
3160         }
3161     }
3162 }
3163 
3164 static const TCGArgConstraint empty_cts[TCG_MAX_OP_ARGS];
3165 static TCGArgConstraint all_cts[ARRAY_SIZE(constraint_sets)][TCG_MAX_OP_ARGS];
3166 
3167 static void process_constraint_sets(void)
3168 {
3169     for (size_t c = 0; c < ARRAY_SIZE(constraint_sets); ++c) {
3170         const TCGConstraintSet *tdefs = &constraint_sets[c];
3171         TCGArgConstraint *args_ct = all_cts[c];
3172         int nb_oargs = tdefs->nb_oargs;
3173         int nb_iargs = tdefs->nb_iargs;
3174         int nb_args = nb_oargs + nb_iargs;
3175         bool saw_alias_pair = false;
3176 
3177         for (int i = 0; i < nb_args; i++) {
3178             const char *ct_str = tdefs->args_ct_str[i];
3179             bool input_p = i >= nb_oargs;
3180             int o;
3181 
3182             switch (*ct_str) {
3183             case '0' ... '9':
3184                 o = *ct_str - '0';
3185                 tcg_debug_assert(input_p);
3186                 tcg_debug_assert(o < nb_oargs);
3187                 tcg_debug_assert(args_ct[o].regs != 0);
3188                 tcg_debug_assert(!args_ct[o].oalias);
3189                 args_ct[i] = args_ct[o];
3190                 /* The output sets oalias.  */
3191                 args_ct[o].oalias = 1;
3192                 args_ct[o].alias_index = i;
3193                 /* The input sets ialias. */
3194                 args_ct[i].ialias = 1;
3195                 args_ct[i].alias_index = o;
3196                 if (args_ct[i].pair) {
3197                     saw_alias_pair = true;
3198                 }
3199                 tcg_debug_assert(ct_str[1] == '\0');
3200                 continue;
3201 
3202             case '&':
3203                 tcg_debug_assert(!input_p);
3204                 args_ct[i].newreg = true;
3205                 ct_str++;
3206                 break;
3207 
3208             case 'p': /* plus */
3209                 /* Allocate to the register after the previous. */
3210                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3211                 o = i - 1;
3212                 tcg_debug_assert(!args_ct[o].pair);
3213                 tcg_debug_assert(!args_ct[o].ct);
3214                 args_ct[i] = (TCGArgConstraint){
3215                     .pair = 2,
3216                     .pair_index = o,
3217                     .regs = args_ct[o].regs << 1,
3218                     .newreg = args_ct[o].newreg,
3219                 };
3220                 args_ct[o].pair = 1;
3221                 args_ct[o].pair_index = i;
3222                 tcg_debug_assert(ct_str[1] == '\0');
3223                 continue;
3224 
3225             case 'm': /* minus */
3226                 /* Allocate to the register before the previous. */
3227                 tcg_debug_assert(i > (input_p ? nb_oargs : 0));
3228                 o = i - 1;
3229                 tcg_debug_assert(!args_ct[o].pair);
3230                 tcg_debug_assert(!args_ct[o].ct);
3231                 args_ct[i] = (TCGArgConstraint){
3232                     .pair = 1,
3233                     .pair_index = o,
3234                     .regs = args_ct[o].regs >> 1,
3235                     .newreg = args_ct[o].newreg,
3236                 };
3237                 args_ct[o].pair = 2;
3238                 args_ct[o].pair_index = i;
3239                 tcg_debug_assert(ct_str[1] == '\0');
3240                 continue;
3241             }
3242 
3243             do {
3244                 switch (*ct_str) {
3245                 case 'i':
3246                     args_ct[i].ct |= TCG_CT_CONST;
3247                     break;
3248 #ifdef TCG_REG_ZERO
3249                 case 'z':
3250                     args_ct[i].ct |= TCG_CT_REG_ZERO;
3251                     break;
3252 #endif
3253 
3254                 /* Include all of the target-specific constraints. */
3255 
3256 #undef CONST
3257 #define CONST(CASE, MASK) \
3258     case CASE: args_ct[i].ct |= MASK; break;
3259 #define REGS(CASE, MASK) \
3260     case CASE: args_ct[i].regs |= MASK; break;
3261 
3262 #include "tcg-target-con-str.h"
3263 
3264 #undef REGS
3265 #undef CONST
3266                 default:
3267                 case '0' ... '9':
3268                 case '&':
3269                 case 'p':
3270                 case 'm':
3271                     /* Typo in TCGConstraintSet constraint. */
3272                     g_assert_not_reached();
3273                 }
3274             } while (*++ct_str != '\0');
3275         }
3276 
3277         /*
3278          * Fix up output pairs that are aliased with inputs.
3279          * When we created the alias, we copied pair from the output.
3280          * There are three cases:
3281          *    (1a) Pairs of inputs alias pairs of outputs.
3282          *    (1b) One input aliases the first of a pair of outputs.
3283          *    (2)  One input aliases the second of a pair of outputs.
3284          *
3285          * Case 1a is handled by making sure that the pair_index'es are
3286          * properly updated so that they appear the same as a pair of inputs.
3287          *
3288          * Case 1b is handled by setting the pair_index of the input to
3289          * itself, simply so it doesn't point to an unrelated argument.
3290          * Since we don't encounter the "second" during the input allocation
3291          * phase, nothing happens with the second half of the input pair.
3292          *
3293          * Case 2 is handled by setting the second input to pair=3, the
3294          * first output to pair=3, and the pair_index'es to match.
3295          */
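        /*
         * Sketch of case 2 (hypothetical constraint set, for illustration
         * only): outputs { "r", "p" } form a pair, and an input "1"
         * aliases the second output.  The copy above left the input with
         * pair == 2; the fixup below turns both the input and the first
         * output into pair == 3 and cross-links their pair_index fields.
         */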
3296         if (saw_alias_pair) {
3297             for (int i = nb_oargs; i < nb_args; i++) {
3298                 int o, o2, i2;
3299 
3300                 /*
3301                  * Since [0-9pm] must be alone in the constraint string,
3302                  * the only way they can both be set is if the pair comes
3303                  * from the output alias.
3304                  */
3305                 if (!args_ct[i].ialias) {
3306                     continue;
3307                 }
3308                 switch (args_ct[i].pair) {
3309                 case 0:
3310                     break;
3311                 case 1:
3312                     o = args_ct[i].alias_index;
3313                     o2 = args_ct[o].pair_index;
3314                     tcg_debug_assert(args_ct[o].pair == 1);
3315                     tcg_debug_assert(args_ct[o2].pair == 2);
3316                     if (args_ct[o2].oalias) {
3317                         /* Case 1a */
3318                         i2 = args_ct[o2].alias_index;
3319                         tcg_debug_assert(args_ct[i2].pair == 2);
3320                         args_ct[i2].pair_index = i;
3321                         args_ct[i].pair_index = i2;
3322                     } else {
3323                         /* Case 1b */
3324                         args_ct[i].pair_index = i;
3325                     }
3326                     break;
3327                 case 2:
3328                     o = args_ct[i].alias_index;
3329                     o2 = args_ct[o].pair_index;
3330                     tcg_debug_assert(args_ct[o].pair == 2);
3331                     tcg_debug_assert(args_ct[o2].pair == 1);
3332                     if (args_ct[o2].oalias) {
3333                         /* Case 1a */
3334                         i2 = args_ct[o2].alias_index;
3335                         tcg_debug_assert(args_ct[i2].pair == 1);
3336                         args_ct[i2].pair_index = i;
3337                         args_ct[i].pair_index = i2;
3338                     } else {
3339                         /* Case 2 */
3340                         args_ct[i].pair = 3;
3341                         args_ct[o2].pair = 3;
3342                         args_ct[i].pair_index = o2;
3343                         args_ct[o2].pair_index = i;
3344                     }
3345                     break;
3346                 default:
3347                     g_assert_not_reached();
3348                 }
3349             }
3350         }
3351 
3352         /* sort the constraints (XXX: this is just a heuristic) */
3353         sort_constraints(args_ct, 0, nb_oargs);
3354         sort_constraints(args_ct, nb_oargs, nb_iargs);
3355     }
3356 }
3357 
3358 static const TCGArgConstraint *opcode_args_ct(const TCGOp *op)
3359 {
3360     TCGOpcode opc = op->opc;
3361     TCGType type = TCGOP_TYPE(op);
3362     unsigned flags = TCGOP_FLAGS(op);
3363     const TCGOpDef *def = &tcg_op_defs[opc];
3364     const TCGOutOp *outop = all_outop[opc];
3365     TCGConstraintSetIndex con_set;
3366 
3367     if (def->flags & TCG_OPF_NOT_PRESENT) {
3368         return empty_cts;
3369     }
3370 
3371     if (outop) {
3372         con_set = outop->static_constraint;
3373         if (con_set == C_Dynamic) {
3374             con_set = outop->dynamic_constraint(type, flags);
3375         }
3376     } else {
3377         con_set = tcg_target_op_def(opc, type, flags);
3378     }
3379     tcg_debug_assert(con_set >= 0);
3380     tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
3381 
3382     /* The constraint arguments must match TCGOpcode arguments. */
3383     tcg_debug_assert(constraint_sets[con_set].nb_oargs == def->nb_oargs);
3384     tcg_debug_assert(constraint_sets[con_set].nb_iargs == def->nb_iargs);
3385 
3386     return all_cts[con_set];
3387 }
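/*
 * Usage sketch (hypothetical caller): given
 *     const TCGArgConstraint *args_ct = opcode_args_ct(op);
 * the register allocator visits operands via sort_index, e.g.
 * args_ct[args_ct[0].sort_index] is the highest-priority output
 * constraint when nb_oargs > 0.
 */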
3388 
3389 static void remove_label_use(TCGOp *op, int idx)
3390 {
3391     TCGLabel *label = arg_label(op->args[idx]);
3392     TCGLabelUse *use;
3393 
3394     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3395         if (use->op == op) {
3396             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3397             return;
3398         }
3399     }
3400     g_assert_not_reached();
3401 }
3402 
3403 void tcg_op_remove(TCGContext *s, TCGOp *op)
3404 {
3405     switch (op->opc) {
3406     case INDEX_op_br:
3407         remove_label_use(op, 0);
3408         break;
3409     case INDEX_op_brcond_i32:
3410     case INDEX_op_brcond_i64:
3411         remove_label_use(op, 3);
3412         break;
3413     case INDEX_op_brcond2_i32:
3414         remove_label_use(op, 5);
3415         break;
3416     default:
3417         break;
3418     }
3419 
3420     QTAILQ_REMOVE(&s->ops, op, link);
3421     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3422     s->nb_ops--;
3423 }
3424 
3425 void tcg_remove_ops_after(TCGOp *op)
3426 {
3427     TCGContext *s = tcg_ctx;
3428 
3429     while (true) {
3430         TCGOp *last = tcg_last_op();
3431         if (last == op) {
3432             return;
3433         }
3434         tcg_op_remove(s, last);
3435     }
3436 }
3437 
3438 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3439 {
3440     TCGContext *s = tcg_ctx;
3441     TCGOp *op = NULL;
3442 
3443     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3444         QTAILQ_FOREACH(op, &s->free_ops, link) {
3445             if (nargs <= op->nargs) {
3446                 QTAILQ_REMOVE(&s->free_ops, op, link);
3447                 nargs = op->nargs;
3448                 goto found;
3449             }
3450         }
3451     }
3452 
3453     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3454     nargs = MAX(4, nargs);
3455     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3456 
3457  found:
3458     memset(op, 0, offsetof(TCGOp, link));
3459     op->opc = opc;
3460     op->nargs = nargs;
3461 
3462     /* Check for bitfield overflow. */
3463     tcg_debug_assert(op->nargs == nargs);
3464 
3465     s->nb_ops++;
3466     return op;
3467 }
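/*
 * Example (illustrative): tcg_op_alloc(INDEX_op_add, 3) first searches
 * free_ops for a recycled op with nargs >= 3; failing that, it
 * allocates a fresh op rounded up to 4 args, so a later 4-operand
 * request can still reuse it once freed.
 */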
3468 
3469 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3470 {
3471     TCGOp *op = tcg_op_alloc(opc, nargs);
3472 
3473     if (tcg_ctx->emit_before_op) {
3474         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3475     } else {
3476         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3477     }
3478     return op;
3479 }
3480 
3481 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3482                             TCGOpcode opc, TCGType type, unsigned nargs)
3483 {
3484     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3485 
3486     TCGOP_TYPE(new_op) = type;
3487     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3488     return new_op;
3489 }
3490 
3491 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3492                            TCGOpcode opc, TCGType type, unsigned nargs)
3493 {
3494     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3495 
3496     TCGOP_TYPE(new_op) = type;
3497     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3498     return new_op;
3499 }
3500 
3501 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3502 {
3503     TCGLabelUse *u;
3504 
3505     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3506         TCGOp *op = u->op;
3507         switch (op->opc) {
3508         case INDEX_op_br:
3509             op->args[0] = label_arg(to);
3510             break;
3511         case INDEX_op_brcond_i32:
3512         case INDEX_op_brcond_i64:
3513             op->args[3] = label_arg(to);
3514             break;
3515         case INDEX_op_brcond2_i32:
3516             op->args[5] = label_arg(to);
3517             break;
3518         default:
3519             g_assert_not_reached();
3520         }
3521     }
3522 
3523     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3524 }
3525 
3526 /* Reachability analysis: remove unreachable code.  */
3527 static void __attribute__((noinline))
3528 reachable_code_pass(TCGContext *s)
3529 {
3530     TCGOp *op, *op_next, *op_prev;
3531     bool dead = false;
3532 
3533     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3534         bool remove = dead;
3535         TCGLabel *label;
3536 
3537         switch (op->opc) {
3538         case INDEX_op_set_label:
3539             label = arg_label(op->args[0]);
3540 
3541             /*
3542              * Note that the first op in the TB is always a load,
3543              * so there is always something before a label.
3544              */
3545             op_prev = QTAILQ_PREV(op, link);
3546 
3547             /*
3548              * If we find two sequential labels, move all branches to
3549              * reference the second label and remove the first label.
3550              * Do this before branch to next optimization, so that the
3551              * middle label is out of the way.
3552              */
3553             if (op_prev->opc == INDEX_op_set_label) {
3554                 move_label_uses(label, arg_label(op_prev->args[0]));
3555                 tcg_op_remove(s, op_prev);
3556                 op_prev = QTAILQ_PREV(op, link);
3557             }
3558 
3559             /*
3560              * Optimization can fold conditional branches to unconditional.
3561              * If we find a label which is preceded by an unconditional
3562              * branch to next, remove the branch.  We couldn't do this when
3563              * processing the branch because any dead code between the branch
3564              * and label had not yet been removed.
3565              */
3566             if (op_prev->opc == INDEX_op_br &&
3567                 label == arg_label(op_prev->args[0])) {
3568                 tcg_op_remove(s, op_prev);
3569                 /* Fall through means insns become live again.  */
3570                 dead = false;
3571             }
3572 
3573             if (QSIMPLEQ_EMPTY(&label->branches)) {
3574                 /*
3575                  * While there is an occasional backward branch, virtually
3576                  * all branches generated by the translators are forward.
3577                  * This means that, in general, we will already have
3578                  * removed every reference to this label that will ever
3579                  * exist, and there is little to be gained by iterating.
3580                  */
3581                 remove = true;
3582             } else {
3583                 /* Once we see a label, insns become live again.  */
3584                 dead = false;
3585                 remove = false;
3586             }
3587             break;
3588 
3589         case INDEX_op_br:
3590         case INDEX_op_exit_tb:
3591         case INDEX_op_goto_ptr:
3592             /* Unconditional branches; everything following is dead.  */
3593             dead = true;
3594             break;
3595 
3596         case INDEX_op_call:
3597             /* Notice noreturn helper calls, raising exceptions.  */
3598             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3599                 dead = true;
3600             }
3601             break;
3602 
3603         case INDEX_op_insn_start:
3604             /* Never remove -- we need to keep these for unwind.  */
3605             remove = false;
3606             break;
3607 
3608         default:
3609             break;
3610         }
3611 
3612         if (remove) {
3613             tcg_op_remove(s, op);
3614         }
3615     }
3616 }
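/*
 * Worked example (hypothetical op stream): suppose optimization folded
 * a brcond into "br $L1" so the stream reads
 *     br $L1 ; mov t0, t1 ; set_label $L1 ; ...
 * The br marks everything after it dead, so the mov is removed; at
 * set_label the preceding op is then "br $L1", a branch to the next
 * insn, so the br is deleted and the following code is live again.
 * If nothing else branches to $L1, the label itself is removed too.
 */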
3617 
3618 #define TS_DEAD  1
3619 #define TS_MEM   2
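/*
 * Temp state bits for liveness (as used below): TS_DEAD means no later
 * op needs the value in a register; TS_MEM means the value must reach
 * (or already be in) its memory slot.  TS_DEAD | TS_MEM is thus "dead
 * but synced back", the state required of globals at function end.
 */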
3620 
3621 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3622 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3623 
3624 /* For liveness_pass_1, the register preferences for a given temp.  */
3625 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3626 {
3627     return ts->state_ptr;
3628 }
3629 
3630 /* For liveness_pass_1, reset the preferences for a given temp to the
3631  * maximal regset for its type.
3632  */
3633 static inline void la_reset_pref(TCGTemp *ts)
3634 {
3635     *la_temp_pref(ts)
3636         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3637 }
3638 
3639 /* liveness analysis: end of function: all temps are dead, and globals
3640    should be in memory. */
3641 static void la_func_end(TCGContext *s, int ng, int nt)
3642 {
3643     int i;
3644 
3645     for (i = 0; i < ng; ++i) {
3646         s->temps[i].state = TS_DEAD | TS_MEM;
3647         la_reset_pref(&s->temps[i]);
3648     }
3649     for (i = ng; i < nt; ++i) {
3650         s->temps[i].state = TS_DEAD;
3651         la_reset_pref(&s->temps[i]);
3652     }
3653 }
3654 
3655 /* liveness analysis: end of basic block: all temps are dead, globals
3656    and local temps should be in memory. */
3657 static void la_bb_end(TCGContext *s, int ng, int nt)
3658 {
3659     int i;
3660 
3661     for (i = 0; i < nt; ++i) {
3662         TCGTemp *ts = &s->temps[i];
3663         int state;
3664 
3665         switch (ts->kind) {
3666         case TEMP_FIXED:
3667         case TEMP_GLOBAL:
3668         case TEMP_TB:
3669             state = TS_DEAD | TS_MEM;
3670             break;
3671         case TEMP_EBB:
3672         case TEMP_CONST:
3673             state = TS_DEAD;
3674             break;
3675         default:
3676             g_assert_not_reached();
3677         }
3678         ts->state = state;
3679         la_reset_pref(ts);
3680     }
3681 }
3682 
3683 /* liveness analysis: sync globals back to memory.  */
3684 static void la_global_sync(TCGContext *s, int ng)
3685 {
3686     int i;
3687 
3688     for (i = 0; i < ng; ++i) {
3689         int state = s->temps[i].state;
3690         s->temps[i].state = state | TS_MEM;
3691         if (state == TS_DEAD) {
3692             /* If the global was previously dead, reset prefs.  */
3693             la_reset_pref(&s->temps[i]);
3694         }
3695     }
3696 }
3697 
3698 /*
3699  * liveness analysis: conditional branch: all temps are dead unless
3700  * explicitly live-across-conditional-branch, globals and local temps
3701  * should be synced.
3702  */
3703 static void la_bb_sync(TCGContext *s, int ng, int nt)
3704 {
3705     la_global_sync(s, ng);
3706 
3707     for (int i = ng; i < nt; ++i) {
3708         TCGTemp *ts = &s->temps[i];
3709         int state;
3710 
3711         switch (ts->kind) {
3712         case TEMP_TB:
3713             state = ts->state;
3714             ts->state = state | TS_MEM;
3715             if (state != TS_DEAD) {
3716                 continue;
3717             }
3718             break;
3719         case TEMP_EBB:
3720         case TEMP_CONST:
3721             continue;
3722         default:
3723             g_assert_not_reached();
3724         }
3725         la_reset_pref(&s->temps[i]);
3726     }
3727 }
3728 
3729 /* liveness analysis: sync globals back to memory and kill.  */
3730 static void la_global_kill(TCGContext *s, int ng)
3731 {
3732     int i;
3733 
3734     for (i = 0; i < ng; i++) {
3735         s->temps[i].state = TS_DEAD | TS_MEM;
3736         la_reset_pref(&s->temps[i]);
3737     }
3738 }
3739 
3740 /* liveness analysis: note live globals crossing calls.  */
3741 static void la_cross_call(TCGContext *s, int nt)
3742 {
3743     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3744     int i;
3745 
3746     for (i = 0; i < nt; i++) {
3747         TCGTemp *ts = &s->temps[i];
3748         if (!(ts->state & TS_DEAD)) {
3749             TCGRegSet *pset = la_temp_pref(ts);
3750             TCGRegSet set = *pset;
3751 
3752             set &= mask;
3753             /* If the combination is not possible, restart.  */
3754             if (set == 0) {
3755                 set = tcg_target_available_regs[ts->type] & mask;
3756             }
3757             *pset = set;
3758         }
3759     }
3760 }
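/*
 * Example (illustrative): if a live temp's preference set has shrunk
 * to a single call-clobbered register, masking leaves the empty set,
 * so the preference restarts from all call-saved registers that are
 * valid for the temp's type.
 */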
3761 
3762 /*
3763  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3764  * to TEMP_EBB, if possible.
3765  */
3766 static void __attribute__((noinline))
3767 liveness_pass_0(TCGContext *s)
3768 {
3769     void * const multiple_ebb = (void *)(uintptr_t)-1;
3770     int nb_temps = s->nb_temps;
3771     TCGOp *op, *ebb;
3772 
3773     for (int i = s->nb_globals; i < nb_temps; ++i) {
3774         s->temps[i].state_ptr = NULL;
3775     }
3776 
3777     /*
3778      * Represent each EBB by the op at which it begins.  In the case of
3779      * the first EBB, this is the first op, otherwise it is a label.
3780      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3781      * within a single EBB, else MULTIPLE_EBB.
3782      */
3783     ebb = QTAILQ_FIRST(&s->ops);
3784     QTAILQ_FOREACH(op, &s->ops, link) {
3785         const TCGOpDef *def;
3786         int nb_oargs, nb_iargs;
3787 
3788         switch (op->opc) {
3789         case INDEX_op_set_label:
3790             ebb = op;
3791             continue;
3792         case INDEX_op_discard:
3793             continue;
3794         case INDEX_op_call:
3795             nb_oargs = TCGOP_CALLO(op);
3796             nb_iargs = TCGOP_CALLI(op);
3797             break;
3798         default:
3799             def = &tcg_op_defs[op->opc];
3800             nb_oargs = def->nb_oargs;
3801             nb_iargs = def->nb_iargs;
3802             break;
3803         }
3804 
3805         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3806             TCGTemp *ts = arg_temp(op->args[i]);
3807 
3808             if (ts->kind != TEMP_TB) {
3809                 continue;
3810             }
3811             if (ts->state_ptr == NULL) {
3812                 ts->state_ptr = ebb;
3813             } else if (ts->state_ptr != ebb) {
3814                 ts->state_ptr = multiple_ebb;
3815             }
3816         }
3817     }
3818 
3819     /*
3820      * For TEMP_TB that turned out not to be used beyond one EBB,
3821      * reduce the liveness to TEMP_EBB.
3822      */
3823     for (int i = s->nb_globals; i < nb_temps; ++i) {
3824         TCGTemp *ts = &s->temps[i];
3825         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3826             ts->kind = TEMP_EBB;
3827         }
3828     }
3829 }
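/*
 * Example (illustrative): a TEMP_TB whose every use lies between one
 * label and the next is only ever seen within a single EBB, so it is
 * demoted to TEMP_EBB here; la_bb_end may then treat it as simply
 * dead at a branch instead of forcing a sync to its memory slot.
 */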
3830 
3831 /* Liveness analysis: update the opc_arg_life array to tell whether a
3832    given input argument is dead. Instructions updating dead
3833    temporaries are removed. */
3834 static void __attribute__((noinline))
3835 liveness_pass_1(TCGContext *s)
3836 {
3837     int nb_globals = s->nb_globals;
3838     int nb_temps = s->nb_temps;
3839     TCGOp *op, *op_prev;
3840     TCGRegSet *prefs;
3841     int i;
3842 
3843     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3844     for (i = 0; i < nb_temps; ++i) {
3845         s->temps[i].state_ptr = prefs + i;
3846     }
3847 
3848     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3849     la_func_end(s, nb_globals, nb_temps);
3850 
3851     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3852         int nb_iargs, nb_oargs;
3853         TCGOpcode opc_new, opc_new2;
3854         TCGLifeData arg_life = 0;
3855         TCGTemp *ts;
3856         TCGOpcode opc = op->opc;
3857         const TCGOpDef *def = &tcg_op_defs[opc];
3858         const TCGArgConstraint *args_ct;
3859 
3860         switch (opc) {
3861         case INDEX_op_call:
3862             {
3863                 const TCGHelperInfo *info = tcg_call_info(op);
3864                 int call_flags = tcg_call_flags(op);
3865 
3866                 nb_oargs = TCGOP_CALLO(op);
3867                 nb_iargs = TCGOP_CALLI(op);
3868 
3869                 /* pure functions can be removed if their result is unused */
3870                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3871                     for (i = 0; i < nb_oargs; i++) {
3872                         ts = arg_temp(op->args[i]);
3873                         if (ts->state != TS_DEAD) {
3874                             goto do_not_remove_call;
3875                         }
3876                     }
3877                     goto do_remove;
3878                 }
3879             do_not_remove_call:
3880 
3881                 /* Output args are dead.  */
3882                 for (i = 0; i < nb_oargs; i++) {
3883                     ts = arg_temp(op->args[i]);
3884                     if (ts->state & TS_DEAD) {
3885                         arg_life |= DEAD_ARG << i;
3886                     }
3887                     if (ts->state & TS_MEM) {
3888                         arg_life |= SYNC_ARG << i;
3889                     }
3890                     ts->state = TS_DEAD;
3891                     la_reset_pref(ts);
3892                 }
3893 
3894                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3895                 memset(op->output_pref, 0, sizeof(op->output_pref));
3896 
3897                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3898                                     TCG_CALL_NO_READ_GLOBALS))) {
3899                     la_global_kill(s, nb_globals);
3900                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3901                     la_global_sync(s, nb_globals);
3902                 }
3903 
3904                 /* Record arguments that die in this helper.  */
3905                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3906                     ts = arg_temp(op->args[i]);
3907                     if (ts->state & TS_DEAD) {
3908                         arg_life |= DEAD_ARG << i;
3909                     }
3910                 }
3911 
3912                 /* For all live registers, remove call-clobbered prefs.  */
3913                 la_cross_call(s, nb_temps);
3914 
3915                 /*
3916                  * Input arguments are live for preceding opcodes.
3917                  *
3918                  * For those arguments that die, and will be allocated in
3919                  * registers, clear the register set for that arg, to be
3920                  * filled in below.  For args that will be on the stack,
3921                  * reset to any available reg.  Process arguments in reverse
3922                  * order so that if a temp is used more than once, the stack
3923                  * reset to max happens before the register reset to 0.
3924                  */
3925                 for (i = nb_iargs - 1; i >= 0; i--) {
3926                     const TCGCallArgumentLoc *loc = &info->in[i];
3927                     ts = arg_temp(op->args[nb_oargs + i]);
3928 
3929                     if (ts->state & TS_DEAD) {
3930                         switch (loc->kind) {
3931                         case TCG_CALL_ARG_NORMAL:
3932                         case TCG_CALL_ARG_EXTEND_U:
3933                         case TCG_CALL_ARG_EXTEND_S:
3934                             if (arg_slot_reg_p(loc->arg_slot)) {
3935                                 *la_temp_pref(ts) = 0;
3936                                 break;
3937                             }
3938                             /* fall through */
3939                         default:
3940                             *la_temp_pref(ts) =
3941                                 tcg_target_available_regs[ts->type];
3942                             break;
3943                         }
3944                         ts->state &= ~TS_DEAD;
3945                     }
3946                 }
3947 
3948                 /*
3949                  * For each input argument, add its input register to prefs.
3950                  * If a temp is used once, this produces a single set bit;
3951                  * if a temp is used multiple times, this produces a set.
3952                  */
3953                 for (i = 0; i < nb_iargs; i++) {
3954                     const TCGCallArgumentLoc *loc = &info->in[i];
3955                     ts = arg_temp(op->args[nb_oargs + i]);
3956 
3957                     switch (loc->kind) {
3958                     case TCG_CALL_ARG_NORMAL:
3959                     case TCG_CALL_ARG_EXTEND_U:
3960                     case TCG_CALL_ARG_EXTEND_S:
3961                         if (arg_slot_reg_p(loc->arg_slot)) {
3962                             tcg_regset_set_reg(*la_temp_pref(ts),
3963                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3964                         }
3965                         break;
3966                     default:
3967                         break;
3968                     }
3969                 }
3970             }
3971             break;
3972         case INDEX_op_insn_start:
3973             break;
3974         case INDEX_op_discard:
3975             /* mark the temporary as dead */
3976             ts = arg_temp(op->args[0]);
3977             ts->state = TS_DEAD;
3978             la_reset_pref(ts);
3979             break;
3980 
3981         case INDEX_op_add2_i32:
3982         case INDEX_op_add2_i64:
3983             opc_new = INDEX_op_add;
3984             goto do_addsub2;
3985         case INDEX_op_sub2_i32:
3986         case INDEX_op_sub2_i64:
3987             opc_new = INDEX_op_sub;
3988         do_addsub2:
3989             nb_iargs = 4;
3990             nb_oargs = 2;
3991             /* Test if the high part of the operation is dead, but not
3992                the low part.  The result can be optimized to a simple
3993                add or sub.  This often happens for an x86_64 guest when
3994                the cpu mode is set to 32-bit.  */
3995             if (arg_temp(op->args[1])->state == TS_DEAD) {
3996                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3997                     goto do_remove;
3998                 }
3999                 /* Replace the opcode and adjust the args in place,
4000                    leaving 3 unused args at the end.  */
4001                 op->opc = opc = opc_new;
4002                 op->args[1] = op->args[2];
4003                 op->args[2] = op->args[4];
4004                 /* Fall through and mark the single-word operation live.  */
4005                 nb_iargs = 2;
4006                 nb_oargs = 1;
4007             }
4008             goto do_not_remove;
4009 
4010         case INDEX_op_muls2_i32:
4011         case INDEX_op_muls2_i64:
4012             opc_new = INDEX_op_mul;
4013             opc_new2 = INDEX_op_mulsh;
4014             goto do_mul2;
4015         case INDEX_op_mulu2_i32:
4016         case INDEX_op_mulu2_i64:
4017             opc_new = INDEX_op_mul;
4018             opc_new2 = INDEX_op_muluh;
4019         do_mul2:
4020             nb_iargs = 2;
4021             nb_oargs = 2;
4022             if (arg_temp(op->args[1])->state == TS_DEAD) {
4023                 if (arg_temp(op->args[0])->state == TS_DEAD) {
4024                     /* Both parts of the operation are dead.  */
4025                     goto do_remove;
4026                 }
4027                 /* The high part of the operation is dead; generate the low. */
4028                 op->opc = opc = opc_new;
4029                 op->args[1] = op->args[2];
4030                 op->args[2] = op->args[3];
4031             } else if (arg_temp(op->args[0])->state == TS_DEAD &&
4032                        tcg_op_supported(opc_new2, TCGOP_TYPE(op), 0)) {
4033                 /* The low part of the operation is dead; generate the high. */
4034                 op->opc = opc = opc_new2;
4035                 op->args[0] = op->args[1];
4036                 op->args[1] = op->args[2];
4037                 op->args[2] = op->args[3];
4038             } else {
4039                 goto do_not_remove;
4040             }
4041             /* Mark the single-word operation live.  */
4042             nb_oargs = 1;
4043             goto do_not_remove;
4044 
4045         default:
4046             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
4047             nb_iargs = def->nb_iargs;
4048             nb_oargs = def->nb_oargs;
4049 
4050             /* Test if the operation can be removed because all
4051                its outputs are dead. We assume that nb_oargs == 0
4052                implies side effects.  */
4053             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
4054                 for (i = 0; i < nb_oargs; i++) {
4055                     if (arg_temp(op->args[i])->state != TS_DEAD) {
4056                         goto do_not_remove;
4057                     }
4058                 }
4059                 goto do_remove;
4060             }
4061             goto do_not_remove;
4062 
4063         do_remove:
4064             tcg_op_remove(s, op);
4065             break;
4066 
4067         do_not_remove:
4068             for (i = 0; i < nb_oargs; i++) {
4069                 ts = arg_temp(op->args[i]);
4070 
4071                 /* Remember the preference of the uses that followed.  */
4072                 if (i < ARRAY_SIZE(op->output_pref)) {
4073                     op->output_pref[i] = *la_temp_pref(ts);
4074                 }
4075 
4076                 /* Output args are dead.  */
4077                 if (ts->state & TS_DEAD) {
4078                     arg_life |= DEAD_ARG << i;
4079                 }
4080                 if (ts->state & TS_MEM) {
4081                     arg_life |= SYNC_ARG << i;
4082                 }
4083                 ts->state = TS_DEAD;
4084                 la_reset_pref(ts);
4085             }
4086 
4087             /* If end of basic block, update.  */
4088             if (def->flags & TCG_OPF_BB_EXIT) {
4089                 la_func_end(s, nb_globals, nb_temps);
4090             } else if (def->flags & TCG_OPF_COND_BRANCH) {
4091                 la_bb_sync(s, nb_globals, nb_temps);
4092             } else if (def->flags & TCG_OPF_BB_END) {
4093                 la_bb_end(s, nb_globals, nb_temps);
4094             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4095                 la_global_sync(s, nb_globals);
4096                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4097                     la_cross_call(s, nb_temps);
4098                 }
4099             }
4100 
4101             /* Record arguments that die in this opcode.  */
4102             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4103                 ts = arg_temp(op->args[i]);
4104                 if (ts->state & TS_DEAD) {
4105                     arg_life |= DEAD_ARG << i;
4106                 }
4107             }
4108 
4109             /* Input arguments are live for preceding opcodes.  */
4110             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4111                 ts = arg_temp(op->args[i]);
4112                 if (ts->state & TS_DEAD) {
4113                     /* For operands that were dead, initially allow
4114                        all regs for the type.  */
4115                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
4116                     ts->state &= ~TS_DEAD;
4117                 }
4118             }
4119 
4120             /* Incorporate constraints for this operand.  */
4121             switch (opc) {
4122             case INDEX_op_mov:
4123                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
4124                    have proper constraints.  That said, special case
4125                    moves to propagate preferences backward.  */
4126                 if (IS_DEAD_ARG(1)) {
4127                     *la_temp_pref(arg_temp(op->args[0]))
4128                         = *la_temp_pref(arg_temp(op->args[1]));
4129                 }
4130                 break;
4131 
4132             default:
4133                 args_ct = opcode_args_ct(op);
4134                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4135                     const TCGArgConstraint *ct = &args_ct[i];
4136                     TCGRegSet set, *pset;
4137 
4138                     ts = arg_temp(op->args[i]);
4139                     pset = la_temp_pref(ts);
4140                     set = *pset;
4141 
4142                     set &= ct->regs;
4143                     if (ct->ialias) {
4144                         set &= output_pref(op, ct->alias_index);
4145                     }
4146                     /* If the combination is not possible, restart.  */
4147                     if (set == 0) {
4148                         set = ct->regs;
4149                     }
4150                     *pset = set;
4151                 }
4152                 break;
4153             }
4154             break;
4155         }
4156         op->life = arg_life;
4157     }
4158 }
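/*
 * Example (illustrative): for "add t0, t1, t2" where t1 is not used
 * afterwards, this pass records DEAD_ARG << 1 in op->life; the
 * register allocator may then reuse t1's register for the output t0.
 * SYNC_ARG bits likewise request a store back to the temp's memory
 * slot at that point.
 */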
4159 
4160 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
4161 static bool __attribute__((noinline))
4162 liveness_pass_2(TCGContext *s)
4163 {
4164     int nb_globals = s->nb_globals;
4165     int nb_temps, i;
4166     bool changes = false;
4167     TCGOp *op, *op_next;
4168 
4169     /* Create a temporary for each indirect global.  */
4170     for (i = 0; i < nb_globals; ++i) {
4171         TCGTemp *its = &s->temps[i];
4172         if (its->indirect_reg) {
4173             TCGTemp *dts = tcg_temp_alloc(s);
4174             dts->type = its->type;
4175             dts->base_type = its->base_type;
4176             dts->temp_subindex = its->temp_subindex;
4177             dts->kind = TEMP_EBB;
4178             its->state_ptr = dts;
4179         } else {
4180             its->state_ptr = NULL;
4181         }
4182         /* All globals begin dead.  */
4183         its->state = TS_DEAD;
4184     }
4185     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
4186         TCGTemp *its = &s->temps[i];
4187         its->state_ptr = NULL;
4188         its->state = TS_DEAD;
4189     }
4190 
4191     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
4192         TCGOpcode opc = op->opc;
4193         const TCGOpDef *def = &tcg_op_defs[opc];
4194         TCGLifeData arg_life = op->life;
4195         int nb_iargs, nb_oargs, call_flags;
4196         TCGTemp *arg_ts, *dir_ts;
4197 
4198         if (opc == INDEX_op_call) {
4199             nb_oargs = TCGOP_CALLO(op);
4200             nb_iargs = TCGOP_CALLI(op);
4201             call_flags = tcg_call_flags(op);
4202         } else {
4203             nb_iargs = def->nb_iargs;
4204             nb_oargs = def->nb_oargs;
4205 
4206             /* Set flags similar to how calls require.  */
4207             if (def->flags & TCG_OPF_COND_BRANCH) {
4208                 /* Like reading globals: sync_globals */
4209                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4210             } else if (def->flags & TCG_OPF_BB_END) {
4211                 /* Like writing globals: save_globals */
4212                 call_flags = 0;
4213             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4214                 /* Like reading globals: sync_globals */
4215                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
4216             } else {
4217                 /* No effect on globals.  */
4218                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
4219                               TCG_CALL_NO_WRITE_GLOBALS);
4220             }
4221         }
4222 
4223         /* Make sure that input arguments are available.  */
4224         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4225             arg_ts = arg_temp(op->args[i]);
4226             dir_ts = arg_ts->state_ptr;
4227             if (dir_ts && arg_ts->state == TS_DEAD) {
4228                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
4229                                   ? INDEX_op_ld_i32
4230                                   : INDEX_op_ld_i64);
4231                 TCGOp *lop = tcg_op_insert_before(s, op, lopc,
4232                                                   arg_ts->type, 3);
4233 
4234                 lop->args[0] = temp_arg(dir_ts);
4235                 lop->args[1] = temp_arg(arg_ts->mem_base);
4236                 lop->args[2] = arg_ts->mem_offset;
4237 
4238                 /* Loaded, but synced with memory.  */
4239                 arg_ts->state = TS_MEM;
4240             }
4241         }
4242 
4243         /* Perform input replacement, and mark inputs that became dead.
4244            No action is required except keeping temp_state up to date
4245            so that we reload when needed.  */
4246         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4247             arg_ts = arg_temp(op->args[i]);
4248             dir_ts = arg_ts->state_ptr;
4249             if (dir_ts) {
4250                 op->args[i] = temp_arg(dir_ts);
4251                 changes = true;
4252                 if (IS_DEAD_ARG(i)) {
4253                     arg_ts->state = TS_DEAD;
4254                 }
4255             }
4256         }
4257 
4258         /* Liveness analysis should ensure that the following are
4259            all correct, for call sites and basic block end points.  */
4260         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4261             /* Nothing to do */
4262         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4263             for (i = 0; i < nb_globals; ++i) {
4264                 /* Liveness should see that globals are synced back,
4265                    that is, either TS_DEAD or TS_MEM.  */
4266                 arg_ts = &s->temps[i];
4267                 tcg_debug_assert(arg_ts->state_ptr == 0
4268                                  || arg_ts->state != 0);
4269             }
4270         } else {
4271             for (i = 0; i < nb_globals; ++i) {
4272                 /* Liveness should see that globals are saved back,
4273                    that is, TS_DEAD, waiting to be reloaded.  */
4274                 arg_ts = &s->temps[i];
4275                 tcg_debug_assert(arg_ts->state_ptr == 0
4276                                  || arg_ts->state == TS_DEAD);
4277             }
4278         }
4279 
4280         /* Outputs become available.  */
4281         if (opc == INDEX_op_mov) {
4282             arg_ts = arg_temp(op->args[0]);
4283             dir_ts = arg_ts->state_ptr;
4284             if (dir_ts) {
4285                 op->args[0] = temp_arg(dir_ts);
4286                 changes = true;
4287 
4288                 /* The output is now live and modified.  */
4289                 arg_ts->state = 0;
4290 
4291                 if (NEED_SYNC_ARG(0)) {
4292                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4293                                       ? INDEX_op_st_i32
4294                                       : INDEX_op_st_i64);
4295                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4296                                                      arg_ts->type, 3);
4297                     TCGTemp *out_ts = dir_ts;
4298 
4299                     if (IS_DEAD_ARG(0)) {
4300                         out_ts = arg_temp(op->args[1]);
4301                         arg_ts->state = TS_DEAD;
4302                         tcg_op_remove(s, op);
4303                     } else {
4304                         arg_ts->state = TS_MEM;
4305                     }
4306 
4307                     sop->args[0] = temp_arg(out_ts);
4308                     sop->args[1] = temp_arg(arg_ts->mem_base);
4309                     sop->args[2] = arg_ts->mem_offset;
4310                 } else {
4311                     tcg_debug_assert(!IS_DEAD_ARG(0));
4312                 }
4313             }
4314         } else {
4315             for (i = 0; i < nb_oargs; i++) {
4316                 arg_ts = arg_temp(op->args[i]);
4317                 dir_ts = arg_ts->state_ptr;
4318                 if (!dir_ts) {
4319                     continue;
4320                 }
4321                 op->args[i] = temp_arg(dir_ts);
4322                 changes = true;
4323 
4324                 /* The output is now live and modified.  */
4325                 arg_ts->state = 0;
4326 
4327                 /* Sync outputs upon their last write.  */
4328                 if (NEED_SYNC_ARG(i)) {
4329                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4330                                       ? INDEX_op_st_i32
4331                                       : INDEX_op_st_i64);
4332                     TCGOp *sop = tcg_op_insert_after(s, op, sopc,
4333                                                      arg_ts->type, 3);
4334 
4335                     sop->args[0] = temp_arg(dir_ts);
4336                     sop->args[1] = temp_arg(arg_ts->mem_base);
4337                     sop->args[2] = arg_ts->mem_offset;
4338 
4339                     arg_ts->state = TS_MEM;
4340                 }
4341                 /* Drop outputs that are dead.  */
4342                 if (IS_DEAD_ARG(i)) {
4343                     arg_ts->state = TS_DEAD;
4344                 }
4345             }
4346         }
4347     }
4348 
4349     return changes;
4350 }
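/*
 * Example (illustrative, hypothetical offsets): for an indirect global
 * g based off env, a use such as "add t0, g, t1" gains an inserted
 * "ld tmp, env, g_offset" before it and has g rewritten to the direct
 * temporary tmp; when NEED_SYNC_ARG is set on a write, a matching st
 * is inserted after the defining op.
 */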
4351 
4352 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4353 {
4354     intptr_t off;
4355     int size, align;
4356 
4357     /* When allocating an object, look at the full type. */
4358     size = tcg_type_size(ts->base_type);
4359     switch (ts->base_type) {
4360     case TCG_TYPE_I32:
4361         align = 4;
4362         break;
4363     case TCG_TYPE_I64:
4364     case TCG_TYPE_V64:
4365         align = 8;
4366         break;
4367     case TCG_TYPE_I128:
4368     case TCG_TYPE_V128:
4369     case TCG_TYPE_V256:
4370         /*
4371          * Note that we do not require aligned storage for V256,
4372          * and that we provide alignment for I128 to match V128,
4373          * even if that's above what the host ABI requires.
4374          */
4375         align = 16;
4376         break;
4377     default:
4378         g_assert_not_reached();
4379     }
4380 
4381     /*
4382      * Assume the stack is sufficiently aligned.
4383      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4384      * and do not require 16 byte vector alignment.  This seems slightly
4385      * easier than fully parameterizing the above switch statement.
4386      */
4387     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4388     off = ROUND_UP(s->current_frame_offset, align);
4389 
4390     /* If we've exhausted the stack frame, restart with a smaller TB. */
4391     if (off + size > s->frame_end) {
4392         tcg_raise_tb_overflow(s);
4393     }
4394     s->current_frame_offset = off + size;
4395 #if defined(__sparc__)
4396     off += TCG_TARGET_STACK_BIAS;
4397 #endif
4398 
4399     /* If the object was subdivided, assign memory to all the parts. */
4400     if (ts->base_type != ts->type) {
4401         int part_size = tcg_type_size(ts->type);
4402         int part_count = size / part_size;
4403 
4404         /*
4405          * Each part is allocated sequentially in tcg_temp_new_internal.
4406          * Jump back to the first part by subtracting the current index.
4407          */
4408         ts -= ts->temp_subindex;
4409         for (int i = 0; i < part_count; ++i) {
4410             ts[i].mem_offset = off + i * part_size;
4411             ts[i].mem_base = s->frame_temp;
4412             ts[i].mem_allocated = 1;
4413         }
4414     } else {
4415         ts->mem_offset = off;
4416         ts->mem_base = s->frame_temp;
4417         ts->mem_allocated = 1;
4418     }
4419 }
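/*
 * Worked example (assuming TCG_TARGET_STACK_ALIGN >= 8): with
 * current_frame_offset == 12, a TCG_TYPE_I64 temp is placed at
 * ROUND_UP(12, 8) == 16 and the offset advances to 24.  An I128 that
 * was split into two I64 parts has both halves laid out contiguously
 * by the temp_subindex loop above.
 */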
4420 
4421 /* Assign @reg to @ts, and update reg_to_temp[]. */
4422 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4423 {
4424     if (ts->val_type == TEMP_VAL_REG) {
4425         TCGReg old = ts->reg;
4426         tcg_debug_assert(s->reg_to_temp[old] == ts);
4427         if (old == reg) {
4428             return;
4429         }
4430         s->reg_to_temp[old] = NULL;
4431     }
4432     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4433     s->reg_to_temp[reg] = ts;
4434     ts->val_type = TEMP_VAL_REG;
4435     ts->reg = reg;
4436 }
4437 
4438 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4439 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4440 {
4441     tcg_debug_assert(type != TEMP_VAL_REG);
4442     if (ts->val_type == TEMP_VAL_REG) {
4443         TCGReg reg = ts->reg;
4444         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4445         s->reg_to_temp[reg] = NULL;
4446     }
4447     ts->val_type = type;
4448 }
4449 
4450 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4451 
4452 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4453    mark it free; otherwise mark it dead.  */
4454 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4455 {
4456     TCGTempVal new_type;
4457 
4458     switch (ts->kind) {
4459     case TEMP_FIXED:
4460         return;
4461     case TEMP_GLOBAL:
4462     case TEMP_TB:
4463         new_type = TEMP_VAL_MEM;
4464         break;
4465     case TEMP_EBB:
4466         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4467         break;
4468     case TEMP_CONST:
4469         new_type = TEMP_VAL_CONST;
4470         break;
4471     default:
4472         g_assert_not_reached();
4473     }
4474     set_temp_val_nonreg(s, ts, new_type);
4475 }
4476 
4477 /* Mark a temporary as dead.  */
4478 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4479 {
4480     temp_free_or_dead(s, ts, 1);
4481 }
4482 
4483 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4484    register needs to be allocated to store a constant.  If 'free_or_dead'
4485    is non-zero, subsequently release the temporary; if it is positive, the
4486    temp is dead; if it is negative, the temp is free.  */
4487 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4488                       TCGRegSet preferred_regs, int free_or_dead)
4489 {
4490     if (!temp_readonly(ts) && !ts->mem_coherent) {
4491         if (!ts->mem_allocated) {
4492             temp_allocate_frame(s, ts);
4493         }
4494         switch (ts->val_type) {
4495         case TEMP_VAL_CONST:
4496             /* If we're going to free the temp immediately, then we won't
4497                require it later in a register, so attempt to store the
4498                constant to memory directly.  */
4499             if (free_or_dead
4500                 && tcg_out_sti(s, ts->type, ts->val,
4501                                ts->mem_base->reg, ts->mem_offset)) {
4502                 break;
4503             }
4504             temp_load(s, ts, tcg_target_available_regs[ts->type],
4505                       allocated_regs, preferred_regs);
4506             /* fallthrough */
4507 
4508         case TEMP_VAL_REG:
4509             tcg_out_st(s, ts->type, ts->reg,
4510                        ts->mem_base->reg, ts->mem_offset);
4511             break;
4512 
4513         case TEMP_VAL_MEM:
4514             break;
4515 
4516         case TEMP_VAL_DEAD:
4517         default:
4518             g_assert_not_reached();
4519         }
4520         ts->mem_coherent = 1;
4521     }
4522     if (free_or_dead) {
4523         temp_free_or_dead(s, ts, free_or_dead);
4524     }
4525 }
4526 
4527 /* free register 'reg' by spilling the corresponding temporary if necessary */
4528 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4529 {
4530     TCGTemp *ts = s->reg_to_temp[reg];
4531     if (ts != NULL) {
4532         temp_sync(s, ts, allocated_regs, 0, -1);
4533     }
4534 }
4535 
4536 /**
4537  * tcg_reg_alloc:
4538  * @required_regs: Set of registers in which we must allocate.
4539  * @allocated_regs: Set of registers which must be avoided.
4540  * @preferred_regs: Set of registers we should prefer.
4541  * @rev: True if we search the registers in "indirect" order.
4542  *
4543  * The allocated register must be in @required_regs & ~@allocated_regs,
4544  * but if we can put it in @preferred_regs we may save a move later.
4545  */
4546 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4547                             TCGRegSet allocated_regs,
4548                             TCGRegSet preferred_regs, bool rev)
4549 {
4550     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4551     TCGRegSet reg_ct[2];
4552     const int *order;
4553 
4554     reg_ct[1] = required_regs & ~allocated_regs;
4555     tcg_debug_assert(reg_ct[1] != 0);
4556     reg_ct[0] = reg_ct[1] & preferred_regs;
4557 
4558     /* Skip the preferred_regs option if it cannot be satisfied,
4559        or if the preference made no difference.  */
4560     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4561 
4562     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4563 
4564     /* Try free registers, preferences first.  */
4565     for (j = f; j < 2; j++) {
4566         TCGRegSet set = reg_ct[j];
4567 
4568         if (tcg_regset_single(set)) {
4569             /* One register in the set.  */
4570             TCGReg reg = tcg_regset_first(set);
4571             if (s->reg_to_temp[reg] == NULL) {
4572                 return reg;
4573             }
4574         } else {
4575             for (i = 0; i < n; i++) {
4576                 TCGReg reg = order[i];
4577                 if (s->reg_to_temp[reg] == NULL &&
4578                     tcg_regset_test_reg(set, reg)) {
4579                     return reg;
4580                 }
4581             }
4582         }
4583     }
4584 
4585     /* We must spill something.  */
4586     for (j = f; j < 2; j++) {
4587         TCGRegSet set = reg_ct[j];
4588 
4589         if (tcg_regset_single(set)) {
4590             /* One register in the set.  */
4591             TCGReg reg = tcg_regset_first(set);
4592             tcg_reg_free(s, reg, allocated_regs);
4593             return reg;
4594         } else {
4595             for (i = 0; i < n; i++) {
4596                 TCGReg reg = order[i];
4597                 if (tcg_regset_test_reg(set, reg)) {
4598                     tcg_reg_free(s, reg, allocated_regs);
4599                     return reg;
4600                 }
4601             }
4602         }
4603     }
4604 
4605     g_assert_not_reached();
4606 }
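/*
 * Note (hypothetical register names): the two similar loops implement
 * "use a free register if possible, spill only otherwise".  With
 * required = {EAX, EDX}, EDX free and EAX holding a temp, the first
 * loop returns EDX and no spill store is emitted.
 */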
4607 
4608 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4609                                  TCGRegSet allocated_regs,
4610                                  TCGRegSet preferred_regs, bool rev)
4611 {
4612     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4613     TCGRegSet reg_ct[2];
4614     const int *order;
4615 
4616     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
4617     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4618     tcg_debug_assert(reg_ct[1] != 0);
4619     reg_ct[0] = reg_ct[1] & preferred_regs;
4620 
4621     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4622 
4623     /*
4624      * Skip the preferred_regs option if it cannot be satisfied,
4625      * or if the preference made no difference.
4626      */
4627     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4628 
4629     /*
4630      * Minimize the number of flushes by looking for 2 free registers first,
4631      * then a single flush, then two flushes.
4632      */
4633     for (fmin = 2; fmin >= 0; fmin--) {
4634         for (j = k; j < 2; j++) {
4635             TCGRegSet set = reg_ct[j];
4636 
4637             for (i = 0; i < n; i++) {
4638                 TCGReg reg = order[i];
4639 
4640                 if (tcg_regset_test_reg(set, reg)) {
4641                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4642                     if (f >= fmin) {
4643                         tcg_reg_free(s, reg, allocated_regs);
4644                         tcg_reg_free(s, reg + 1, allocated_regs);
4645                         return reg;
4646                     }
4647                 }
4648             }
4649         }
4650     }
4651     g_assert_not_reached();
4652 }
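/*
 * Note: fmin counts how many of the two candidate registers are
 * already free (2, then 1, then 0), so a pair costing no spills is
 * preferred over one flush, and one flush over two.
 */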
4653 
4654 /* Make sure the temporary is in a register.  If needed, allocate the register
4655    from DESIRED while avoiding ALLOCATED.  */
4656 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4657                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4658 {
4659     TCGReg reg;
4660 
4661     switch (ts->val_type) {
4662     case TEMP_VAL_REG:
4663         return;
4664     case TEMP_VAL_CONST:
4665         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4666                             preferred_regs, ts->indirect_base);
4667         if (ts->type <= TCG_TYPE_I64) {
4668             tcg_out_movi(s, ts->type, reg, ts->val);
4669         } else {
4670             uint64_t val = ts->val;
4671             MemOp vece = MO_64;
4672 
4673             /*
4674              * Find the minimal vector element that matches the constant.
4675              * The targets will, in general, have to do this search anyway,
4676              * so do it generically here.
4677              */
4678             if (val == dup_const(MO_8, val)) {
4679                 vece = MO_8;
4680             } else if (val == dup_const(MO_16, val)) {
4681                 vece = MO_16;
4682             } else if (val == dup_const(MO_32, val)) {
4683                 vece = MO_32;
4684             }
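            /*
             * E.g. val == 0x7f7f7f7f7f7f7f7f equals dup_const(MO_8, 0x7f)
             * and becomes a byte broadcast, while 0x0001000200010002
             * first matches at MO_32.
             */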
4685 
4686             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4687         }
4688         ts->mem_coherent = 0;
4689         break;
4690     case TEMP_VAL_MEM:
4691         if (!ts->mem_allocated) {
4692             temp_allocate_frame(s, ts);
4693         }
4694         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4695                             preferred_regs, ts->indirect_base);
4696         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4697         ts->mem_coherent = 1;
4698         break;
4699     case TEMP_VAL_DEAD:
4700     default:
4701         g_assert_not_reached();
4702     }
4703     set_temp_val_reg(s, ts, reg);
4704 }
4705 
4706 /* Save a temporary to memory. 'allocated_regs' is used in case a
4707    temporary register needs to be allocated to store a constant.  */
4708 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4709 {
4710     /* The liveness analysis already ensures that globals are back
4711        in memory. Keep a tcg_debug_assert for safety. */
4712     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4713 }
4714 
4715 /* save globals to their canonical location and assume they can be
4716    modified by the following code. 'allocated_regs' is used in case a
4717    temporary register needs to be allocated to store a constant. */
4718 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4719 {
4720     int i, n;
4721 
4722     for (i = 0, n = s->nb_globals; i < n; i++) {
4723         temp_save(s, &s->temps[i], allocated_regs);
4724     }
4725 }
4726 
4727 /* sync globals to their canonical location and assume they can be
4728    read by the following code. 'allocated_regs' is used in case a
4729    temporary register needs to be allocated to store a constant. */
4730 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4731 {
4732     int i, n;
4733 
4734     for (i = 0, n = s->nb_globals; i < n; i++) {
4735         TCGTemp *ts = &s->temps[i];
4736         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4737                          || ts->kind == TEMP_FIXED
4738                          || ts->mem_coherent);
4739     }
4740 }
4741 
4742 /* at the end of a basic block, we assume all temporaries are dead and
4743    all globals are stored at their canonical location. */
4744 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4745 {
4746     int i;
4747 
4748     for (i = s->nb_globals; i < s->nb_temps; i++) {
4749         TCGTemp *ts = &s->temps[i];
4750 
4751         switch (ts->kind) {
4752         case TEMP_TB:
4753             temp_save(s, ts, allocated_regs);
4754             break;
4755         case TEMP_EBB:
4756             /* The liveness analysis already ensures that temps are dead.
4757                Keep a tcg_debug_assert for safety. */
4758             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4759             break;
4760         case TEMP_CONST:
4761             /* Similarly, we should have freed any allocated register. */
4762             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4763             break;
4764         default:
4765             g_assert_not_reached();
4766         }
4767     }
4768 
4769     save_globals(s, allocated_regs);
4770 }
4771 
4772 /*
4773  * At a conditional branch, we assume all temporaries are dead unless
4774  * explicitly live-across-conditional-branch; all globals and local
4775  * temps are synced to their location.
4776  */
4777 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4778 {
4779     sync_globals(s, allocated_regs);
4780 
4781     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4782         TCGTemp *ts = &s->temps[i];
4783         /*
4784          * The liveness analysis already ensures that temps are dead.
4785          * Keep tcg_debug_asserts for safety.
4786          */
4787         switch (ts->kind) {
4788         case TEMP_TB:
4789             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4790             break;
4791         case TEMP_EBB:
4792         case TEMP_CONST:
4793             break;
4794         default:
4795             g_assert_not_reached();
4796         }
4797     }
4798 }
4799 
4800 /*
4801  * Specialized code generation for INDEX_op_mov_* with a constant.
4802  */
4803 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4804                                   tcg_target_ulong val, TCGLifeData arg_life,
4805                                   TCGRegSet preferred_regs)
4806 {
4807     /* ENV should not be modified.  */
4808     tcg_debug_assert(!temp_readonly(ots));
4809 
4810     /* The movi is not explicitly generated here.  */
4811     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4812     ots->val = val;
4813     ots->mem_coherent = 0;
4814     if (NEED_SYNC_ARG(0)) {
4815         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4816     } else if (IS_DEAD_ARG(0)) {
4817         temp_dead(s, ots);
4818     }
4819 }
4820 
4821 /*
4822  * Specialized code generation for INDEX_op_mov_*.
4823  */
4824 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4825 {
4826     const TCGLifeData arg_life = op->life;
4827     TCGRegSet allocated_regs, preferred_regs;
4828     TCGTemp *ts, *ots;
4829     TCGType otype, itype;
4830     TCGReg oreg, ireg;
4831 
4832     allocated_regs = s->reserved_regs;
4833     preferred_regs = output_pref(op, 0);
4834     ots = arg_temp(op->args[0]);
4835     ts = arg_temp(op->args[1]);
4836 
4837     /* ENV should not be modified.  */
4838     tcg_debug_assert(!temp_readonly(ots));
4839 
4840     /* Note that otype != itype for no-op truncation.  */
4841     otype = ots->type;
4842     itype = ts->type;
4843 
4844     if (ts->val_type == TEMP_VAL_CONST) {
4845         /* propagate constant or generate sti */
4846         tcg_target_ulong val = ts->val;
4847         if (IS_DEAD_ARG(1)) {
4848             temp_dead(s, ts);
4849         }
4850         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4851         return;
4852     }
4853 
4854     /* If the source value is in memory we're going to be forced
4855        to have it in a register in order to perform the copy.  Copy
4856        the SOURCE value into its own register first, that way we
4857        don't have to reload SOURCE the next time it is used. */
4858     if (ts->val_type == TEMP_VAL_MEM) {
4859         temp_load(s, ts, tcg_target_available_regs[itype],
4860                   allocated_regs, preferred_regs);
4861     }
4862     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4863     ireg = ts->reg;
4864 
4865     if (IS_DEAD_ARG(0)) {
4866         /* mov to a non-saved dead register makes no sense (even with
4867            liveness analysis disabled). */
4868         tcg_debug_assert(NEED_SYNC_ARG(0));
4869         if (!ots->mem_allocated) {
4870             temp_allocate_frame(s, ots);
4871         }
4872         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4873         if (IS_DEAD_ARG(1)) {
4874             temp_dead(s, ts);
4875         }
4876         temp_dead(s, ots);
4877         return;
4878     }
4879 
4880     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4881         /*
4882          * The mov can be suppressed.  Kill input first, so that it
4883          * is unlinked from reg_to_temp, then set the output to the
4884          * reg that we saved from the input.
4885          */
4886         temp_dead(s, ts);
4887         oreg = ireg;
4888     } else {
4889         if (ots->val_type == TEMP_VAL_REG) {
4890             oreg = ots->reg;
4891         } else {
4892             /* Make sure to not spill the input register during allocation. */
4893             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4894                                  allocated_regs | ((TCGRegSet)1 << ireg),
4895                                  preferred_regs, ots->indirect_base);
4896         }
4897         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4898             /*
4899              * Cross register class move not supported.
4900              * Store the source register into the destination slot
4901              * and leave the destination temp as TEMP_VAL_MEM.
4902              */
4903             assert(!temp_readonly(ots));
4904             if (!ots->mem_allocated) {
4905                 temp_allocate_frame(s, ots);
4906             }
4907             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4908             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4909             ots->mem_coherent = 1;
4910             return;
4911         }
4912     }
4913     set_temp_val_reg(s, ots, oreg);
4914     ots->mem_coherent = 0;
4915 
4916     if (NEED_SYNC_ARG(0)) {
4917         temp_sync(s, ots, allocated_regs, 0, 0);
4918     }
4919 }
4920 
4921 /*
4922  * Specialized code generation for INDEX_op_dup_vec.
4923  */
4924 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4925 {
4926     const TCGLifeData arg_life = op->life;
4927     TCGRegSet dup_out_regs, dup_in_regs;
4928     const TCGArgConstraint *dup_args_ct;
4929     TCGTemp *its, *ots;
4930     TCGType itype, vtype;
4931     unsigned vece;
4932     int lowpart_ofs;
4933     bool ok;
4934 
4935     ots = arg_temp(op->args[0]);
4936     its = arg_temp(op->args[1]);
4937 
4938     /* ENV should not be modified.  */
4939     tcg_debug_assert(!temp_readonly(ots));
4940 
4941     itype = its->type;
4942     vece = TCGOP_VECE(op);
4943     vtype = TCGOP_TYPE(op);
4944 
4945     if (its->val_type == TEMP_VAL_CONST) {
4946         /* Propagate constant via movi -> dupi.  */
4947         tcg_target_ulong val = its->val;
4948         if (IS_DEAD_ARG(1)) {
4949             temp_dead(s, its);
4950         }
4951         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4952         return;
4953     }
4954 
4955     dup_args_ct = opcode_args_ct(op);
4956     dup_out_regs = dup_args_ct[0].regs;
4957     dup_in_regs = dup_args_ct[1].regs;
4958 
4959     /* Allocate the output register now.  */
4960     if (ots->val_type != TEMP_VAL_REG) {
4961         TCGRegSet allocated_regs = s->reserved_regs;
4962         TCGReg oreg;
4963 
4964         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4965             /* Make sure to not spill the input register. */
4966             tcg_regset_set_reg(allocated_regs, its->reg);
4967         }
4968         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4969                              output_pref(op, 0), ots->indirect_base);
4970         set_temp_val_reg(s, ots, oreg);
4971     }
4972 
4973     switch (its->val_type) {
4974     case TEMP_VAL_REG:
4975         /*
4976          * The dup constraints must be broad, covering all possible VECE.
4977          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4978          * to fail, indicating that extra moves are required for that case.
4979          */
4980         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4981             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4982                 goto done;
4983             }
4984             /* Try again from memory or a vector input register.  */
4985         }
4986         if (!its->mem_coherent) {
4987             /*
4988              * The input register is not synced, and so an extra store
4989              * would be required to use memory.  Attempt an integer-vector
4990              * register move first.  We do not have a TCGRegSet for this.
4991              */
4992             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4993                 break;
4994             }
4995             /* Sync the temp back to its slot and load from there.  */
4996             temp_sync(s, its, s->reserved_regs, 0, 0);
4997         }
4998         /* fall through */
4999 
5000     case TEMP_VAL_MEM:
5001         lowpart_ofs = 0;
5002         if (HOST_BIG_ENDIAN) {
5003             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
5004         }
5005         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
5006                              its->mem_offset + lowpart_ofs)) {
5007             goto done;
5008         }
5009         /* Load the input into the destination vector register. */
5010         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
5011         break;
5012 
5013     default:
5014         g_assert_not_reached();
5015     }
5016 
5017     /* We now have a vector input register, so dup must succeed. */
5018     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
5019     tcg_debug_assert(ok);
5020 
5021  done:
5022     ots->mem_coherent = 0;
5023     if (IS_DEAD_ARG(1)) {
5024         temp_dead(s, its);
5025     }
5026     if (NEED_SYNC_ARG(0)) {
5027         temp_sync(s, ots, s->reserved_regs, 0, 0);
5028     }
5029     if (IS_DEAD_ARG(0)) {
5030         temp_dead(s, ots);
5031     }
5032 }
5033 
5034 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
5035 {
5036     const TCGLifeData arg_life = op->life;
5037     const TCGOpDef * const def = &tcg_op_defs[op->opc];
5038     TCGRegSet i_allocated_regs;
5039     TCGRegSet o_allocated_regs;
5040     int i, k, nb_iargs, nb_oargs;
5041     TCGReg reg;
5042     TCGArg arg;
5043     const TCGArgConstraint *args_ct;
5044     const TCGArgConstraint *arg_ct;
5045     TCGTemp *ts;
5046     TCGArg new_args[TCG_MAX_OP_ARGS];
5047     int const_args[TCG_MAX_OP_ARGS];
5048     TCGCond op_cond;
5049 
5050     nb_oargs = def->nb_oargs;
5051     nb_iargs = def->nb_iargs;
5052 
5053     /* copy constants */
5054     memcpy(new_args + nb_oargs + nb_iargs,
5055            op->args + nb_oargs + nb_iargs,
5056            sizeof(TCGArg) * def->nb_cargs);
5057 
5058     i_allocated_regs = s->reserved_regs;
5059     o_allocated_regs = s->reserved_regs;
5060 
5061     switch (op->opc) {
5062     case INDEX_op_brcond_i32:
5063     case INDEX_op_brcond_i64:
5064         op_cond = op->args[2];
5065         break;
5066     case INDEX_op_setcond_i32:
5067     case INDEX_op_setcond_i64:
5068     case INDEX_op_negsetcond_i32:
5069     case INDEX_op_negsetcond_i64:
5070     case INDEX_op_cmp_vec:
5071         op_cond = op->args[3];
5072         break;
5073     case INDEX_op_brcond2_i32:
5074         op_cond = op->args[4];
5075         break;
5076     case INDEX_op_movcond_i32:
5077     case INDEX_op_movcond_i64:
5078     case INDEX_op_setcond2_i32:
5079     case INDEX_op_cmpsel_vec:
5080         op_cond = op->args[5];
5081         break;
5082     default:
5083         /* No condition within opcode. */
5084         op_cond = TCG_COND_ALWAYS;
5085         break;
5086     }
5087 
5088     args_ct = opcode_args_ct(op);
5089 
5090     /* satisfy input constraints */
5091     for (k = 0; k < nb_iargs; k++) {
5092         TCGRegSet i_preferred_regs, i_required_regs;
5093         bool allocate_new_reg, copyto_new_reg;
5094         TCGTemp *ts2;
5095         int i1, i2;
5096 
5097         i = args_ct[nb_oargs + k].sort_index;
5098         arg = op->args[i];
5099         arg_ct = &args_ct[i];
5100         ts = arg_temp(arg);
5101 
5102         if (ts->val_type == TEMP_VAL_CONST) {
5103 #ifdef TCG_REG_ZERO
5104             if (ts->val == 0 && (arg_ct->ct & TCG_CT_REG_ZERO)) {
5105                 /* Hardware zero register: indicate register via non-const. */
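                /* E.g. RISC-V's x0 reads as zero, so no instruction is
                   needed to materialize the constant. */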
5106                 const_args[i] = 0;
5107                 new_args[i] = TCG_REG_ZERO;
5108                 continue;
5109             }
5110 #endif
5111 
5112             if (tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
5113                                        op_cond, TCGOP_VECE(op))) {
5114                 /* constant is OK for instruction */
5115                 const_args[i] = 1;
5116                 new_args[i] = ts->val;
5117                 continue;
5118             }
5119         }
5120 
5121         reg = ts->reg;
5122         i_preferred_regs = 0;
5123         i_required_regs = arg_ct->regs;
5124         allocate_new_reg = false;
5125         copyto_new_reg = false;
5126 
5127         switch (arg_ct->pair) {
5128         case 0: /* not paired */
5129             if (arg_ct->ialias) {
5130                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5131 
5132                 /*
5133                  * If the input is readonly, then it cannot also be an
5134                  * output and aliased to itself.  If the input is not
5135                  * dead after the instruction, we must allocate a new
5136                  * register and move it.
5137                  */
5138                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
5139                     || args_ct[arg_ct->alias_index].newreg) {
5140                     allocate_new_reg = true;
5141                 } else if (ts->val_type == TEMP_VAL_REG) {
5142                     /*
5143                      * Check if the current register has already been
5144                      * allocated for another input.
5145                      */
5146                     allocate_new_reg =
5147                         tcg_regset_test_reg(i_allocated_regs, reg);
5148                 }
5149             }
5150             if (!allocate_new_reg) {
5151                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5152                           i_preferred_regs);
5153                 reg = ts->reg;
5154                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
5155             }
5156             if (allocate_new_reg) {
5157                 /*
5158                  * Allocate a new register matching the constraint
5159                  * and move the temporary register into it.
5160                  */
5161                 temp_load(s, ts, tcg_target_available_regs[ts->type],
5162                           i_allocated_regs, 0);
5163                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
5164                                     i_preferred_regs, ts->indirect_base);
5165                 copyto_new_reg = true;
5166             }
5167             break;
5168 
5169         case 1:
5170             /* First of an input pair; if i1 == i2, the second is an output. */
5171             i1 = i;
5172             i2 = arg_ct->pair_index;
5173             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
5174 
5175             /*
5176              * It is easier to default to allocating a new pair
5177              * and to identify a few cases where it's not required.
5178              */
5179             if (arg_ct->ialias) {
5180                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
5181                 if (IS_DEAD_ARG(i1) &&
5182                     IS_DEAD_ARG(i2) &&
5183                     !temp_readonly(ts) &&
5184                     ts->val_type == TEMP_VAL_REG &&
5185                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
5186                     tcg_regset_test_reg(i_required_regs, reg) &&
5187                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
5188                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
5189                     (ts2
5190                      ? ts2->val_type == TEMP_VAL_REG &&
5191                        ts2->reg == reg + 1 &&
5192                        !temp_readonly(ts2)
5193                      : s->reg_to_temp[reg + 1] == NULL)) {
5194                     break;
5195                 }
5196             } else {
5197                 /* Without aliasing, the pair must also be an input. */
5198                 tcg_debug_assert(ts2);
5199                 if (ts->val_type == TEMP_VAL_REG &&
5200                     ts2->val_type == TEMP_VAL_REG &&
5201                     ts2->reg == reg + 1 &&
5202                     tcg_regset_test_reg(i_required_regs, reg)) {
5203                     break;
5204                 }
5205             }
5206             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
5207                                      0, ts->indirect_base);
5208             goto do_pair;
5209 
5210         case 2: /* pair second */
5211             reg = new_args[arg_ct->pair_index] + 1;
5212             goto do_pair;
5213 
5214         case 3: /* ialias with second output, no first input */
5215             tcg_debug_assert(arg_ct->ialias);
5216             i_preferred_regs = output_pref(op, arg_ct->alias_index);
5217 
5218             if (IS_DEAD_ARG(i) &&
5219                 !temp_readonly(ts) &&
5220                 ts->val_type == TEMP_VAL_REG &&
5221                 reg > 0 &&
5222                 s->reg_to_temp[reg - 1] == NULL &&
5223                 tcg_regset_test_reg(i_required_regs, reg) &&
5224                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
5225                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
5226                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
5227                 break;
5228             }
5229             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
5230                                      i_allocated_regs, 0,
5231                                      ts->indirect_base);
5232             tcg_regset_set_reg(i_allocated_regs, reg);
5233             reg += 1;
5234             goto do_pair;
5235 
5236         do_pair:
5237             /*
5238              * If an aliased input is not dead after the instruction,
5239              * we must allocate a new register and move it.
5240              */
5241             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
5242                 TCGRegSet t_allocated_regs = i_allocated_regs;
5243 
5244                 /*
5245                  * Because of the alias, and the continued life, make sure
5246                  * that the temp is somewhere *other* than the reg pair,
5247                  * and we get a copy in reg.
5248                  */
5249                 tcg_regset_set_reg(t_allocated_regs, reg);
5250                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
5251                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
5252                     /* If ts was already in reg, copy it somewhere else. */
5253                     TCGReg nr;
5254                     bool ok;
5255 
5256                     tcg_debug_assert(ts->kind != TEMP_FIXED);
5257                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
5258                                        t_allocated_regs, 0, ts->indirect_base);
5259                     ok = tcg_out_mov(s, ts->type, nr, reg);
5260                     tcg_debug_assert(ok);
5261 
5262                     set_temp_val_reg(s, ts, nr);
5263                 } else {
5264                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5265                               t_allocated_regs, 0);
5266                     copyto_new_reg = true;
5267                 }
5268             } else {
5269                 /* Preferably allocate to reg, otherwise copy. */
5270                 i_required_regs = (TCGRegSet)1 << reg;
5271                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5272                           i_preferred_regs);
5273                 copyto_new_reg = ts->reg != reg;
5274             }
5275             break;
5276 
5277         default:
5278             g_assert_not_reached();
5279         }
5280 
5281         if (copyto_new_reg) {
5282             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5283                 /*
5284                  * Cross register class move not supported.  Sync the
5285                  * temp back to its slot and load from there.
5286                  */
5287                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5288                 tcg_out_ld(s, ts->type, reg,
5289                            ts->mem_base->reg, ts->mem_offset);
5290             }
5291         }
5292         new_args[i] = reg;
5293         const_args[i] = 0;
5294         tcg_regset_set_reg(i_allocated_regs, reg);
5295     }
5296 
5297     /* mark dead temporaries and free the associated registers */
5298     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5299         if (IS_DEAD_ARG(i)) {
5300             temp_dead(s, arg_temp(op->args[i]));
5301         }
5302     }
5303 
5304     if (def->flags & TCG_OPF_COND_BRANCH) {
5305         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5306     } else if (def->flags & TCG_OPF_BB_END) {
5307         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5308     } else {
5309         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5310             /* XXX: permit generic clobber register list? */
5311             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5312                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5313                     tcg_reg_free(s, i, i_allocated_regs);
5314                 }
5315             }
5316         }
5317         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5318             /* sync globals if the op has side effects and might trigger
5319                an exception. */
5320             sync_globals(s, i_allocated_regs);
5321         }
5322 
5323         /* satisfy the output constraints */
5324         for (k = 0; k < nb_oargs; k++) {
5325             i = args_ct[k].sort_index;
5326             arg = op->args[i];
5327             arg_ct = &args_ct[i];
5328             ts = arg_temp(arg);
5329 
5330             /* ENV should not be modified.  */
5331             tcg_debug_assert(!temp_readonly(ts));
5332 
5333             switch (arg_ct->pair) {
5334             case 0: /* not paired */
5335                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5336                     reg = new_args[arg_ct->alias_index];
5337                 } else if (arg_ct->newreg) {
5338                     reg = tcg_reg_alloc(s, arg_ct->regs,
5339                                         i_allocated_regs | o_allocated_regs,
5340                                         output_pref(op, k), ts->indirect_base);
5341                 } else {
5342                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5343                                         output_pref(op, k), ts->indirect_base);
5344                 }
5345                 break;
5346 
5347             case 1: /* first of pair */
5348                 if (arg_ct->oalias) {
5349                     reg = new_args[arg_ct->alias_index];
5350                 } else if (arg_ct->newreg) {
5351                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5352                                              i_allocated_regs | o_allocated_regs,
5353                                              output_pref(op, k),
5354                                              ts->indirect_base);
5355                 } else {
5356                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5357                                              output_pref(op, k),
5358                                              ts->indirect_base);
5359                 }
5360                 break;
5361 
5362             case 2: /* second of pair */
5363                 if (arg_ct->oalias) {
5364                     reg = new_args[arg_ct->alias_index];
5365                 } else {
5366                     reg = new_args[arg_ct->pair_index] + 1;
5367                 }
5368                 break;
5369 
5370             case 3: /* first of pair, aliasing with a second input */
5371                 tcg_debug_assert(!arg_ct->newreg);
5372                 reg = new_args[arg_ct->pair_index] - 1;
5373                 break;
5374 
5375             default:
5376                 g_assert_not_reached();
5377             }
5378             tcg_regset_set_reg(o_allocated_regs, reg);
5379             set_temp_val_reg(s, ts, reg);
5380             ts->mem_coherent = 0;
5381             new_args[i] = reg;
5382         }
5383     }
5384 
5385     /* emit instruction */
5386     TCGType type = TCGOP_TYPE(op);
5387     switch (op->opc) {
5388     case INDEX_op_ext_i32_i64:
5389         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5390         break;
5391     case INDEX_op_extu_i32_i64:
5392         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5393         break;
5394     case INDEX_op_extrl_i64_i32:
5395         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5396         break;
5397 
5398     case INDEX_op_add:
5399     case INDEX_op_and:
5400     case INDEX_op_andc:
5401     case INDEX_op_clz:
5402     case INDEX_op_ctz:
5403     case INDEX_op_divs:
5404     case INDEX_op_divu:
5405     case INDEX_op_eqv:
5406     case INDEX_op_mul:
5407     case INDEX_op_mulsh:
5408     case INDEX_op_muluh:
5409     case INDEX_op_nand:
5410     case INDEX_op_nor:
5411     case INDEX_op_or:
5412     case INDEX_op_orc:
5413     case INDEX_op_rems:
5414     case INDEX_op_remu:
5415     case INDEX_op_rotl:
5416     case INDEX_op_rotr:
5417     case INDEX_op_sar:
5418     case INDEX_op_shl:
5419     case INDEX_op_shr:
5420     case INDEX_op_xor:
5421         {
5422             const TCGOutOpBinary *out =
5423                 container_of(all_outop[op->opc], TCGOutOpBinary, base);
5424 
5425             /* Constants should never appear in the first source operand. */
5426             tcg_debug_assert(!const_args[1]);
5427             if (const_args[2]) {
5428                 out->out_rri(s, type, new_args[0], new_args[1], new_args[2]);
5429             } else {
5430                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5431             }
5432         }
5433         break;
5434 
5435     case INDEX_op_sub:
5436         {
5437             const TCGOutOpSubtract *out = &outop_sub;
5438 
5439             /*
5440              * Constants should never appear in the second source operand.
5441              * These are folded into an add with the negated constant.
5442              */
5443             tcg_debug_assert(!const_args[2]);
5444             if (const_args[1]) {
5445                 out->out_rir(s, type, new_args[0], new_args[1], new_args[2]);
5446             } else {
5447                 out->out_rrr(s, type, new_args[0], new_args[1], new_args[2]);
5448             }
5449         }
5450         break;
5451 
5452     case INDEX_op_neg:
5453     case INDEX_op_not:
5454         {
5455             const TCGOutOpUnary *out =
5456                 container_of(all_outop[op->opc], TCGOutOpUnary, base);
5457 
5458             /* Constants should have been folded. */
5459             tcg_debug_assert(!const_args[1]);
5460             out->out_rr(s, type, new_args[0], new_args[1]);
5461         }
5462         break;
5463 
5464     case INDEX_op_divs2:
5465     case INDEX_op_divu2:
5466         {
5467             const TCGOutOpDivRem *out =
5468                 container_of(all_outop[op->opc], TCGOutOpDivRem, base);
5469 
5470             /* Only used by x86 and s390x, which use matching constraints. */
5471             tcg_debug_assert(new_args[0] == new_args[2]);
5472             tcg_debug_assert(new_args[1] == new_args[3]);
5473             tcg_debug_assert(!const_args[4]);
5474             out->out_rr01r(s, type, new_args[0], new_args[1], new_args[4]);
5475         }
5476         break;
5477 
5478     default:
5479         if (def->flags & TCG_OPF_VECTOR) {
5480             tcg_out_vec_op(s, op->opc, type - TCG_TYPE_V64,
5481                            TCGOP_VECE(op), new_args, const_args);
5482         } else {
5483             tcg_out_op(s, op->opc, type, new_args, const_args);
5484         }
5485         break;
5486     }
5487 
5488     /* move the outputs in the correct register if needed */
5489     for (i = 0; i < nb_oargs; i++) {
5490         ts = arg_temp(op->args[i]);
5491 
5492         /* ENV should not be modified.  */
5493         tcg_debug_assert(!temp_readonly(ts));
5494 
5495         if (NEED_SYNC_ARG(i)) {
5496             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5497         } else if (IS_DEAD_ARG(i)) {
5498             temp_dead(s, ts);
5499         }
5500     }
5501 }
5502 
5503 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5504 {
5505     const TCGLifeData arg_life = op->life;
5506     TCGTemp *ots, *itsl, *itsh;
5507     TCGType vtype = TCGOP_TYPE(op);
5508 
5509     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5510     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5511     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5512 
5513     ots = arg_temp(op->args[0]);
5514     itsl = arg_temp(op->args[1]);
5515     itsh = arg_temp(op->args[2]);
5516 
5517     /* ENV should not be modified.  */
5518     tcg_debug_assert(!temp_readonly(ots));
5519 
5520     /* Allocate the output register now.  */
5521     if (ots->val_type != TEMP_VAL_REG) {
5522         TCGRegSet allocated_regs = s->reserved_regs;
5523         TCGRegSet dup_out_regs = opcode_args_ct(op)[0].regs;
5524         TCGReg oreg;
5525 
5526         /* Make sure to not spill the input registers. */
5527         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5528             tcg_regset_set_reg(allocated_regs, itsl->reg);
5529         }
5530         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5531             tcg_regset_set_reg(allocated_regs, itsh->reg);
5532         }
5533 
5534         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5535                              output_pref(op, 0), ots->indirect_base);
5536         set_temp_val_reg(s, ots, oreg);
5537     }
5538 
5539     /* Promote dup2 of immediates to dupi_vec. */
5540     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5541         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5542         MemOp vece = MO_64;
5543 
5544         if (val == dup_const(MO_8, val)) {
5545             vece = MO_8;
5546         } else if (val == dup_const(MO_16, val)) {
5547             vece = MO_16;
5548         } else if (val == dup_const(MO_32, val)) {
5549             vece = MO_32;
5550         }
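        /*
         * E.g. low == high == 1 gives val == 0x0000000100000001,
         * which first matches dup_const at MO_32.
         */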
5551 
5552         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5553         goto done;
5554     }
5555 
5556     /* If the two inputs form one 64-bit value, try dupm_vec. */
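    /*
     * That is, itsl and itsh are the two halves of one 64-bit temp
     * (adjacent TCGTemps with the low part at subindex
     * HOST_BIG_ENDIAN), so once both halves are synced the value
     * sits contiguously in a single 64-bit memory slot.
     */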
5557     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5558         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5559         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5560         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5561 
5562         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5563         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5564 
5565         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5566                              its->mem_base->reg, its->mem_offset)) {
5567             goto done;
5568         }
5569     }
5570 
5571     /* Fall back to generic expansion. */
5572     return false;
5573 
5574  done:
5575     ots->mem_coherent = 0;
5576     if (IS_DEAD_ARG(1)) {
5577         temp_dead(s, itsl);
5578     }
5579     if (IS_DEAD_ARG(2)) {
5580         temp_dead(s, itsh);
5581     }
5582     if (NEED_SYNC_ARG(0)) {
5583         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5584     } else if (IS_DEAD_ARG(0)) {
5585         temp_dead(s, ots);
5586     }
5587     return true;
5588 }
5589 
5590 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5591                          TCGRegSet allocated_regs)
5592 {
5593     if (ts->val_type == TEMP_VAL_REG) {
5594         if (ts->reg != reg) {
5595             tcg_reg_free(s, reg, allocated_regs);
5596             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5597                 /*
5598                  * Cross register class move not supported.  Sync the
5599                  * temp back to its slot and load from there.
5600                  */
5601                 temp_sync(s, ts, allocated_regs, 0, 0);
5602                 tcg_out_ld(s, ts->type, reg,
5603                            ts->mem_base->reg, ts->mem_offset);
5604             }
5605         }
5606     } else {
5607         TCGRegSet arg_set = 0;
5608 
5609         tcg_reg_free(s, reg, allocated_regs);
5610         tcg_regset_set_reg(arg_set, reg);
5611         temp_load(s, ts, arg_set, allocated_regs, 0);
5612     }
5613 }
5614 
5615 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5616                          TCGRegSet allocated_regs)
5617 {
5618     /*
5619      * When the destination is on the stack, load up the temp and store.
5620      * If there are many call-saved registers, the temp might live to
5621      * see another use; otherwise it'll be discarded.
5622      */
5623     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5624     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5625                arg_slot_stk_ofs(arg_slot));
5626 }
5627 
5628 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5629                             TCGTemp *ts, TCGRegSet *allocated_regs)
5630 {
5631     if (arg_slot_reg_p(l->arg_slot)) {
5632         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5633         load_arg_reg(s, reg, ts, *allocated_regs);
5634         tcg_regset_set_reg(*allocated_regs, reg);
5635     } else {
5636         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5637     }
5638 }
5639 
5640 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5641                          intptr_t ref_off, TCGRegSet *allocated_regs)
5642 {
5643     TCGReg reg;
5644 
5645     if (arg_slot_reg_p(arg_slot)) {
5646         reg = tcg_target_call_iarg_regs[arg_slot];
5647         tcg_reg_free(s, reg, *allocated_regs);
5648         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5649         tcg_regset_set_reg(*allocated_regs, reg);
5650     } else {
5651         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5652                             *allocated_regs, 0, false);
5653         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5654         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5655                    arg_slot_stk_ofs(arg_slot));
5656     }
5657 }
5658 
5659 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5660 {
5661     const int nb_oargs = TCGOP_CALLO(op);
5662     const int nb_iargs = TCGOP_CALLI(op);
5663     const TCGLifeData arg_life = op->life;
5664     const TCGHelperInfo *info = tcg_call_info(op);
5665     TCGRegSet allocated_regs = s->reserved_regs;
5666     int i;
5667 
5668     /*
5669      * Move inputs into place in reverse order,
5670      * so that we place stacked arguments first.
5671      */
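    /*
     * Stack stores may need temp_load to claim a scratch register;
     * doing them before the argument registers are filled keeps
     * those registers available for the loads.
     */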
5672     for (i = nb_iargs - 1; i >= 0; --i) {
5673         const TCGCallArgumentLoc *loc = &info->in[i];
5674         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5675 
5676         switch (loc->kind) {
5677         case TCG_CALL_ARG_NORMAL:
5678         case TCG_CALL_ARG_EXTEND_U:
5679         case TCG_CALL_ARG_EXTEND_S:
5680             load_arg_normal(s, loc, ts, &allocated_regs);
5681             break;
5682         case TCG_CALL_ARG_BY_REF:
5683             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5684             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5685                          arg_slot_stk_ofs(loc->ref_slot),
5686                          &allocated_regs);
5687             break;
5688         case TCG_CALL_ARG_BY_REF_N:
5689             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5690             break;
5691         default:
5692             g_assert_not_reached();
5693         }
5694     }
5695 
5696     /* Mark dead temporaries and free the associated registers.  */
5697     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5698         if (IS_DEAD_ARG(i)) {
5699             temp_dead(s, arg_temp(op->args[i]));
5700         }
5701     }
5702 
5703     /* Clobber call registers.  */
5704     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5705         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5706             tcg_reg_free(s, i, allocated_regs);
5707         }
5708     }
5709 
5710     /*
5711      * Save globals if they might be written by the helper,
5712      * sync them if they might be read.
5713      */
5714     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5715         /* Nothing to do */
5716     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5717         sync_globals(s, allocated_regs);
5718     } else {
5719         save_globals(s, allocated_regs);
5720     }
5721 
5722     /*
5723      * If the ABI passes a pointer to the returned struct as the first
5724      * argument, load that now.  Pass a pointer to the output home slot.
5725      */
5726     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5727         TCGTemp *ts = arg_temp(op->args[0]);
5728 
5729         if (!ts->mem_allocated) {
5730             temp_allocate_frame(s, ts);
5731         }
5732         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5733     }
5734 
5735     tcg_out_call(s, tcg_call_func(op), info);
5736 
5737     /* Assign output registers and emit moves if needed.  */
5738     switch (info->out_kind) {
5739     case TCG_CALL_RET_NORMAL:
5740         for (i = 0; i < nb_oargs; i++) {
5741             TCGTemp *ts = arg_temp(op->args[i]);
5742             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5743 
5744             /* ENV should not be modified.  */
5745             tcg_debug_assert(!temp_readonly(ts));
5746 
5747             set_temp_val_reg(s, ts, reg);
5748             ts->mem_coherent = 0;
5749         }
5750         break;
5751 
5752     case TCG_CALL_RET_BY_VEC:
5753         {
5754             TCGTemp *ts = arg_temp(op->args[0]);
5755 
5756             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5757             tcg_debug_assert(ts->temp_subindex == 0);
5758             if (!ts->mem_allocated) {
5759                 temp_allocate_frame(s, ts);
5760             }
5761             tcg_out_st(s, TCG_TYPE_V128,
5762                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5763                        ts->mem_base->reg, ts->mem_offset);
5764         }
5765         /* fall through to mark all parts in memory */
5766 
5767     case TCG_CALL_RET_BY_REF:
5768         /* The callee has performed a write through the reference. */
5769         for (i = 0; i < nb_oargs; i++) {
5770             TCGTemp *ts = arg_temp(op->args[i]);
5771             ts->val_type = TEMP_VAL_MEM;
5772         }
5773         break;
5774 
5775     default:
5776         g_assert_not_reached();
5777     }
5778 
5779     /* Flush or discard output registers as needed. */
5780     for (i = 0; i < nb_oargs; i++) {
5781         TCGTemp *ts = arg_temp(op->args[i]);
5782         if (NEED_SYNC_ARG(i)) {
5783             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5784         } else if (IS_DEAD_ARG(i)) {
5785             temp_dead(s, ts);
5786         }
5787     }
5788 }
5789 
5790 /**
5791  * atom_and_align_for_opc:
5792  * @s: tcg context
5793  * @opc: memory operation code
5794  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5795  * @allow_two_ops: true if we are prepared to issue two operations
5796  *
5797  * Return the alignment and atomicity to use for the inline fast path
5798  * for the given memory operation.  The alignment may be larger than
5799  * that specified in @opc, and the correct alignment will be diagnosed
5800  * by the slow path helper.
5801  *
5802  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5803  * and issue two loads or stores for subalignment.
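 *
 * For example, an MO_128 access with MO_ATOM_IFALIGN_PAIR yields
 * atmax == MO_64: each 8-byte half must be atomic when aligned,
 * but the 16-byte whole need not be.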
5804  */
5805 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5806                                            MemOp host_atom, bool allow_two_ops)
5807 {
5808     MemOp align = memop_alignment_bits(opc);
5809     MemOp size = opc & MO_SIZE;
5810     MemOp half = size ? size - 1 : 0;
5811     MemOp atom = opc & MO_ATOM_MASK;
5812     MemOp atmax;
5813 
5814     switch (atom) {
5815     case MO_ATOM_NONE:
5816         /* The operation requires no specific atomicity. */
5817         atmax = MO_8;
5818         break;
5819 
5820     case MO_ATOM_IFALIGN:
5821         atmax = size;
5822         break;
5823 
5824     case MO_ATOM_IFALIGN_PAIR:
5825         atmax = half;
5826         break;
5827 
5828     case MO_ATOM_WITHIN16:
5829         atmax = size;
5830         if (size == MO_128) {
5831             /* Misalignment implies !within16, and therefore no atomicity. */
5832         } else if (host_atom != MO_ATOM_WITHIN16) {
5833             /* The host does not implement within16, so require alignment. */
5834             align = MAX(align, size);
5835         }
5836         break;
5837 
5838     case MO_ATOM_WITHIN16_PAIR:
5839         atmax = size;
5840         /*
5841          * Misalignment implies !within16, and therefore half atomicity.
5842          * Any host prepared for two operations can implement this with
5843          * half alignment.
5844          */
5845         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5846             align = MAX(align, half);
5847         }
5848         break;
5849 
5850     case MO_ATOM_SUBALIGN:
5851         atmax = size;
5852         if (host_atom != MO_ATOM_SUBALIGN) {
5853             /* If unaligned but not odd, there are subobjects up to half. */
5854             if (allow_two_ops) {
5855                 align = MAX(align, half);
5856             } else {
5857                 align = MAX(align, size);
5858             }
5859         }
5860         break;
5861 
5862     default:
5863         g_assert_not_reached();
5864     }
5865 
5866     return (TCGAtomAlign){ .atom = atmax, .align = align };
5867 }
5868 
5869 /*
5870  * Similarly for qemu_ld/st slow path helpers.
5871  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5872  * using only the provided backend tcg_out_* functions.
5873  */
5874 
5875 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5876 {
5877     int ofs = arg_slot_stk_ofs(slot);
5878 
5879     /*
5880      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5881      * require extension to uint64_t, adjust the address for uint32_t.
5882      */
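    /* E.g. a 64-bit big-endian host keeps a 32-bit value in the
       high-address half of its 8-byte slot, hence the +4 below. */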
5883     if (HOST_BIG_ENDIAN &&
5884         TCG_TARGET_REG_BITS == 64 &&
5885         type == TCG_TYPE_I32) {
5886         ofs += 4;
5887     }
5888     return ofs;
5889 }
5890 
5891 static void tcg_out_helper_load_slots(TCGContext *s,
5892                                       unsigned nmov, TCGMovExtend *mov,
5893                                       const TCGLdstHelperParam *parm)
5894 {
5895     unsigned i;
5896     TCGReg dst3;
5897 
5898     /*
5899      * Start from the end, storing to the stack first.
5900      * This frees those registers, so we need not consider overlap.
5901      */
5902     for (i = nmov; i-- > 0; ) {
5903         unsigned slot = mov[i].dst;
5904 
5905         if (arg_slot_reg_p(slot)) {
5906             goto found_reg;
5907         }
5908 
5909         TCGReg src = mov[i].src;
5910         TCGType dst_type = mov[i].dst_type;
5911         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5912 
5913         /* The argument is going onto the stack; extend into scratch. */
5914         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5915             tcg_debug_assert(parm->ntmp != 0);
5916             mov[i].dst = src = parm->tmp[0];
5917             tcg_out_movext1(s, &mov[i]);
5918         }
5919 
5920         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5921                    tcg_out_helper_stk_ofs(dst_type, slot));
5922     }
5923     return;
5924 
5925  found_reg:
5926     /*
5927      * The remaining arguments are in registers.
5928      * Convert slot numbers to argument registers.
5929      */
5930     nmov = i + 1;
5931     for (i = 0; i < nmov; ++i) {
5932         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5933     }
5934 
5935     switch (nmov) {
5936     case 4:
5937         /* The backend must have provided enough temps for the worst case. */
5938         tcg_debug_assert(parm->ntmp >= 2);
5939 
5940         dst3 = mov[3].dst;
5941         for (unsigned j = 0; j < 3; ++j) {
5942             if (dst3 == mov[j].src) {
5943                 /*
5944                  * Conflict. Copy the source to a temporary, perform the
5945                  * remaining moves, then the extension from our scratch
5946                  * on the way out.
5947                  */
5948                 TCGReg scratch = parm->tmp[1];
5949 
5950                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5951                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5952                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5953                 return;
5954             }
5955         }
5956 
5957         /* No conflicts: perform this move and continue. */
5958         tcg_out_movext1(s, &mov[3]);
5959         /* fall through */
5960 
5961     case 3:
5962         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5963                         parm->ntmp ? parm->tmp[0] : -1);
5964         break;
5965     case 2:
5966         tcg_out_movext2(s, mov, mov + 1,
5967                         parm->ntmp ? parm->tmp[0] : -1);
5968         break;
5969     case 1:
5970         tcg_out_movext1(s, mov);
5971         break;
5972     default:
5973         g_assert_not_reached();
5974     }
5975 }
5976 
5977 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5978                                     TCGType type, tcg_target_long imm,
5979                                     const TCGLdstHelperParam *parm)
5980 {
5981     if (arg_slot_reg_p(slot)) {
5982         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5983     } else {
5984         int ofs = tcg_out_helper_stk_ofs(type, slot);
5985         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5986             tcg_debug_assert(parm->ntmp != 0);
5987             tcg_out_movi(s, type, parm->tmp[0], imm);
5988             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5989         }
5990     }
5991 }
5992 
5993 static void tcg_out_helper_load_common_args(TCGContext *s,
5994                                             const TCGLabelQemuLdst *ldst,
5995                                             const TCGLdstHelperParam *parm,
5996                                             const TCGHelperInfo *info,
5997                                             unsigned next_arg)
5998 {
5999     TCGMovExtend ptr_mov = {
6000         .dst_type = TCG_TYPE_PTR,
6001         .src_type = TCG_TYPE_PTR,
6002         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
6003     };
6004     const TCGCallArgumentLoc *loc = &info->in[0];
6005     TCGType type;
6006     unsigned slot;
6007     tcg_target_ulong imm;
6008 
6009     /*
6010      * Handle env, which is always first.
6011      */
6012     ptr_mov.dst = loc->arg_slot;
6013     ptr_mov.src = TCG_AREG0;
6014     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6015 
6016     /*
6017      * Handle oi.
6018      */
6019     imm = ldst->oi;
6020     loc = &info->in[next_arg];
6021     type = TCG_TYPE_I32;
6022     switch (loc->kind) {
6023     case TCG_CALL_ARG_NORMAL:
6024         break;
6025     case TCG_CALL_ARG_EXTEND_U:
6026     case TCG_CALL_ARG_EXTEND_S:
6027         /* No extension required for MemOpIdx. */
6028         tcg_debug_assert(imm <= INT32_MAX);
6029         type = TCG_TYPE_REG;
6030         break;
6031     default:
6032         g_assert_not_reached();
6033     }
6034     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
6035     next_arg++;
6036 
6037     /*
6038      * Handle ra.
6039      */
6040     loc = &info->in[next_arg];
6041     slot = loc->arg_slot;
6042     if (parm->ra_gen) {
6043         int arg_reg = -1;
6044         TCGReg ra_reg;
6045 
6046         if (arg_slot_reg_p(slot)) {
6047             arg_reg = tcg_target_call_iarg_regs[slot];
6048         }
6049         ra_reg = parm->ra_gen(s, ldst, arg_reg);
6050 
6051         ptr_mov.dst = slot;
6052         ptr_mov.src = ra_reg;
6053         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
6054     } else {
6055         imm = (uintptr_t)ldst->raddr;
6056         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
6057     }
6058 }
6059 
6060 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
6061                                        const TCGCallArgumentLoc *loc,
6062                                        TCGType dst_type, TCGType src_type,
6063                                        TCGReg lo, TCGReg hi)
6064 {
6065     MemOp reg_mo;
6066 
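    /* Anything no wider than one host register is passed in one piece. */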
6067     if (dst_type <= TCG_TYPE_REG) {
6068         MemOp src_ext;
6069 
6070         switch (loc->kind) {
6071         case TCG_CALL_ARG_NORMAL:
6072             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
6073             break;
6074         case TCG_CALL_ARG_EXTEND_U:
6075             dst_type = TCG_TYPE_REG;
6076             src_ext = MO_UL;
6077             break;
6078         case TCG_CALL_ARG_EXTEND_S:
6079             dst_type = TCG_TYPE_REG;
6080             src_ext = MO_SL;
6081             break;
6082         default:
6083             g_assert_not_reached();
6084         }
6085 
6086         mov[0].dst = loc->arg_slot;
6087         mov[0].dst_type = dst_type;
6088         mov[0].src = lo;
6089         mov[0].src_type = src_type;
6090         mov[0].src_ext = src_ext;
6091         return 1;
6092     }
6093 
6094     if (TCG_TARGET_REG_BITS == 32) {
6095         assert(dst_type == TCG_TYPE_I64);
6096         reg_mo = MO_32;
6097     } else {
6098         assert(dst_type == TCG_TYPE_I128);
6099         reg_mo = MO_64;
6100     }
6101 
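    /* The value needs two slots; endianness decides which half goes
       in the lower-numbered slot (big-endian: high part first). */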
6102     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
6103     mov[0].src = lo;
6104     mov[0].dst_type = TCG_TYPE_REG;
6105     mov[0].src_type = TCG_TYPE_REG;
6106     mov[0].src_ext = reg_mo;
6107 
6108     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
6109     mov[1].src = hi;
6110     mov[1].dst_type = TCG_TYPE_REG;
6111     mov[1].src_type = TCG_TYPE_REG;
6112     mov[1].src_ext = reg_mo;
6113 
6114     return 2;
6115 }
6116 
6117 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6118                                    const TCGLdstHelperParam *parm)
6119 {
6120     const TCGHelperInfo *info;
6121     const TCGCallArgumentLoc *loc;
6122     TCGMovExtend mov[2];
6123     unsigned next_arg, nmov;
6124     MemOp mop = get_memop(ldst->oi);
6125 
6126     switch (mop & MO_SIZE) {
6127     case MO_8:
6128     case MO_16:
6129     case MO_32:
6130         info = &info_helper_ld32_mmu;
6131         break;
6132     case MO_64:
6133         info = &info_helper_ld64_mmu;
6134         break;
6135     case MO_128:
6136         info = &info_helper_ld128_mmu;
6137         break;
6138     default:
6139         g_assert_not_reached();
6140     }
6141 
6142     /* Defer env argument. */
6143     next_arg = 1;
6144 
6145     loc = &info->in[next_arg];
6146     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6147         /*
6148          * 32-bit host with 32-bit guest: zero-extend the guest address
6149          * to 64-bits for the helper by storing the low part, then
6150          * load a zero for the high part.
6151          */
6152         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6153                                TCG_TYPE_I32, TCG_TYPE_I32,
6154                                ldst->addr_reg, -1);
6155         tcg_out_helper_load_slots(s, 1, mov, parm);
6156 
6157         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
6158                                 TCG_TYPE_I32, 0, parm);
6159         next_arg += 2;
6160     } else {
6161         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6162                                       ldst->addr_reg, -1);
6163         tcg_out_helper_load_slots(s, nmov, mov, parm);
6164         next_arg += nmov;
6165     }
6166 
6167     switch (info->out_kind) {
6168     case TCG_CALL_RET_NORMAL:
6169     case TCG_CALL_RET_BY_VEC:
6170         break;
6171     case TCG_CALL_RET_BY_REF:
6172         /*
6173          * The return reference is in the first argument slot.
6174          * We need memory in which to return: re-use the top of stack.
6175          */
6176         {
6177             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6178 
6179             if (arg_slot_reg_p(0)) {
6180                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
6181                                  TCG_REG_CALL_STACK, ofs_slot0);
6182             } else {
6183                 tcg_debug_assert(parm->ntmp != 0);
6184                 tcg_out_addi_ptr(s, parm->tmp[0],
6185                                  TCG_REG_CALL_STACK, ofs_slot0);
6186                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6187                            TCG_REG_CALL_STACK, ofs_slot0);
6188             }
6189         }
6190         break;
6191     default:
6192         g_assert_not_reached();
6193     }
6194 
6195     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6196 }
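/*
 * For reference, a sketch of the expected shape of the slow-path load
 * helpers selected above (see tcg/tcg-ldst.h for the exact prototypes):
 *
 *     uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
 *                             MemOpIdx oi, uintptr_t retaddr);
 *
 * The code above fills the slots for addr after the deferred env
 * argument; oi and retaddr are loaded by
 * tcg_out_helper_load_common_args().
 */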
6197 
6198 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
6199                                   bool load_sign,
6200                                   const TCGLdstHelperParam *parm)
6201 {
6202     MemOp mop = get_memop(ldst->oi);
6203     TCGMovExtend mov[2];
6204     int ofs_slot0;
6205 
6206     switch (ldst->type) {
6207     case TCG_TYPE_I64:
6208         if (TCG_TARGET_REG_BITS == 32) {
6209             break;
6210         }
6211         /* fall through */
6212 
6213     case TCG_TYPE_I32:
6214         mov[0].dst = ldst->datalo_reg;
6215         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
6216         mov[0].dst_type = ldst->type;
6217         mov[0].src_type = TCG_TYPE_REG;
6218 
6219         /*
6220          * If load_sign, then we allowed the helper to perform the
6221          * appropriate sign extension to tcg_target_ulong, and all
6222          * we need now is a plain move.
6223          *
6224          * If not, then we expect the relevant extension
6225          * instruction to be no more expensive than a move, and
6226          * we thus save icache space, etc., by using only one of
6227          * the two helper functions.
6228          */
6229         if (load_sign || !(mop & MO_SIGN)) {
6230             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
6231                 mov[0].src_ext = MO_32;
6232             } else {
6233                 mov[0].src_ext = MO_64;
6234             }
6235         } else {
6236             mov[0].src_ext = mop & MO_SSIZE;
6237         }
6238         tcg_out_movext1(s, mov);
6239         return;
6240 
6241     case TCG_TYPE_I128:
6242         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6243         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
6244         switch (TCG_TARGET_CALL_RET_I128) {
6245         case TCG_CALL_RET_NORMAL:
6246             break;
6247         case TCG_CALL_RET_BY_VEC:
6248             tcg_out_st(s, TCG_TYPE_V128,
6249                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
6250                        TCG_REG_CALL_STACK, ofs_slot0);
6251             /* fall through */
6252         case TCG_CALL_RET_BY_REF:
6253             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
6254                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
6255             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
6256                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
6257             return;
6258         default:
6259             g_assert_not_reached();
6260         }
6261         break;
6262 
6263     default:
6264         g_assert_not_reached();
6265     }
6266 
6267     mov[0].dst = ldst->datalo_reg;
6268     mov[0].src =
6269         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
6270     mov[0].dst_type = TCG_TYPE_REG;
6271     mov[0].src_type = TCG_TYPE_REG;
6272     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6273 
6274     mov[1].dst = ldst->datahi_reg;
6275     mov[1].src =
6276         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
6277     mov[1].dst_type = TCG_TYPE_REG;
6278     mov[1].src_type = TCG_TYPE_REG;
6279     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
6280 
6281     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
6282 }
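/*
 * Note for the I128 path above: with TCG_CALL_RET_BY_VEC the value
 * comes back in a vector register and is first spilled to the stack
 * slot, so that both the BY_VEC and BY_REF cases can then reload the
 * two I64 halves from memory; with TCG_CALL_RET_BY_REF the callee
 * already stored the value into that same slot.
 */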
6283 
6284 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
6285                                    const TCGLdstHelperParam *parm)
6286 {
6287     const TCGHelperInfo *info;
6288     const TCGCallArgumentLoc *loc;
6289     TCGMovExtend mov[4];
6290     TCGType data_type;
6291     unsigned next_arg, nmov, n;
6292     MemOp mop = get_memop(ldst->oi);
6293 
6294     switch (mop & MO_SIZE) {
6295     case MO_8:
6296     case MO_16:
6297     case MO_32:
6298         info = &info_helper_st32_mmu;
6299         data_type = TCG_TYPE_I32;
6300         break;
6301     case MO_64:
6302         info = &info_helper_st64_mmu;
6303         data_type = TCG_TYPE_I64;
6304         break;
6305     case MO_128:
6306         info = &info_helper_st128_mmu;
6307         data_type = TCG_TYPE_I128;
6308         break;
6309     default:
6310         g_assert_not_reached();
6311     }
6312 
6313     /* Defer env argument. */
6314     next_arg = 1;
6315     nmov = 0;
6316 
6317     /* Handle addr argument. */
6318     loc = &info->in[next_arg];
6319     tcg_debug_assert(s->addr_type <= TCG_TYPE_REG);
6320     if (TCG_TARGET_REG_BITS == 32) {
6321         /*
6322          * 32-bit host (and thus 32-bit guest): zero-extend the guest address
6323          * to 64 bits for the helper by storing the low part.  Later,
6324          * after we have processed the register inputs, we will load a
6325          * zero for the high part.
6326          */
6327         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6328                                TCG_TYPE_I32, TCG_TYPE_I32,
6329                                ldst->addr_reg, -1);
6330         next_arg += 2;
6331         nmov += 1;
6332     } else {
6333         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6334                                    ldst->addr_reg, -1);
6335         next_arg += n;
6336         nmov += n;
6337     }
6338 
6339     /* Handle data argument. */
6340     loc = &info->in[next_arg];
6341     switch (loc->kind) {
6342     case TCG_CALL_ARG_NORMAL:
6343     case TCG_CALL_ARG_EXTEND_U:
6344     case TCG_CALL_ARG_EXTEND_S:
6345         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6346                                    ldst->datalo_reg, ldst->datahi_reg);
6347         next_arg += n;
6348         nmov += n;
6349         tcg_out_helper_load_slots(s, nmov, mov, parm);
6350         break;
6351 
6352     case TCG_CALL_ARG_BY_REF:
6353         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6354         tcg_debug_assert(data_type == TCG_TYPE_I128);
6355         tcg_out_st(s, TCG_TYPE_I64,
6356                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6357                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6358         tcg_out_st(s, TCG_TYPE_I64,
6359                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6360                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6361 
6362         tcg_out_helper_load_slots(s, nmov, mov, parm);
6363 
6364         if (arg_slot_reg_p(loc->arg_slot)) {
6365             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6366                              TCG_REG_CALL_STACK,
6367                              arg_slot_stk_ofs(loc->ref_slot));
6368         } else {
6369             tcg_debug_assert(parm->ntmp != 0);
6370             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6371                              arg_slot_stk_ofs(loc->ref_slot));
6372             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6373                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6374         }
6375         next_arg += 2;
6376         break;
6377 
6378     default:
6379         g_assert_not_reached();
6380     }
6381 
6382     if (TCG_TARGET_REG_BITS == 32) {
6383         /* Zero extend the address by loading a zero for the high part. */
6384         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6385         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6386     }
6387 
6388     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6389 }
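/*
 * Likewise for stores, a sketch of the expected helper shape:
 *
 *     void helper_stq_mmu(CPUArchState *env, uint64_t addr,
 *                         uint64_t val, MemOpIdx oi, uintptr_t retaddr);
 *
 * with the data value inserted between the address and the common
 * oi/retaddr arguments loaded at the end.
 */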
6390 
6391 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6392 {
6393     int i, start_words, num_insns;
6394     TCGOp *op;
6395 
6396     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6397                  && qemu_log_in_addr_range(pc_start))) {
6398         FILE *logfile = qemu_log_trylock();
6399         if (logfile) {
6400             fprintf(logfile, "OP:\n");
6401             tcg_dump_ops(s, logfile, false);
6402             fprintf(logfile, "\n");
6403             qemu_log_unlock(logfile);
6404         }
6405     }
6406 
6407 #ifdef CONFIG_DEBUG_TCG
6408     /* Ensure all labels referenced have been emitted.  */
6409     {
6410         TCGLabel *l;
6411         bool error = false;
6412 
6413         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6414             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6415                 qemu_log_mask(CPU_LOG_TB_OP,
6416                               "$L%d referenced but not present.\n", l->id);
6417                 error = true;
6418             }
6419         }
6420         assert(!error);
6421     }
6422 #endif
6423 
6424     /* Do not reuse any EBB that may be allocated within the TB. */
6425     tcg_temp_ebb_reset_freed(s);
6426 
6427     tcg_optimize(s);
6428 
6429     reachable_code_pass(s);
6430     liveness_pass_0(s);
6431     liveness_pass_1(s);
6432 
6433     if (s->nb_indirects > 0) {
6434         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6435                      && qemu_log_in_addr_range(pc_start))) {
6436             FILE *logfile = qemu_log_trylock();
6437             if (logfile) {
6438                 fprintf(logfile, "OP before indirect lowering:\n");
6439                 tcg_dump_ops(s, logfile, false);
6440                 fprintf(logfile, "\n");
6441                 qemu_log_unlock(logfile);
6442             }
6443         }
6444 
6445         /* Replace indirect temps with direct temps.  */
6446         if (liveness_pass_2(s)) {
6447             /* If changes were made, re-run liveness.  */
6448             liveness_pass_1(s);
6449         }
6450     }
6451 
6452     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6453                  && qemu_log_in_addr_range(pc_start))) {
6454         FILE *logfile = qemu_log_trylock();
6455         if (logfile) {
6456             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6457             tcg_dump_ops(s, logfile, true);
6458             fprintf(logfile, "\n");
6459             qemu_log_unlock(logfile);
6460         }
6461     }
6462 
6463     /* Initialize goto_tb jump offsets. */
6464     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6465     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6466     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6467     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
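    /* The real offsets are recorded by the backend as each goto_tb
       opcode is emitted below; TBs without goto_tb keep the INVALID
       markers.  */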
6468 
6469     tcg_reg_alloc_start(s);
6470 
6471     /*
6472      * Reset the buffer pointers when restarting after overflow.
6473      * TODO: Move this into translate-all.c with the rest of the
6474      * buffer management.  Having only this done here is confusing.
6475      */
6476     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6477     s->code_ptr = s->code_buf;
6478     s->data_gen_ptr = NULL;
6479 
6480     QSIMPLEQ_INIT(&s->ldst_labels);
6481     s->pool_labels = NULL;
6482 
6483     start_words = s->insn_start_words;
6484     s->gen_insn_data =
6485         tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);
6486 
6487     tcg_out_tb_start(s);
6488 
6489     num_insns = -1;
6490     QTAILQ_FOREACH(op, &s->ops, link) {
6491         TCGOpcode opc = op->opc;
6492 
6493         switch (opc) {
6494         case INDEX_op_mov:
6495         case INDEX_op_mov_vec:
6496             tcg_reg_alloc_mov(s, op);
6497             break;
6498         case INDEX_op_dup_vec:
6499             tcg_reg_alloc_dup(s, op);
6500             break;
6501         case INDEX_op_insn_start:
6502             if (num_insns >= 0) {
6503                 size_t off = tcg_current_code_size(s);
6504                 s->gen_insn_end_off[num_insns] = off;
6505                 /* Assert that we do not overflow our stored offset.  */
6506                 assert(s->gen_insn_end_off[num_insns] == off);
6507             }
6508             num_insns++;
6509             for (i = 0; i < start_words; ++i) {
6510                 s->gen_insn_data[num_insns * start_words + i] =
6511                     tcg_get_insn_start_param(op, i);
6512             }
6513             break;
6514         case INDEX_op_discard:
6515             temp_dead(s, arg_temp(op->args[0]));
6516             break;
6517         case INDEX_op_set_label:
6518             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6519             tcg_out_label(s, arg_label(op->args[0]));
6520             break;
6521         case INDEX_op_call:
6522             tcg_reg_alloc_call(s, op);
6523             break;
6524         case INDEX_op_exit_tb:
6525             tcg_out_exit_tb(s, op->args[0]);
6526             break;
6527         case INDEX_op_goto_tb:
6528             tcg_out_goto_tb(s, op->args[0]);
6529             break;
6530         case INDEX_op_dup2_vec:
6531             if (tcg_reg_alloc_dup2(s, op)) {
6532                 break;
6533             }
6534             /* fall through */
6535         default:
6536             /* Sanity check that we've not introduced any unhandled opcodes. */
6537             tcg_debug_assert(tcg_op_supported(opc, TCGOP_TYPE(op),
6538                                               TCGOP_FLAGS(op)));
6539             /* Note: it would be much faster to have specialized
6540                register allocator functions for some common
6541                argument patterns.  */
6542             tcg_reg_alloc_op(s, op);
6543             break;
6544         }
6545         /* Test for (pending) buffer overflow.  The assumption is that any
6546            one operation beginning below the high water mark cannot overrun
6547            the buffer completely.  Thus we can test for overflow after
6548            generating code without having to check during generation.  */
6549         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6550             return -1;
6551         }
6552         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6553         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6554             return -2;
6555         }
6556     }
6557     tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
6558     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6559 
6560     /* Generate TB finalization at the end of block */
6561     i = tcg_out_ldst_finalize(s);
6562     if (i < 0) {
6563         return i;
6564     }
6565     i = tcg_out_pool_finalize(s);
6566     if (i < 0) {
6567         return i;
6568     }
6569     if (!tcg_resolve_relocs(s)) {
6570         return -2;
6571     }
6572 
6573 #ifndef CONFIG_TCG_INTERPRETER
6574     /* flush instruction cache */
6575     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6576                         (uintptr_t)s->code_buf,
6577                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6578 #endif
6579 
6580     return tcg_current_code_size(s);
6581 }
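/*
 * Return values of tcg_gen_code(), as produced above: a non-negative
 * value is the generated code size in bytes; a negative value asks the
 * caller to restart with a smaller TB, -1 when the code buffer
 * high-water mark is crossed and -2 when the code size outgrows the
 * uint16_t insn-offset entries or relocations cannot be resolved.
 */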
6582 
6583 #ifdef ELF_HOST_MACHINE
6584 /* In order to use this feature, the backend needs to do three things:
6585 
6586    (1) Define ELF_HOST_MACHINE, which both supplies the value to
6587        put into the ELF image and indicates support for the feature.
6588 
6589    (2) Define tcg_register_jit.  This should create a buffer containing
6590        the contents of a .debug_frame section that describes the post-
6591        prologue unwind info for the tcg machine.
6592 
6593    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
6594 */
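/*
 * A minimal backend sketch of steps (2) and (3) (hypothetical; a real
 * backend also fills in CFA and register rules matching its actual
 * prologue and frame layout):
 *
 *     static const struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_def_cfa[4];
 *     } debug_frame = { ... };
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */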
6595 
6596 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
6597 typedef enum {
6598     JIT_NOACTION = 0,
6599     JIT_REGISTER_FN,
6600     JIT_UNREGISTER_FN
6601 } jit_actions_t;
6602 
6603 struct jit_code_entry {
6604     struct jit_code_entry *next_entry;
6605     struct jit_code_entry *prev_entry;
6606     const void *symfile_addr;
6607     uint64_t symfile_size;
6608 };
6609 
6610 struct jit_descriptor {
6611     uint32_t version;
6612     uint32_t action_flag;
6613     struct jit_code_entry *relevant_entry;
6614     struct jit_code_entry *first_entry;
6615 };
6616 
6617 void __jit_debug_register_code(void) __attribute__((noinline));
6618 void __jit_debug_register_code(void)
6619 {
6620     asm("");
6621 }
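/*
 * GDB places a breakpoint in the (deliberately non-inlined) function
 * above; calling it after updating __jit_debug_descriptor gives the
 * debugger a chance to read the new entry.  The empty asm prevents
 * the call from being optimized away.
 */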
6622 
6623 /* Must statically initialize the version, because GDB may check
6624    the version before we can set it.  */
6625 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
6626 
6627 /* End GDB interface.  */
6628 
6629 static int find_string(const char *strtab, const char *str)
6630 {
6631     const char *p = strtab + 1;
6632 
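    /* The string table begins with a NUL; entries are NUL-terminated
       and laid out back to back.  Every caller passes a string that is
       present in img->str by construction, so the search always
       terminates.  */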
6633     while (1) {
6634         if (strcmp(p, str) == 0) {
6635             return p - strtab;
6636         }
6637         p += strlen(p) + 1;
6638     }
6639 }
6640 
6641 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6642                                  const void *debug_frame,
6643                                  size_t debug_frame_size)
6644 {
6645     struct __attribute__((packed)) DebugInfo {
6646         uint32_t  len;
6647         uint16_t  version;
6648         uint32_t  abbrev;
6649         uint8_t   ptr_size;
6650         uint8_t   cu_die;
6651         uint16_t  cu_lang;
6652         uintptr_t cu_low_pc;
6653         uintptr_t cu_high_pc;
6654         uint8_t   fn_die;
6655         char      fn_name[16];
6656         uintptr_t fn_low_pc;
6657         uintptr_t fn_high_pc;
6658         uint8_t   cu_eoc;
6659     };
6660 
6661     struct ElfImage {
6662         ElfW(Ehdr) ehdr;
6663         ElfW(Phdr) phdr;
6664         ElfW(Shdr) shdr[7];
6665         ElfW(Sym)  sym[2];
6666         struct DebugInfo di;
6667         uint8_t    da[24];
6668         char       str[80];
6669     };
6670 
6671     struct ElfImage *img;
6672 
6673     static const struct ElfImage img_template = {
6674         .ehdr = {
6675             .e_ident[EI_MAG0] = ELFMAG0,
6676             .e_ident[EI_MAG1] = ELFMAG1,
6677             .e_ident[EI_MAG2] = ELFMAG2,
6678             .e_ident[EI_MAG3] = ELFMAG3,
6679             .e_ident[EI_CLASS] = ELF_CLASS,
6680             .e_ident[EI_DATA] = ELF_DATA,
6681             .e_ident[EI_VERSION] = EV_CURRENT,
6682             .e_type = ET_EXEC,
6683             .e_machine = ELF_HOST_MACHINE,
6684             .e_version = EV_CURRENT,
6685             .e_phoff = offsetof(struct ElfImage, phdr),
6686             .e_shoff = offsetof(struct ElfImage, shdr),
6687             .e_ehsize = sizeof(ElfW(Ehdr)),
6688             .e_phentsize = sizeof(ElfW(Phdr)),
6689             .e_phnum = 1,
6690             .e_shentsize = sizeof(ElfW(Shdr)),
6691             .e_shnum = ARRAY_SIZE(img->shdr),
6692             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6693 #ifdef ELF_HOST_FLAGS
6694             .e_flags = ELF_HOST_FLAGS,
6695 #endif
6696 #ifdef ELF_OSABI
6697             .e_ident[EI_OSABI] = ELF_OSABI,
6698 #endif
6699         },
6700         .phdr = {
6701             .p_type = PT_LOAD,
6702             .p_flags = PF_X,
6703         },
6704         .shdr = {
6705             [0] = { .sh_type = SHT_NULL },
6706             /* Trick: The contents of code_gen_buffer are not present in
6707                this fake ELF file; that got allocated elsewhere.  Therefore
6708                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6709                will not look for contents.  We can record any address.  */
6710             [1] = { /* .text */
6711                 .sh_type = SHT_NOBITS,
6712                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6713             },
6714             [2] = { /* .debug_info */
6715                 .sh_type = SHT_PROGBITS,
6716                 .sh_offset = offsetof(struct ElfImage, di),
6717                 .sh_size = sizeof(struct DebugInfo),
6718             },
6719             [3] = { /* .debug_abbrev */
6720                 .sh_type = SHT_PROGBITS,
6721                 .sh_offset = offsetof(struct ElfImage, da),
6722                 .sh_size = sizeof(img->da),
6723             },
6724             [4] = { /* .debug_frame */
6725                 .sh_type = SHT_PROGBITS,
6726                 .sh_offset = sizeof(struct ElfImage),
6727             },
6728             [5] = { /* .symtab */
6729                 .sh_type = SHT_SYMTAB,
6730                 .sh_offset = offsetof(struct ElfImage, sym),
6731                 .sh_size = sizeof(img->sym),
6732                 .sh_info = 1,
6733                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6734                 .sh_entsize = sizeof(ElfW(Sym)),
6735             },
6736             [6] = { /* .strtab */
6737                 .sh_type = SHT_STRTAB,
6738                 .sh_offset = offsetof(struct ElfImage, str),
6739                 .sh_size = sizeof(img->str),
6740             }
6741         },
6742         .sym = {
6743             [1] = { /* code_gen_buffer */
6744                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6745                 .st_shndx = 1,
6746             }
6747         },
6748         .di = {
6749             .len = sizeof(struct DebugInfo) - 4,
6750             .version = 2,
6751             .ptr_size = sizeof(void *),
6752             .cu_die = 1,
6753             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6754             .fn_die = 2,
6755             .fn_name = "code_gen_buffer"
6756         },
6757         .da = {
6758             1,          /* abbrev number (the cu) */
6759             0x11, 1,    /* DW_TAG_compile_unit, has children */
6760             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6761             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6762             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6763             0, 0,       /* end of abbrev */
6764             2,          /* abbrev number (the fn) */
6765             0x2e, 0,    /* DW_TAG_subprogram, no children */
6766             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6767             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6768             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6769             0, 0,       /* end of abbrev */
6770             0           /* no more abbrev */
6771         },
6772         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6773                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6774     };
6775 
6776     /* We only need a single jit entry; statically allocate it.  */
6777     static struct jit_code_entry one_entry;
6778 
6779     uintptr_t buf = (uintptr_t)buf_ptr;
6780     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6781     DebugFrameHeader *dfh;
6782 
6783     img = g_malloc(img_size);
6784     *img = img_template;
6785 
6786     img->phdr.p_vaddr = buf;
6787     img->phdr.p_paddr = buf;
6788     img->phdr.p_memsz = buf_size;
6789 
6790     img->shdr[1].sh_name = find_string(img->str, ".text");
6791     img->shdr[1].sh_addr = buf;
6792     img->shdr[1].sh_size = buf_size;
6793 
6794     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6795     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6796 
6797     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6798     img->shdr[4].sh_size = debug_frame_size;
6799 
6800     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6801     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6802 
6803     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6804     img->sym[1].st_value = buf;
6805     img->sym[1].st_size = buf_size;
6806 
6807     img->di.cu_low_pc = buf;
6808     img->di.cu_high_pc = buf + buf_size;
6809     img->di.fn_low_pc = buf;
6810     img->di.fn_high_pc = buf + buf_size;
6811 
6812     dfh = (DebugFrameHeader *)(img + 1);
6813     memcpy(dfh, debug_frame, debug_frame_size);
6814     dfh->fde.func_start = buf;
6815     dfh->fde.func_len = buf_size;
6816 
6817 #ifdef DEBUG_JIT
6818     /* Define DEBUG_JIT to enable this dump of the ELF image, which
6819        can then be inspected with readelf, objdump, or other utilities.  */
6820     {
6821         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6822         FILE *f = fopen(jit, "w+b");
6823         if (f) {
6824             if (fwrite(img, img_size, 1, f) != 1) {
6825                 /* Consume the return value; 1 means one complete item.  */
6826             }
6827             fclose(f);
6828         }
6829     }
6830 #endif
6831 
6832     one_entry.symfile_addr = img;
6833     one_entry.symfile_size = img_size;
6834 
6835     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6836     __jit_debug_descriptor.relevant_entry = &one_entry;
6837     __jit_debug_descriptor.first_entry = &one_entry;
6838     __jit_debug_register_code();
6839 }
6840 #else
6841 /* No support for the feature.  Provide the entry point expected by exec.c,
6842    and implement the internal function we declared earlier.  */
6843 
6844 static void tcg_register_jit_int(const void *buf, size_t size,
6845                                  const void *debug_frame,
6846                                  size_t debug_frame_size)
6847 {
6848 }
6849 
6850 void tcg_register_jit(const void *buf, size_t buf_size)
6851 {
6852 }
6853 #endif /* ELF_HOST_MACHINE */
6854 
6855 #if !TCG_TARGET_MAYBE_vec
6856 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
6857 {
6858     g_assert_not_reached();
6859 }
6860 #endif
6861